linkify: simplify noscheme detection logic

Overriding the built-in normalizer is poor form, as it prevents adding a new type handler with its own normalize handler. We only ever want to rewrite protocol-less URLs to http, so we do so explicitly in the "//" schema normalizer. This also means that we no longer need the type conversion dance: we simply set the schema to an empty string when we patch the URL and filter on the schema directly.
2024-04-30 13:42:55 +02:00 · 2024-01-21 18:04:18 +01:00 · 2024-01-21 18:04:18 +01:00 · dd24cb1300
parent ae6bae69ac
commit dd24cb1300
2 changed files with 53 additions and 29 deletions
--- a/shared/linkify.ts
+++ b/shared/linkify.ts
@ -1,36 +1,12 @@
 import LinkifyIt, {Match} from "linkify-it";
 import tlds from "tlds";

-export type NoSchemaMatch = Match & {
-	noschema: boolean;
-};
-
 export type LinkPart = {
 	start: number;
 	end: number;
 	link: string;
 };

-LinkifyIt.prototype.normalize = function normalize(match: NoSchemaMatch) {
-	match.noschema = false;
-
-	if (!match.schema) {
-		match.schema = "http:";
-		match.url = "http://" + match.url;
-		match.noschema = true;
-	}
-
-	if (match.schema === "//") {
-		match.schema = "http:";
-		match.url = "http:" + match.url;
-		match.noschema = true;
-	}
-
-	if (match.schema === "mailto:" && !/^mailto:/i.test(match.url)) {
-		match.url = "mailto:" + match.url;
-	}
-};
-
 const linkify = LinkifyIt().tlds(tlds).tlds("onion", true);

 // Known schemes to detect in text
@ -73,12 +49,25 @@ linkify.add("web+", {
 	},
 	normalize(match) {
 		match.schema = match.text.slice(0, match.text.indexOf(":") + 1);
-		LinkifyIt.prototype.normalize(match); // hand over to the global override
+	},
+});
+
+// We must rewrite protocol-less URLs to http; otherwise, if TL is hosted
+// on https, the remote link would incorrectly use https.
+// See https://github.com/thelounge/thelounge/issues/2525
+//
+// We take the validation logic from linkify and just add our own
+// normalizer.
+linkify.add("//", {
+	validate: (linkify as any).__schemas__["//"].validate,
+	normalize(match) {
+		match.schema = ""; // this counts as not having a schema
+		match.url = "http:" + match.url;
 	},
 });

 export function findLinks(text: string) {
-	const matches = linkify.match(text) as NoSchemaMatch[];
+	const matches = linkify.match(text);

 	if (!matches) {
 		return [];
@ -88,16 +77,16 @@ export function findLinks(text: string) {
 }

 export function findLinksWithSchema(text: string) {
-	const matches = linkify.match(text) as NoSchemaMatch[];
+	const matches = linkify.match(text);

 	if (!matches) {
 		return [];
 	}

-	return matches.filter((url) => !url.noschema).map(makeLinkPart);
+	return matches.filter((url) => !!url.schema).map(makeLinkPart);
 }

-function makeLinkPart(url: NoSchemaMatch): LinkPart {
+function makeLinkPart(url: Match): LinkPart {
 	return {
 		start: url.index,
 		end: url.lastIndex,
--- a/test/shared/findLinks.ts
+++ b/test/shared/findLinks.ts
@ -353,6 +353,26 @@ describe("findLinks", () => {
 		expect(actual).to.deep.equal(expected);
 	});

+	it("should parse mailto links", () => {
+		const input = "mail@example.com mailto:mail@example.org";
+		const expected = [
+			{
+				link: "mailto:mail@example.com",
+				start: 0,
+				end: 16,
+			},
+			{
+				link: "mailto:mail@example.org",
+				start: 17,
+				end: 40,
+			},
+		];
+
+		const actual = findLinks(input);
+
+		expect(actual).to.deep.equal(expected);
+	});
+
 	it("should not return urls with no schema if flag is specified", () => {
 		const input = "https://example.global //example.com http://example.group example.py";
 		const expected = [
@ -373,6 +393,21 @@ describe("findLinks", () => {
 		expect(actual).to.deep.equal(expected);
 	});

+	it("should use http for protocol-less URLs", () => {
+		const input = "//example.com";
+		const expected = [
+			{
+				link: "http://example.com",
+				start: 0,
+				end: 13,
+			},
+		];
+
+		const actual = findLinks(input);
+
+		expect(actual).to.deep.equal(expected);
+	});
+
 	it("should find web+ schema urls", () => {
 		const input = "web+ap://instance.example/@Example web+whatever://example.com?some=value";
 		const expected = [