linkify: simplify noscheme detection logic

Overriding the built-in normalizer is poor form, as it prevents adding
a new type handler with its own normalize handler.

We only ever want to override protocol-less URLs to http, so
we just do so explicitly in the "//" schema normalizer.

This also means that we don't need the whole type conversion dance:
we simply set the schema to an empty string when we patch it and filter
on the schema directly.
This commit is contained in:
Reto Brunner 2024-01-21 18:04:18 +01:00
parent ae6bae69ac
commit dd24cb1300
2 changed files with 53 additions and 29 deletions

View file

@ -1,36 +1,12 @@
import LinkifyIt, {Match} from "linkify-it";
import tlds from "tlds";
/** A linkify-it `Match` extended with a flag recording whether the matched text lacked a schema. */
export type NoSchemaMatch = Match & {
// true when the normalizer had to supply "http:" itself (no schema, or a "//" prefix)
noschema: boolean;
};
/** Position and target of a single link found in a piece of text. */
export type LinkPart = {
// offset in the text where the link begins (filled from the match's `index`)
start: number;
// offset in the text where the link ends (filled from the match's `lastIndex`)
end: number;
// the link target, e.g. "http://example.com"
link: string;
};
// Replaces the normalizer on the LinkifyIt prototype.
// It mutates `match` in place: matches without a schema, or with the
// protocol-relative "//" schema, are rewritten to http and flagged with
// `noschema = true`; every other match keeps `noschema = false`.
LinkifyIt.prototype.normalize = function normalize(match: NoSchemaMatch) {
match.noschema = false;
// No schema at all: prepend "http://" to the URL.
if (!match.schema) {
match.schema = "http:";
match.url = "http://" + match.url;
match.noschema = true;
}
// Protocol-relative URL: the url already starts with "//", so only "http:" is prepended.
// The branch above leaves schema as "http:", so at most one of the two branches applies.
if (match.schema === "//") {
match.schema = "http:";
match.url = "http:" + match.url;
match.noschema = true;
}
// Mail matches whose url does not already start with "mailto:" (case-insensitive) get the prefix added.
if (match.schema === "mailto:" && !/^mailto:/i.test(match.url)) {
match.url = "mailto:" + match.url;
}
};
// Shared matcher instance: loads the TLD list from the `tlds` package, then adds
// "onion" (the `true` flag is linkify-it's keepOld option, so the list is extended, not replaced).
const linkify = LinkifyIt().tlds(tlds).tlds("onion", true);
// Known schemes to detect in text
@ -73,12 +49,25 @@ linkify.add("web+", {
},
normalize(match) {
match.schema = match.text.slice(0, match.text.indexOf(":") + 1);
LinkifyIt.prototype.normalize(match); // hand over to the global override
},
});
// We must rewrite protocol-less URLs to http; otherwise, if The Lounge (TL)
// is hosted on https, the browser would incorrectly use https for the remote link.
// See https://github.com/thelounge/thelounge/issues/2525
//
// We take the validation logic from linkify and just add our own
// normalizer.
linkify.add("//", {
// reuse the validator already registered for "//"; it is read from the
// instance's `__schemas__` table, hence the `any` cast
validate: (linkify as any).__schemas__["//"].validate,
normalize(match) {
match.schema = ""; // this counts as not having a schema
// match.url still starts with "//", so prepending "http:" yields "http://..."
match.url = "http:" + match.url;
},
});
export function findLinks(text: string) {
const matches = linkify.match(text) as NoSchemaMatch[];
const matches = linkify.match(text);
if (!matches) {
return [];
@ -88,16 +77,16 @@ export function findLinks(text: string) {
}
// Runs the matcher over `text` and returns link parts only for matches that
// carry a schema; returns an empty array when the matcher finds nothing.
export function findLinksWithSchema(text: string) {
const matches = linkify.match(text) as NoSchemaMatch[];
const matches = linkify.match(text);
if (!matches) {
return [];
}
return matches.filter((url) => !url.noschema).map(makeLinkPart);
return matches.filter((url) => !!url.schema).map(makeLinkPart);
}
function makeLinkPart(url: NoSchemaMatch): LinkPart {
function makeLinkPart(url: Match): LinkPart {
return {
start: url.index,
end: url.lastIndex,

View file

@ -353,6 +353,26 @@ describe("findLinks", () => {
expect(actual).to.deep.equal(expected);
});
// A bare address must come back with the "mailto:" prefix added, while an
// address that already has the prefix must be returned unchanged.
it("should parse mailto links", () => {
const input = "mail@example.com mailto:mail@example.org";
const expected = [
{
link: "mailto:mail@example.com",
start: 0,
end: 16,
},
{
link: "mailto:mail@example.org",
start: 17,
end: 40,
},
];
const actual = findLinks(input);
expect(actual).to.deep.equal(expected);
});
it("should not return urls with no schema if flag is specified", () => {
const input = "https://example.global //example.com http://example.group example.py";
const expected = [
@ -373,6 +393,21 @@ describe("findLinks", () => {
expect(actual).to.deep.equal(expected);
});
// A protocol-relative "//host" input must be returned as an explicit http link,
// with start/end still covering the original 13-character input.
it("should use http for protocol-less URLs", () => {
const input = "//example.com";
const expected = [
{
link: "http://example.com",
start: 0,
end: 13,
},
];
const actual = findLinks(input);
expect(actual).to.deep.equal(expected);
});
it("should find web+ schema urls", () => {
const input = "web+ap://instance.example/@Example web+whatever://example.com?some=value";
const expected = [