diff --git a/client/js/libs/handlebars/ircmessageparser/findLinks.js b/client/js/libs/handlebars/ircmessageparser/findLinks.js index 48726dc3..2c162f50 100644 --- a/client/js/libs/handlebars/ircmessageparser/findLinks.js +++ b/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -1,13 +1,12 @@ "use strict"; -const URI = require("urijs"); +const linkify = require("linkify-it")() + .tlds(require("tlds")) + .tlds("onion", true); -// Known schemes to detect in a text. If a text contains `foo...bar://foo.com`, -// the parsed scheme should be `foo...bar` but if it contains -// `foo...http://foo.com`, we assume the scheme to extract will be `http`. +// Known schemes to detect in text const commonSchemes = [ - "http", "https", - "ftp", "sftp", + "sftp", "smb", "file", "irc", "ircs", "svn", "git", @@ -15,54 +14,22 @@ const commonSchemes = [ "svn+ssh", "ssh", ]; +for (const schema of commonSchemes) { + linkify.add(schema + ":", "http:"); +} + function findLinks(text) { - const result = []; + const matches = linkify.match(text); - // URI.withinString() identifies URIs within text, e.g. to translate them to - // -Tags. - // See https://medialize.github.io/URI.js/docs.html#static-withinString - // In our case, we store each URI encountered in a result array. - try { - URI.withinString(text, function(url, start, end) { - let parsedScheme; - - try { - // Extract the scheme of the URL detected, if there is one - parsedScheme = URI(url).scheme().toLowerCase(); - } catch (e) { - // URI may throw an exception for malformed urls, - // as to why withinString finds these in the first place is a mystery - return; - } - - // Check if the scheme of the detected URL matches a common one above. - // In a URL like `foo..http://example.com`, the scheme would be `foo..http`, - // so we need to clean up the end of the scheme and filter out the rest. - const matchedScheme = commonSchemes.find((scheme) => parsedScheme.endsWith(scheme)); - - // A known scheme was found, extract the unknown part from the URL - if (matchedScheme) { - const prefix = parsedScheme.length - matchedScheme.length; - start += prefix; - url = url.slice(prefix); - } - - // The URL matched but does not start with a scheme (`www.foo.com`), add it - if (!parsedScheme.length) { - url = "http://" + url; - } - - result.push({ - start: start, - end: end, - link: url, - }); - }); - } catch (e) { - // withinString is wrapped in a try/catch due to https://github.com/medialize/URI.js/issues/359 + if (!matches) { + return []; } - return result; + return matches.map((url) => ({ + start: url.index, + end: url.lastIndex, + link: url.url, + })); } module.exports = findLinks; diff --git a/package.json b/package.json index ecd06af5..accbc349 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "express": "4.16.3", "fs-extra": "6.0.1", "irc-framework": "2.11.0", + "linkify-it": "2.0.3", "lodash": "4.17.10", "mime-types": "2.1.18", "moment": "2.22.1", @@ -58,6 +59,7 @@ "spdy": "3.4.7", "sqlite3": "4.0.0", "thelounge-ldapjs-non-maintained-fork": "1.0.2", + "tlds": "1.203.1", "ua-parser-js": "0.7.18", "urijs": "1.19.1", "uuid": "3.2.1", diff --git a/test/client/js/libs/handlebars/ircmessageparser/findLinks.js b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js index 7f8ab4e2..90113e6b 100644 --- a/test/client/js/libs/handlebars/ircmessageparser/findLinks.js +++ b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -104,6 +104,144 @@ describe("findLinks", () => { expect(actual).to.deep.equal(expected); }); + it("should find domains without www. but valid tld", () => { + const input = "google.com google.lv google.museum"; + const expected = [{ + link: "http://google.com", + start: 0, + end: 10, + }, { + link: "http://google.lv", + start: 11, + end: 20, + }, { + link: "http://google.museum", + start: 21, + end: 34, + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should find .onion domains", () => { + const input = "facebookcorewwwi.onion/test?url"; + const expected = [{ + link: "http://facebookcorewwwi.onion/test?url", + start: 0, + end: 31, + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should not consider invalid TLDs as domains", () => { + const input = "google.wtfgugl google.xx www.google.wtfgugl www.google.xx"; + const expected = []; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should consider invalid TLDs as domains if protocol is specified", () => { + const input = "http://google.wtfgugl http://google.xx http://www.google.wtfgugl http://www.google.xx"; + const expected = [{ + link: "http://google.wtfgugl", + start: 0, + end: 21, + }, { + link: "http://google.xx", + start: 22, + end: 38, + }, { + link: "http://www.google.wtfgugl", + start: 39, + end: 64, + }, { + link: "http://www.google.xx", + start: 65, + end: 85, + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should correctly stop at punctuation", () => { // Issue #2351 + const input = + "https://en.wikipedia.org/wiki/Dig! " + + "https://en.wikipedia.org/wiki/Dig? " + + "https://en.wikipedia.org/wiki/Dig. " + + "https://www.google.com* " + + "https://www.google.com/test* " + + "https://www.google.com@ " + + "https://www.google.com/test@ " + + "https://www.google.com! "; + const expected = [{ + link: "https://en.wikipedia.org/wiki/Dig", + start: 0, + end: 33, + }, { + link: "https://en.wikipedia.org/wiki/Dig", + start: 35, + end: 68, + }, { + link: "https://en.wikipedia.org/wiki/Dig", + start: 70, + end: 103, + }, { + link: "https://www.google.com", + start: 105, + end: 127, + }, { + link: "https://www.google.com/test*", + start: 129, + end: 157, + }, { + link: "https://www.google.com", + start: 158, + end: 180, + }, { + link: "https://www.google.com/test@", + start: 182, + end: 210, + }, { + link: "https://www.google.com", + start: 211, + end: 233, + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should correctly stop at apostrophe", () => { + const input = "https://www.google.com's www.google.com's google.com's"; // Issue #1302 + const expected = [{ + link: "https://www.google.com", + start: 0, + end: 22, + }, { + link: "http://www.google.com", + start: 25, + end: 39, + }, { + link: "http://google.com", + start: 42, + end: 52, + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + it("does not find invalid urls", () => { const input = "www.example.com ssh://-oProxyCommand=whois"; // Issue #1412 const expected = [{ diff --git a/test/client/js/libs/handlebars/parse.js b/test/client/js/libs/handlebars/parse.js index 98d79cd0..2e0dea8b 100644 --- a/test/client/js/libs/handlebars/parse.js +++ b/test/client/js/libs/handlebars/parse.js @@ -7,7 +7,7 @@ describe("parse Handlebars helper", () => { it("should not introduce xss", () => { const testCases = [{ input: "", - expected: "<img onerror='location.href="//youtube.com"'>", + expected: "<img onerror='location.href="//youtube.com"'>", }, { input: '#&">bug', expected: '#&">bug', diff --git a/yarn.lock b/yarn.lock index d0330abd..8cd59910 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4310,6 +4310,12 @@ levn@^0.3.0, levn@~0.3.0: prelude-ls "~1.1.2" type-check "~0.3.2" +linkify-it@2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/linkify-it/-/linkify-it-2.0.3.tgz#d94a4648f9b1c179d64fa97291268bdb6ce9434f" + dependencies: + uc.micro "^1.0.1" + listr-silent-renderer@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/listr-silent-renderer/-/listr-silent-renderer-1.1.1.tgz#924b5a3757153770bf1a8e3fbf74b8bbf3f9242e" @@ -7446,6 +7452,10 @@ timers-browserify@^2.0.4: dependencies: setimmediate "^1.0.4" +tlds@1.203.1: + version "1.203.1" + resolved "https://registry.yarnpkg.com/tlds/-/tlds-1.203.1.tgz#4dc9b02f53de3315bc98b80665e13de3edfc1dfc" + tmp@^0.0.33: version "0.0.33" resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.0.33.tgz#6d34335889768d21b2bcda0aa277ced3b1bfadf9" @@ -7559,6 +7569,10 @@ ua-parser-js@0.7.18: version "0.7.18" resolved "https://registry.yarnpkg.com/ua-parser-js/-/ua-parser-js-0.7.18.tgz#a7bfd92f56edfb117083b69e31d2aa8882d4b1ed" +uc.micro@^1.0.1: + version "1.0.5" + resolved "https://registry.yarnpkg.com/uc.micro/-/uc.micro-1.0.5.tgz#0c65f15f815aa08b560a61ce8b4db7ffc3f45376" + uglify-es@^3.3.4: version "3.3.9" resolved "https://registry.yarnpkg.com/uglify-es/-/uglify-es-3.3.9.tgz#0c1c4f0700bed8dbc124cdb304d2592ca203e677"