From f475cc39efea62441042ba6ac0b8e6e08c24134c Mon Sep 17 00:00:00 2001 From: Pavel Djundik Date: Thu, 8 Mar 2018 19:43:39 +0200 Subject: [PATCH] Normalize unicode URLs in link prefetcher Fixes #1644 --- src/plugins/irc-events/link.js | 17 +++++--------- test/plugins/link.js | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/src/plugins/irc-events/link.js b/src/plugins/irc-events/link.js index 46c66adb..8d4ca196 100644 --- a/src/plugins/irc-events/link.js +++ b/src/plugins/irc-events/link.js @@ -3,6 +3,7 @@ const cheerio = require("cheerio"); const request = require("request"); const url = require("url"); +const URI = require("urijs"); const mime = require("mime-types"); const Helper = require("../../helper"); const cleanIrcMessage = require("../../../client/js/libs/handlebars/ircmessageparser/cleanIrcMessage"); @@ -40,7 +41,7 @@ module.exports = function(client, chan, msg) { } msg.previews = Array.from(new Set( // Remove duplicate links - links.map((link) => escapeHeader(link.link)) + links.map((link) => normalizeURL(link.link)) )).map((link) => ({ type: "loading", head: "", @@ -101,7 +102,7 @@ function parseHtml(preview, res, client) { // Verify that thumbnail pic exists and is under allowed size if (preview.thumb.length) { - fetch(escapeHeader(preview.thumb), {language: client.language}, (resThumb) => { + fetch(normalizeURL(preview.thumb), {language: client.language}, (resThumb) => { if (resThumb === null || !(/^image\/.+/.test(resThumb.type)) || resThumb.size > (Helper.config.prefetchMaxImageSize * 1024)) { @@ -140,7 +141,7 @@ function parseHtmlMedia($, preview, res, client) { foundMedia = true; - fetch(escapeHeader(mediaUrl), {language: client.language}, (resMedia) => { + fetch(normalizeURL(mediaUrl), {language: client.language}, (resMedia) => { if (resMedia === null || !mediaTypeRegex.test(resMedia.type)) { return reject(); } @@ -356,12 +357,6 @@ function fetch(uri, {language}, cb) { }); } -// https://github.com/request/request/issues/2120 -// https://github.com/nodejs/node/issues/1693 -// https://github.com/alexeyten/descript/commit/50ee540b30188324198176e445330294922665fc -function escapeHeader(header) { - return header - .replace(/([\uD800-\uDBFF][\uDC00-\uDFFF])+/g, encodeURI) - .replace(/[\uD800-\uDFFF]/g, "") - .replace(/[\u0000-\u001F\u007F-\uFFFF]+/g, encodeURI); +function normalizeURL(header) { + return URI(header).normalize().toString(); } diff --git a/test/plugins/link.js b/test/plugins/link.js index 1721477c..cc06d672 100644 --- a/test/plugins/link.js +++ b/test/plugins/link.js @@ -283,4 +283,47 @@ describe("Link plugin", function() { link(this.irc, this.network.channels[0], message); }); + + it("should work on non-ASCII urls", function(done) { + const message = this.irc.createMessage({ + text: + "http://localhost:9002/unicode/ıoı-test " + + "http://localhost:9002/unicode/русский-текст-test " + + "http://localhost:9002/unicode/🙈-emoji-test " + + "http://localhost:9002/unicodeq/?q=ıoı-test " + + "http://localhost:9002/unicodeq/?q=русский-текст-test " + + "http://localhost:9002/unicodeq/?q=🙈-emoji-test", + }); + + link(this.irc, this.network.channels[0], message); + + app.get("/unicode/:q", function(req, res) { + res.send(`${req.params.q}`); + }); + + app.get("/unicodeq/", function(req, res) { + res.send(`${req.query.q}`); + }); + + const previews = []; + + this.irc.on("msg:preview", function(data) { + previews.push(data.preview.link); + + if (data.preview.link.indexOf("%C4%B1o%C4%B1-test") > 0) { + expect(data.preview.head).to.equal("ıoı-test"); + } else if (data.preview.link.indexOf("%D1%80%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9-%D1%82%D0%B5%D0%BA%D1%81%D1%82-test") > 0) { + expect(data.preview.head).to.equal("русский-текст-test"); + } else if (data.preview.link.indexOf("%F0%9F%99%88-emoji-test") > 0) { + expect(data.preview.head).to.equal("🙈-emoji-test"); + } else { + expect("This should never happen").to.equal(data.preview.link); + } + + if (previews.length === 5) { + expect(message.previews.map((preview) => preview.link)).to.deep.equal(previews); + done(); + } + }); + }); });