Normalize unicode URLs in link prefetcher

Fixes #1644
This commit is contained in:
Pavel Djundik 2018-03-08 19:43:39 +02:00
parent d1648823c3
commit f475cc39ef
2 changed files with 49 additions and 11 deletions

View file

@ -3,6 +3,7 @@
const cheerio = require("cheerio");
const request = require("request");
const url = require("url");
const URI = require("urijs");
const mime = require("mime-types");
const Helper = require("../../helper");
const cleanIrcMessage = require("../../../client/js/libs/handlebars/ircmessageparser/cleanIrcMessage");
@ -40,7 +41,7 @@ module.exports = function(client, chan, msg) {
}
msg.previews = Array.from(new Set( // Remove duplicate links
links.map((link) => escapeHeader(link.link))
links.map((link) => normalizeURL(link.link))
)).map((link) => ({
type: "loading",
head: "",
@ -101,7 +102,7 @@ function parseHtml(preview, res, client) {
// Verify that thumbnail pic exists and is under allowed size
if (preview.thumb.length) {
fetch(escapeHeader(preview.thumb), {language: client.language}, (resThumb) => {
fetch(normalizeURL(preview.thumb), {language: client.language}, (resThumb) => {
if (resThumb === null
|| !(/^image\/.+/.test(resThumb.type))
|| resThumb.size > (Helper.config.prefetchMaxImageSize * 1024)) {
@ -140,7 +141,7 @@ function parseHtmlMedia($, preview, res, client) {
foundMedia = true;
fetch(escapeHeader(mediaUrl), {language: client.language}, (resMedia) => {
fetch(normalizeURL(mediaUrl), {language: client.language}, (resMedia) => {
if (resMedia === null || !mediaTypeRegex.test(resMedia.type)) {
return reject();
}
@ -356,12 +357,6 @@ function fetch(uri, {language}, cb) {
});
}
// https://github.com/request/request/issues/2120
// https://github.com/nodejs/node/issues/1693
// https://github.com/alexeyten/descript/commit/50ee540b30188324198176e445330294922665fc
function escapeHeader(header) {
return header
.replace(/([\uD800-\uDBFF][\uDC00-\uDFFF])+/g, encodeURI)
.replace(/[\uD800-\uDFFF]/g, "")
.replace(/[\u0000-\u001F\u007F-\uFFFF]+/g, encodeURI);
/**
 * Returns the normalized string form of a URL, as produced by URI.js.
 *
 * @param {string} header - URL to normalize (parameter name kept from the
 *   helper this function replaced)
 * @returns {string} Result of URI.js `normalize()` serialized back to a string
 */
function normalizeURL(header) {
	const parsed = URI(header);
	const normalized = parsed.normalize();

	return normalized.toString();
}

View file

@ -283,4 +283,47 @@ describe("Link plugin", function() {
link(this.irc, this.network.channels[0], message);
});
it("should work on non-ASCII urls", function(done) {
	// Three links carry non-ASCII text in the path, three carry it in the query string
	const text =
		"http://localhost:9002/unicode/ıoı-test " +
		"http://localhost:9002/unicode/русский-текст-test " +
		"http://localhost:9002/unicode/🙈-emoji-test " +
		"http://localhost:9002/unicodeq/?q=ıoı-test " +
		"http://localhost:9002/unicodeq/?q=русский-текст-test " +
		"http://localhost:9002/unicodeq/?q=🙈-emoji-test";
	const message = this.irc.createMessage({text});

	// Pairs of [percent-encoded fragment expected in the preview link, expected page title]
	const expectedHeads = [
		["%C4%B1o%C4%B1-test", "ıoı-test"],
		["%D1%80%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9-%D1%82%D0%B5%D0%BA%D1%81%D1%82-test", "русский-текст-test"],
		["%F0%9F%99%88-emoji-test", "🙈-emoji-test"],
	];

	link(this.irc, this.network.channels[0], message);

	// Both routes echo the decoded value they received back as the page title
	app.get("/unicode/:q", (req, res) => {
		res.send(`<title>${req.params.q}</title>`);
	});

	app.get("/unicodeq/", (req, res) => {
		res.send(`<title>${req.query.q}</title>`);
	});

	const previews = [];

	this.irc.on("msg:preview", ({preview}) => {
		previews.push(preview.link);

		const known = expectedHeads.find(([encoded]) => preview.link.indexOf(encoded) > 0);

		if (known) {
			expect(preview.head).to.equal(known[1]);
		} else {
			// Deliberately failing assertion that reports the unexpected link
			expect("This should never happen").to.equal(preview.link);
		}

		// NOTE(review): six links are sent but the test completes after five previews —
		// presumably the plugin caps previews per message; confirm against the link plugin
		if (previews.length !== 5) {
			return;
		}

		expect(message.previews.map((item) => item.link)).to.deep.equal(previews);
		done();
	});
});
});