From 0b85ded53fa66a1ac1474194052416662d9c9533 Mon Sep 17 00:00:00 2001 From: Bonuspunkt Date: Sat, 18 Mar 2017 10:35:17 +0200 Subject: [PATCH 1/6] Add bonuspunkt's parser Fixes #15. Fixes #199. Fixes #583. Fixes #654. Fixes #928. Fixes #1001. --- .../ircmessageparser/anyIntersection.js | 10 + .../libs/handlebars/ircmessageparser/fill.js | 29 +++ .../ircmessageparser/findChannels.js | 32 +++ .../libs/handlebars/ircmessageparser/merge.js | 47 ++++ .../handlebars/ircmessageparser/parseStyle.js | 131 +++++++++++ client/js/libs/handlebars/parse.js | 210 +++++++++--------- 6 files changed, 348 insertions(+), 111 deletions(-) create mode 100644 client/js/libs/handlebars/ircmessageparser/anyIntersection.js create mode 100644 client/js/libs/handlebars/ircmessageparser/fill.js create mode 100644 client/js/libs/handlebars/ircmessageparser/findChannels.js create mode 100644 client/js/libs/handlebars/ircmessageparser/merge.js create mode 100644 client/js/libs/handlebars/ircmessageparser/parseStyle.js diff --git a/client/js/libs/handlebars/ircmessageparser/anyIntersection.js b/client/js/libs/handlebars/ircmessageparser/anyIntersection.js new file mode 100644 index 00000000..4fd0d239 --- /dev/null +++ b/client/js/libs/handlebars/ircmessageparser/anyIntersection.js @@ -0,0 +1,10 @@ +"use strict"; + +function anyIntersection(a, b) { + return a.start <= b.start && b.start < a.end || + a.start < b.end && b.end <= a.end || + b.start <= a.start && a.start < b.end || + b.start < a.end && a.end <= b.end; +} + +module.exports = anyIntersection; diff --git a/client/js/libs/handlebars/ircmessageparser/fill.js b/client/js/libs/handlebars/ircmessageparser/fill.js new file mode 100644 index 00000000..2cc9f705 --- /dev/null +++ b/client/js/libs/handlebars/ircmessageparser/fill.js @@ -0,0 +1,29 @@ +"use strict"; + +function fill(existingEntries, text) { + let position = 0; + const result = []; + + for (let i = 0; i < existingEntries.length; i++) { + const textSegment = existingEntries[i]; + + if 
(textSegment.start > position) { + result.push({ + start: position, + end: textSegment.start + }); + } + position = textSegment.end; + } + + if (position < text.length) { + result.push({ + start: position, + end: text.length + }); + } + + return result; +} + +module.exports = fill; diff --git a/client/js/libs/handlebars/ircmessageparser/findChannels.js b/client/js/libs/handlebars/ircmessageparser/findChannels.js new file mode 100644 index 00000000..b613415c --- /dev/null +++ b/client/js/libs/handlebars/ircmessageparser/findChannels.js @@ -0,0 +1,32 @@ +"use strict"; + +const escapeRegExp = require("lodash/escapeRegExp"); + +// NOTE: channel prefixes should be RPL_ISUPPORT.CHANTYPES +// NOTE: userModes should be RPL_ISUPPORT.PREFIX +function findChannels(text, channelPrefixes, userModes) { + const userModePattern = userModes.map(escapeRegExp).join(""); + const channelPrefixPattern = channelPrefixes.map(escapeRegExp).join(""); + + const channelPattern = `(?:^|\\s)[${ userModePattern }]*([${ channelPrefixPattern }][^ \u0007]+)`; + const channelRegExp = new RegExp(channelPattern, "g"); + + const result = []; + let match; + + do { + match = channelRegExp.exec(text); + + if (match) { + result.push({ + start: match.index + match[0].length - match[1].length, + end: match.index + match[0].length, + channel: match[1] + }); + } + } while (match); + + return result; +} + +module.exports = findChannels; diff --git a/client/js/libs/handlebars/ircmessageparser/merge.js b/client/js/libs/handlebars/ircmessageparser/merge.js new file mode 100644 index 00000000..3da520e8 --- /dev/null +++ b/client/js/libs/handlebars/ircmessageparser/merge.js @@ -0,0 +1,47 @@ +"use strict"; + +const anyIntersection = require("./anyIntersection"); +const fill = require("./fill"); + +let Object_assign = Object.assign; + +if (typeof Object_assign !== "function") { + Object_assign = function(target) { + Array.prototype.slice.call(arguments, 1).forEach(function(obj) { + 
Object.keys(obj).forEach(function(key) { + target[key] = obj[key]; + }); + }); + return target; + }; +} + +function assign(textPart, fragment) { + const fragStart = fragment.start; + const start = Math.max(fragment.start, textPart.start); + const end = Math.min(fragment.end, textPart.end); + + return Object_assign({}, fragment, { + start: start, + end: end, + text: fragment.text.slice(start - fragStart, end - fragStart) + }); +} + +function merge(textParts, styleFragments) { + const cleanText = styleFragments.map(fragment => fragment.text).join(""); + + const allParts = textParts + .concat(fill(textParts, cleanText)) + .sort((a, b) => a.start - b.start); + + return allParts.map(textPart => { + textPart.fragments = styleFragments + .filter(fragment => anyIntersection(textPart, fragment)) + .map(fragment => assign(textPart, fragment)); + + return textPart; + }); +} + +module.exports = merge; diff --git a/client/js/libs/handlebars/ircmessageparser/parseStyle.js b/client/js/libs/handlebars/ircmessageparser/parseStyle.js new file mode 100644 index 00000000..54e1c191 --- /dev/null +++ b/client/js/libs/handlebars/ircmessageparser/parseStyle.js @@ -0,0 +1,131 @@ +"use strict"; + +const BOLD = "\x02"; +const COLOR = "\x03"; +const RESET = "\x0f"; +const REVERSE = "\x16"; +const ITALIC = "\x1d"; +const UNDERLINE = "\x1f"; + +const colorRx = /^(\d{1,2})(?:,(\d{1,2}))?/; +const controlCodesRx = /[\u0000-\u001F]/g; + +function parseStyle(text) { + const result = []; + let start = 0; + let position = 0; + + let colorCodes, bold, textColor, bgColor, reverse, italic, underline; + + const resetStyle = () => { + bold = false; + textColor = undefined; + bgColor = undefined; + reverse = false; + italic = false; + underline = false; + }; + resetStyle(); + + const emitFragment = () => { + const textPart = text.slice(start, position); + start = position + 1; + + const processedText = textPart.replace(controlCodesRx, ""); + + if (!processedText.length) { + return; + } + + result.push({ + 
bold, + textColor, + bgColor, + reverse, + italic, + underline, + text: processedText + }); + }; + + while (position < text.length) { + switch (text[position]) { + + case RESET: + emitFragment(); + resetStyle(); + break; + + case BOLD: + emitFragment(); + bold = !bold; + break; + + case COLOR: + emitFragment(); + + colorCodes = text.slice(position + 1).match(colorRx); + + if (colorCodes) { + textColor = Number(colorCodes[1]); + bgColor = Number(colorCodes[2]); + if (Number.isNaN(bgColor)) { + bgColor = undefined; + } + position += colorCodes[0].length; + } else { + textColor = undefined; + bgColor = undefined; + } + start = position + 1; + break; + + case REVERSE: + emitFragment(); + reverse = !reverse; + break; + + case ITALIC: + emitFragment(); + italic = !italic; + break; + + case UNDERLINE: + emitFragment(); + underline = !underline; + break; + } + position += 1; + } + + emitFragment(); + + return result; +} + +const properties = ["bold", "textColor", "bgColor", "italic", "underline", "reverse"]; + +function prepare(text) { + return parseStyle(text) + .filter(fragment => fragment.text.length) + .reduce((prev, curr, i) => { + if (i === 0) { + return prev.concat([curr]); + } + + const lastEntry = prev[prev.length - 1]; + if (properties.some(key => curr[key] !== lastEntry[key])) { + return prev.concat([curr]); + } + + lastEntry.text += curr.text; + return prev; + }, []) + .map((fragment, i, array) => { + fragment.start = i === 0 ? 
0 : array[i - 1].end; + fragment.end = fragment.start + fragment.text.length; + return fragment; + }); +} + +module.exports = prepare; diff --git a/client/js/libs/handlebars/parse.js b/client/js/libs/handlebars/parse.js index 45d5c8d2..8c6ae432 100644 --- a/client/js/libs/handlebars/parse.js +++ b/client/js/libs/handlebars/parse.js @@ -2,125 +2,113 @@ const Handlebars = require("handlebars/runtime"); const URI = require("urijs"); +const parseStyle = require("./ircmessageparser/parseStyle"); +const findChannels = require("./ircmessageparser/findChannels"); +const merge = require("./ircmessageparser/merge"); -module.exports = function(text) { - text = Handlebars.Utils.escapeExpression(text); - text = colors(text); - text = channels(text); - text = uri(text); - return text; -}; +const commonSchemes = [ + "http", "https", + "ftp", "sftp", + "smb", "file", + "irc", "ircs", + "svn", "git", + "steam", "mumble", "ts3server", + "svn+ssh", "ssh", +]; -function uri(text) { - return URI.withinString(text, function(url) { - if (url.indexOf("javascript:") === 0) { - return url; - } - var split = url.split("<"); - url = "" + split[0] + ""; - if (split.length > 1) { - url += "<" + split.slice(1).join("<"); - } - return url; - }); -} +function findLinks(text) { + let result = []; + let lastPosition = 0; -/** - * Channels names are strings of length up to fifty (50) characters. - * The only restriction on a channel name is that it SHALL NOT contain - * any spaces (' '), a control G (^G or ASCII 7), a comma (','). - * Channel prefix '&' is handled as '&' because this parser is executed - * after entities in the message have been escaped. This prevents a couple of bugs. 
- */ -function channels(text) { - return text.replace( - /(^|\s|\x07|,)((?:#|&)[^\x07\s,]{1,49})/g, - '$1$2' - ); -} - -/** - * MIRC compliant colour and style parser - * Unfortuanately this is a non trivial operation - * See this branch for source and tests - * https://github.com/megawac/irc-style-parser/tree/shout - */ -var styleCheck_Re = /[\x00-\x1F]/, - back_re = /^([0-9]{1,2})(,([0-9]{1,2}))?/, - colourKey = "\x03", - // breaks all open styles ^O (\x0F) - styleBreak = "\x0F"; - -function styleTemplate(settings) { - return "" + settings.text + ""; -} - -var styles = [ - ["normal", "\x00", ""], ["underline", "\x1F"], - ["bold", "\x02"], ["italic", "\x1D"] -].map(function(style) { - var escaped = encodeURI(style[1]).replace("%", "\\x"); - return { - name: style[0], - style: style[2] ? style[2] : "irc-" + style[0], - key: style[1], - keyregex: new RegExp(escaped + "(.*?)(" + escaped + "|$)") - }; -}); - -function colors(line) { - // http://www.mirc.com/colors.html - // http://www.aviran.org/stripremove-irc-client-control-characters/ - // https://github.com/perl6/mu/blob/master/examples/rules/Grammar-IRC.pm - // regexs are cruel to parse this thing - - // already done? - if (!styleCheck_Re.test(line)) { - return line; - } - - // split up by the irc style break character ^O - if (line.indexOf(styleBreak) >= 0) { - return line.split(styleBreak).map(colors).join(""); - } - - var result = line; - var parseArr = result.split(colourKey); - var text, match, colour, background = ""; - for (var i = 0; i < parseArr.length; i++) { - text = parseArr[i]; - match = text.match(back_re); - if (!match) { - // ^C (no colour) ending. 
Escape current colour and carry on - background = ""; - continue; - } - colour = "irc-fg" + +match[1]; - // set the background colour - if (match[3]) { - background = " irc-bg" + +match[3]; - } - // update the parsed text result - result = result.replace(colourKey + text, styleTemplate({ - style: colour + background, - text: text.slice(match[0].length) - })); - } - - // Matching styles (italics/bold/underline) - // if only colours were this easy... - styles.forEach(function(style) { - if (result.indexOf(style.key) < 0) { + URI.withinString(text, function(url, start, end) { + // v-- fix: url was modified and does not match input string -> cant be mapped + if (text.indexOf(url, lastPosition) < 0) { return; } + // ^-- /fix: url was modified and does not match input string -> cant be mapped - result = result.replace(style.keyregex, function(matchedTrash, matchedText) { - return styleTemplate({ - style: style.style, - text: matchedText - }); + // v-- fix: use prefered scheme + const parsed = URI(url); + const parsedScheme = parsed.scheme().toLowerCase(); + const matchedScheme = commonSchemes.find(scheme => parsedScheme.endsWith(scheme)); + + if (matchedScheme) { + const prefix = parsedScheme.length - matchedScheme.length; + start += prefix; + url = url.slice(prefix); + } + // ^-- /fix: use prefered scheme + + // URL matched, but does not start with a protocol, add it + if (!parsedScheme.length) { + url = "http://" + url; + } + + result.push({ + start: start, + end: end, + link: url }); }); return result; } + +function createFragment(fragment) { + let className = ""; + if (fragment.bold) { + className += " irc-bold"; + } + if (fragment.textColor !== undefined) { + className += " irc-fg" + fragment.textColor; + } + if (fragment.bgColor !== undefined) { + className += " irc-bg" + fragment.bgColor; + } + if (fragment.italic) { + className += " irc-italic"; + } + if (fragment.underline) { + className += " irc-underline"; + } + const escapedText = 
Handlebars.Utils.escapeExpression(fragment.text); + if (className) { + return "" + escapedText + ""; + } + return escapedText; +} + +module.exports = function parse(text) { + const styleFragments = parseStyle(text); + const cleanText = styleFragments.map(fragment => fragment.text).join(""); + + const channelPrefixes = ["#", "&"]; // RPL_ISUPPORT.CHANTYPES + const userModes = ["!", "@", "%", "+"]; // RPL_ISUPPORT.PREFIX + const channelParts = findChannels(cleanText, channelPrefixes, userModes); + + const linkParts = findLinks(cleanText); + + const parts = channelParts + .concat(linkParts) + .sort((a, b) => a.start - b.start); + + return merge(parts, styleFragments).map(textPart => { + const fragments = textPart.fragments.map(createFragment).join(""); + + if (textPart.link) { + const escapedLink = Handlebars.Utils.escapeExpression(textPart.link); + return ( + "" + + fragments + + ""); + } else if (textPart.channel) { + const escapedChannel = Handlebars.Utils.escapeExpression(textPart.channel); + return ( + "" + + fragments + + ""); + } + + return fragments; + }).join(""); +}; From eb1360c3af3c2f02cc3facbd843e7e34af3091e9 Mon Sep 17 00:00:00 2001 From: Bonuspunkt Date: Sat, 18 Mar 2017 10:18:47 +0200 Subject: [PATCH 2/6] Add message parser tests --- .../ircmessageparser/anyIntersection.js | 30 ++ .../ircmessageparser/findChannels.js | 123 +++++++ .../libs/handlebars/ircmessageparser/merge.js | 63 ++++ .../handlebars/ircmessageparser/parseStyle.js | 274 ++++++++++++++ test/client/js/libs/handlebars/parse.js | 336 ++++++++++++++++++ 5 files changed, 826 insertions(+) create mode 100644 test/client/js/libs/handlebars/ircmessageparser/anyIntersection.js create mode 100644 test/client/js/libs/handlebars/ircmessageparser/findChannels.js create mode 100644 test/client/js/libs/handlebars/ircmessageparser/merge.js create mode 100644 test/client/js/libs/handlebars/ircmessageparser/parseStyle.js create mode 100644 test/client/js/libs/handlebars/parse.js diff --git 
a/test/client/js/libs/handlebars/ircmessageparser/anyIntersection.js b/test/client/js/libs/handlebars/ircmessageparser/anyIntersection.js new file mode 100644 index 00000000..b80a44ed --- /dev/null +++ b/test/client/js/libs/handlebars/ircmessageparser/anyIntersection.js @@ -0,0 +1,30 @@ +"use strict"; + +const expect = require("chai").expect; +const anyIntersection = require("../../../../../../client/js/libs/handlebars/ircmessageparser/anyIntersection"); + +describe("anyIntersection", () => { + it("should not intersect on edges", () => { + const a = {start: 1, end: 2}; + const b = {start: 2, end: 3}; + + expect(anyIntersection(a, b)).to.equal(false); + expect(anyIntersection(b, a)).to.equal(false); + }); + + it("should intersect on overlapping", () => { + const a = {start: 0, end: 3}; + const b = {start: 1, end: 2}; + + expect(anyIntersection(a, b)).to.equal(true); + expect(anyIntersection(b, a)).to.equal(true); + }); + + it("should not intersect", () => { + const a = {start: 0, end: 1}; + const b = {start: 2, end: 3}; + + expect(anyIntersection(a, b)).to.equal(false); + expect(anyIntersection(b, a)).to.equal(false); + }); +}); diff --git a/test/client/js/libs/handlebars/ircmessageparser/findChannels.js b/test/client/js/libs/handlebars/ircmessageparser/findChannels.js new file mode 100644 index 00000000..93c119ee --- /dev/null +++ b/test/client/js/libs/handlebars/ircmessageparser/findChannels.js @@ -0,0 +1,123 @@ +"use strict"; + +const expect = require("chai").expect; +const analyseText = require("../../../../../../client/js/libs/handlebars/ircmessageparser/findChannels"); + +describe("findChannels", () => { + it("should find single letter channel", () => { + const input = "#a"; + const expected = [{ + channel: "#a", + start: 0, + end: 2 + }]; + + const actual = analyseText(input, ["#"], ["@", "+"]); + + expect(actual).to.deep.equal(expected); + }); + + it("should find utf8 channels", () => { + const input = "#äöü"; + const expected = [{ + channel: "#äöü", + 
start: 0, + end: 4 + }]; + + const actual = analyseText(input, ["#"], ["@", "+"]); + + expect(actual).to.deep.equal(expected); + }); + + it("should find inline channel", () => { + const input = "inline #channel text"; + const expected = [{ + channel: "#channel", + start: 7, + end: 15 + }]; + + const actual = analyseText(input, ["#"], ["@", "+"]); + + expect(actual).to.deep.equal(expected); + }); + + it("should stop at \\0x07", () => { + const input = "#chan\x07nel"; + const expected = [{ + channel: "#chan", + start: 0, + end: 5 + }]; + + const actual = analyseText(input, ["#"], ["@", "+"]); + + expect(actual).to.deep.equal(expected); + }); + + it("should allow classics pranks", () => { + const input = "#1,000"; + const expected = [{ + channel: "#1,000", + start: 0, + end: 6 + }]; + + const actual = analyseText(input, ["#"], ["@", "+"]); + + expect(actual).to.deep.equal(expected); + }); + + it("should work with whois reponses", () => { + const input = "@#a"; + const expected = [{ + channel: "#a", + start: 1, + end: 3 + }]; + + const actual = analyseText(input, ["#"], ["@", "+"]); + + expect(actual).to.deep.equal(expected); + }); + + it("should work with IRCv3.1 multi-prefix", () => { + const input = "!@%+#a"; + const expected = [{ + channel: "#a", + start: 4, + end: 6 + }]; + + const actual = analyseText(input, ["#"], ["!", "@", "%", "+"]); + + expect(actual).to.deep.equal(expected); + }); + + it("should work with custom channelPrefixes", () => { + const input = "@a"; + const expected = [{ + channel: "@a", + start: 0, + end: 2 + }]; + + const actual = analyseText(input, ["@"], ["#", "+"]); + + expect(actual).to.deep.equal(expected); + }); + + it("should handle multiple channelPrefix correctly", () => { + const input = "##test"; + const expected = [{ + channel: "##test", + start: 0, + end: 6 + }]; + + const actual = analyseText(input, ["#"], ["@", "+"]); + + expect(actual).to.deep.equal(expected); + }); +}); diff --git 
a/test/client/js/libs/handlebars/ircmessageparser/merge.js b/test/client/js/libs/handlebars/ircmessageparser/merge.js new file mode 100644 index 00000000..d55ac1a2 --- /dev/null +++ b/test/client/js/libs/handlebars/ircmessageparser/merge.js @@ -0,0 +1,63 @@ +"use strict"; + +const expect = require("chai").expect; +const merge = require("../../../../../../client/js/libs/handlebars/ircmessageparser/merge"); + +describe("merge", () => { + it("should split style information", () => { + const textParts = [{ + start: 0, + end: 10, + flag1: true + }, { + start: 10, + end: 20, + flag2: true + }]; + const styleFragments = [{ + start: 0, + end: 5, + text: "01234" + }, { + start: 5, + end: 15, + text: "5678901234" + }, { + start: 15, + end: 20, + text: "56789" + }]; + + const expected = [{ + start: 0, + end: 10, + flag1: true, + fragments: [{ + start: 0, + end: 5, + text: "01234" + }, { + start: 5, + end: 10, + text: "56789" + }] + }, { + start: 10, + end: 20, + flag2: true, + fragments: [{ + start: 10, + end: 15, + text: "01234" + }, { + start: 15, + end: 20, + text: "56789" + }] + }]; + + const actual = merge(textParts, styleFragments); + + expect(actual).to.deep.equal(expected); + }); +}); diff --git a/test/client/js/libs/handlebars/ircmessageparser/parseStyle.js b/test/client/js/libs/handlebars/ircmessageparser/parseStyle.js new file mode 100644 index 00000000..6af289c4 --- /dev/null +++ b/test/client/js/libs/handlebars/ircmessageparser/parseStyle.js @@ -0,0 +1,274 @@ +"use strict"; + +const expect = require("chai").expect; +const parseStyle = require("../../../../../../client/js/libs/handlebars/ircmessageparser/parseStyle"); + +describe("parseStyle", () => { + it("should skip control codes", () => { + const input = "text\x01with\x04control\x05codes"; + const expected = [{ + bold: false, + textColor: undefined, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: "textwithcontrolcodes", + + start: 0, + end: 20 + }]; + + const actual = 
parseStyle(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should parse bold", () => { + const input = "\x02bold"; + const expected = [{ + bold: true, + textColor: undefined, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: "bold", + + start: 0, + end: 4 + }]; + + const actual = parseStyle(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should parse textColor", () => { + const input = "\x038yellowText"; + const expected = [{ + bold: false, + textColor: 8, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: "yellowText", + + start: 0, + end: 10 + }]; + + const actual = parseStyle(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should parse textColor and background", () => { + const input = "\x034,8yellowBG redText"; + const expected = [{ + textColor: 4, + bgColor: 8, + bold: false, + reverse: false, + italic: false, + underline: false, + text: "yellowBG redText", + + start: 0, + end: 16 + }]; + + const actual = parseStyle(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should parse italic", () => { + const input = "\x1ditalic"; + const expected = [{ + bold: false, + textColor: undefined, + bgColor: undefined, + reverse: false, + italic: true, + underline: false, + text: "italic", + + start: 0, + end: 6 + }]; + + const actual = parseStyle(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should carry state corretly forward", () => { + const input = "\x02bold\x038yellow\x02nonBold\x03default"; + const expected = [{ + bold: true, + textColor: undefined, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: "bold", + + start: 0, + end: 4 + }, { + bold: true, + textColor: 8, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: "yellow", + + start: 4, + end: 10 + }, { + bold: false, + textColor: 8, + bgColor: undefined, + reverse: false, + italic: false, + 
underline: false, + text: "nonBold", + + start: 10, + end: 17 + }, { + bold: false, + textColor: undefined, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: "default", + + start: 17, + end: 24 + }]; + + const actual = parseStyle(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should toggle bold correctly", () => { + const input = "\x02bold\x02 \x02bold\x02"; + const expected = [{ + bold: true, + textColor: undefined, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: "bold", + + start: 0, + end: 4 + }, { + bold: false, + textColor: undefined, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: " ", + + start: 4, + end: 5 + }, { + bold: true, + textColor: undefined, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: "bold", + + start: 5, + end: 9 + }]; + + const actual = parseStyle(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should reset all styles", () => { + const input = "\x02\x034\x16\x1d\x1ffull\x0fnone"; + const expected = [{ + bold: true, + textColor: 4, + bgColor: undefined, + reverse: true, + italic: true, + underline: true, + text: "full", + + start: 0, + end: 4 + }, { + bold: false, + textColor: undefined, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: "none", + + start: 4, + end: 8 + }]; + + const actual = parseStyle(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should not emit empty fragments", () => { + const input = "\x031\x031,2\x031\x031,2\x031\x031,2\x03a"; + const expected = [{ + bold: false, + textColor: undefined, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: "a", + + start: 0, + end: 1 + }]; + + const actual = parseStyle(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should optimize fragments", () => { + const rawString = "oh hi test text"; + const 
colorCode = "\x0312"; + const input = colorCode + rawString.split("").join(colorCode); + const expected = [{ + bold: false, + textColor: 12, + bgColor: undefined, + reverse: false, + italic: false, + underline: false, + text: rawString, + + start: 0, + end: rawString.length + }]; + + const actual = parseStyle(input); + + expect(actual).to.deep.equal(expected); + }); +}); diff --git a/test/client/js/libs/handlebars/parse.js b/test/client/js/libs/handlebars/parse.js new file mode 100644 index 00000000..7d1e025a --- /dev/null +++ b/test/client/js/libs/handlebars/parse.js @@ -0,0 +1,336 @@ +"use strict"; + +const expect = require("chai").expect; +const parse = require("../../../../../client/js/libs/handlebars/parse"); + +describe("parse Handlebars helper", () => { + it("should not introduce xss", () => { + const testCases = [{ + input: "", + expected: "<img onerror='location.href="//youtube.com"'>" + }, { + input: "#&\">bug", + expected: "#&">bug" + }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("should skip control codes", () => { + const testCases = [{ + input: "text\x01with\x04control\x05codes", + expected: "textwithcontrolcodes" + }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("should find urls", () => { + const testCases = [{ + input: "irc://freenode.net/thelounge", + expected: + "" + + "irc://freenode.net/thelounge" + + "" + }, { + input: "www.nooooooooooooooo.com", + expected: + "" + + "www.nooooooooooooooo.com" + + "" + }, { + input: "look at https://thelounge.github.io/ for more information", + expected: + "look at " + + "" + + "https://thelounge.github.io/" + + "" + + " for more information", + }, { + input: "use www.duckduckgo.com for privacy reasons", + expected: + 
"use " + + "" + + "www.duckduckgo.com" + + "" + + " for privacy reasons" + }, { + input: "svn+ssh://example.org", + expected: + "" + + "svn+ssh://example.org" + + "" + }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("url with a dot parsed correctly", () => { + const input = + "bonuspunkt: your URL parser misparses this URL: https://msdn.microsoft.com/en-us/library/windows/desktop/ms644989(v=vs.85).aspx"; + const correctResult = + "bonuspunkt: your URL parser misparses this URL: " + + "" + + "https://msdn.microsoft.com/en-us/library/windows/desktop/ms644989(v=vs.85).aspx" + + ""; + + const actual = parse(input); + + expect(actual).to.deep.equal(correctResult); + }); + + it("should balance brackets", () => { + const testCases = [{ + input: "", + expected: + "<" + + "" + + "https://theos.kyriasis.com/~kyrias/stats/archlinux.html" + + "" + + ">" + }, { + input: "abc (www.example.com)", + expected: + "abc (" + + "" + + "www.example.com" + + "" + + ")" + }, { + input: "http://example.com/Test_(Page)", + expected: + "" + + "http://example.com/Test_(Page)" + + "" + }, { + input: "www.example.com/Test_(Page)", + expected: + "" + + "www.example.com/Test_(Page)" + + "" + }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("should not find urls", () => { + const testCases = [{ + input: "text www. text", + expected: "text www. text" + }, { + input: "http://.", + expected: "http://." 
+ }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("should find channels", () => { + const testCases = [{ + input: "#a", + expected: + "" + + "#a" + + "" + }, { + input: "#test", + expected: + "" + + "#test" + + "" + }, { + input: "#äöü", + expected: + "" + + "#äöü" + + "" + }, { + input: "inline #channel text", + expected: + "inline " + + "" + + "#channel" + + "" + + " text" + }, { + input: "#1,000", + expected: + "" + + "#1,000" + + "" + }, { + input: "@#a", + expected: + "@" + + "" + + "#a" + + "" + }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("should not find channels", () => { + const testCases = [{ + input: "hi#test", + expected: "hi#test" + }, { + input: "#", + expected: "#" + }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("should style like mirc", () => { + const testCases = [{ + input: "\x02bold", + expected: "bold" + }, { + input: "\x038yellowText", + expected: "yellowText" + }, { + input: "\x030,0white,white", + expected: "white,white" + }, { + input: "\x034,8yellowBGredText", + expected: "yellowBGredText" + }, { + input: "\x1ditalic", + expected: "italic" + }, { + input: "\x1funderline", + expected: "underline" + }, { + input: "\x02bold\x038yellow\x02nonBold\x03default", + expected: + "bold" + + "yellow" + + "nonBold" + + "default" + }, { + input: "\x02bold\x02 \x02bold\x02", + expected: + "bold" + + " " + + "bold" + }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("should 
go bonkers like mirc", () => { + const testCases = [{ + input: "\x02irc\x0f://\x1dfreenode.net\x0f/\x034,8thelounge", + expected: + "" + + "irc" + + "://" + + "freenode.net" + + "/" + + "thelounge" + + "" + }, { + input: "\x02#\x038,9thelounge", + expected: + "" + + "#" + + "thelounge" + + "" + }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("should optimize generated html", () => { + const testCases = [{ + input: "test \x0312#\x0312\x0312\"te\x0312st\x0312\x0312\x0312\x0312\x0312\x0312\x0312\x0312\x0312\x0312\x0312a", + expected: + "test " + + "" + + "#"testa" + + "" + }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("should trim commom protocols", () => { + const testCases = [{ + input: "like..http://example.com", + expected: + "like.." + + "" + + "http://example.com" + + "" + }, { + input: "like..HTTP://example.com", + expected: + "like.." 
+ + "" + + "HTTP://example.com" + + "" + }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("should not find channel in fragment", () => { + const testCases = [{ + input: "http://example.com/#hash", + expected: + "" + + "" + + "http://example.com/#hash" + + "" + }]; + + const actual = testCases.map(testCase => parse(testCase.input)); + const expected = testCases.map(testCase => testCase.expected); + + expect(actual).to.deep.equal(expected); + }); + + it("should not overlap parts", () => { + const input = "Url: http://example.com/path Channel: ##channel"; + const actual = parse(input); + + expect(actual).to.equal( + "Url: http://example.com/path " + + "Channel: ##channel" + ); + }); +}); From 5b4c00d8ca595a1c392f70893d0ca9830ce4028c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20Astori?= Date: Mon, 17 Apr 2017 23:28:35 -0400 Subject: [PATCH 3/6] Extract findLinks into its own file and add tests Tests were taken from https://github.com/Bonuspunkt/ircmessageparser/blob/5a249c30b1e0379e5df62f10734a3dc7ce02306b/test/findLinks.js. The underlying code is different but the tests are the same. 
--- .../handlebars/ircmessageparser/findLinks.js | 53 +++++++++ client/js/libs/handlebars/parse.js | 50 +-------- .../handlebars/ircmessageparser/findLinks.js | 106 ++++++++++++++++++ 3 files changed, 160 insertions(+), 49 deletions(-) create mode 100644 client/js/libs/handlebars/ircmessageparser/findLinks.js create mode 100644 test/client/js/libs/handlebars/ircmessageparser/findLinks.js diff --git a/client/js/libs/handlebars/ircmessageparser/findLinks.js b/client/js/libs/handlebars/ircmessageparser/findLinks.js new file mode 100644 index 00000000..031afa17 --- /dev/null +++ b/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -0,0 +1,53 @@ +"use strict"; + +const URI = require("urijs"); + +const commonSchemes = [ + "http", "https", + "ftp", "sftp", + "smb", "file", + "irc", "ircs", + "svn", "git", + "steam", "mumble", "ts3server", + "svn+ssh", "ssh", +]; + +function findLinks(text) { + let result = []; + let lastPosition = 0; + + URI.withinString(text, function(url, start, end) { + // v-- fix: url was modified and does not match input string -> cant be mapped + if (text.indexOf(url, lastPosition) < 0) { + return; + } + // ^-- /fix: url was modified and does not match input string -> cant be mapped + + // v-- fix: use prefered scheme + const parsed = URI(url); + const parsedScheme = parsed.scheme().toLowerCase(); + const matchedScheme = commonSchemes.find(scheme => parsedScheme.endsWith(scheme)); + + if (matchedScheme) { + const prefix = parsedScheme.length - matchedScheme.length; + start += prefix; + url = url.slice(prefix); + } + // ^-- /fix: use prefered scheme + + // URL matched, but does not start with a protocol, add it + if (!parsedScheme.length) { + url = "http://" + url; + } + + result.push({ + start: start, + end: end, + link: url + }); + }); + + return result; +} + +module.exports = findLinks; diff --git a/client/js/libs/handlebars/parse.js b/client/js/libs/handlebars/parse.js index 8c6ae432..fa21b898 100644 --- 
a/client/js/libs/handlebars/parse.js +++ b/client/js/libs/handlebars/parse.js @@ -1,59 +1,11 @@ "use strict"; const Handlebars = require("handlebars/runtime"); -const URI = require("urijs"); const parseStyle = require("./ircmessageparser/parseStyle"); const findChannels = require("./ircmessageparser/findChannels"); +const findLinks = require("./ircmessageparser/findLinks"); const merge = require("./ircmessageparser/merge"); -const commonSchemes = [ - "http", "https", - "ftp", "sftp", - "smb", "file", - "irc", "ircs", - "svn", "git", - "steam", "mumble", "ts3server", - "svn+ssh", "ssh", -]; - -function findLinks(text) { - let result = []; - let lastPosition = 0; - - URI.withinString(text, function(url, start, end) { - // v-- fix: url was modified and does not match input string -> cant be mapped - if (text.indexOf(url, lastPosition) < 0) { - return; - } - // ^-- /fix: url was modified and does not match input string -> cant be mapped - - // v-- fix: use prefered scheme - const parsed = URI(url); - const parsedScheme = parsed.scheme().toLowerCase(); - const matchedScheme = commonSchemes.find(scheme => parsedScheme.endsWith(scheme)); - - if (matchedScheme) { - const prefix = parsedScheme.length - matchedScheme.length; - start += prefix; - url = url.slice(prefix); - } - // ^-- /fix: use prefered scheme - - // URL matched, but does not start with a protocol, add it - if (!parsedScheme.length) { - url = "http://" + url; - } - - result.push({ - start: start, - end: end, - link: url - }); - }); - - return result; -} - function createFragment(fragment) { let className = ""; if (fragment.bold) { diff --git a/test/client/js/libs/handlebars/ircmessageparser/findLinks.js b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js new file mode 100644 index 00000000..f3f228f2 --- /dev/null +++ b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -0,0 +1,106 @@ +"use strict"; + +const expect = require("chai").expect; +const findLinks = 
require("../../../../../../client/js/libs/handlebars/ircmessageparser/findLinks"); + +describe("findLinks", () => { + it("should find url", () => { + const input = "irc://freenode.net/thelounge"; + const expected = [{ + start: 0, + end: 28, + link: "irc://freenode.net/thelounge", + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should find urls with www", () => { + const input = "www.nooooooooooooooo.com"; + const expected = [{ + start: 0, + end: 24, + link: "http://www.nooooooooooooooo.com" + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should find urls in strings", () => { + const input = "look at https://thelounge.github.io/ for more information"; + const expected = [{ + link: "https://thelounge.github.io/", + start: 8, + end: 36 + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should find urls in strings starting with www", () => { + const input = "use www.duckduckgo.com for privacy reasons"; + const expected = [{ + link: "http://www.duckduckgo.com", + start: 4, + end: 22 + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should find urls with odd surroundings", () => { + const input = ""; + const expected = [{ + link: "https://theos.kyriasis.com/~kyrias/stats/archlinux.html", + start: 1, + end: 56 + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should find urls with starting with www. and odd surroundings", () => { + const input = ".:www.github.com:."; + const expected = [{ + link: "http://www.github.com", + start: 2, + end: 16 + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should not find urls", () => { + const input = "text www. 
text"; + const expected = []; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should handle multiple www. correctly", () => { + const input = "www.www.test.com"; + const expected = [{ + link: "http://www.www.test.com", + start: 0, + end: 16 + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); +}); From 90f4a94bb246bd91a0c2d2e2e54f717d16f8ba32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20Astori?= Date: Tue, 18 Apr 2017 00:53:12 -0400 Subject: [PATCH 4/6] Use template literals in parse Also make it output double quotes for consistency with web stuff. --- client/js/libs/handlebars/parse.js | 26 +++----- test/client/js/libs/handlebars/parse.js | 84 ++++++++++++------------- 2 files changed, 52 insertions(+), 58 deletions(-) diff --git a/client/js/libs/handlebars/parse.js b/client/js/libs/handlebars/parse.js index fa21b898..7e9aebb8 100644 --- a/client/js/libs/handlebars/parse.js +++ b/client/js/libs/handlebars/parse.js @@ -7,25 +7,25 @@ const findLinks = require("./ircmessageparser/findLinks"); const merge = require("./ircmessageparser/merge"); function createFragment(fragment) { - let className = ""; + let classes = []; if (fragment.bold) { - className += " irc-bold"; + classes.push("irc-bold"); } if (fragment.textColor !== undefined) { - className += " irc-fg" + fragment.textColor; + classes.push("irc-fg" + fragment.textColor); } if (fragment.bgColor !== undefined) { - className += " irc-bg" + fragment.bgColor; + classes.push("irc-bg" + fragment.bgColor); } if (fragment.italic) { - className += " irc-italic"; + classes.push("irc-italic"); } if (fragment.underline) { - className += " irc-underline"; + classes.push("irc-underline"); } const escapedText = Handlebars.Utils.escapeExpression(fragment.text); - if (className) { - return "" + escapedText + ""; + if (classes.length) { + return `${escapedText}`; } return escapedText; } @@ -49,16 +49,10 @@ module.exports = 
function parse(text) { if (textPart.link) { const escapedLink = Handlebars.Utils.escapeExpression(textPart.link); - return ( - "" + - fragments + - ""); + return `${fragments}`; } else if (textPart.channel) { const escapedChannel = Handlebars.Utils.escapeExpression(textPart.channel); - return ( - "" + - fragments + - ""); + return `${fragments}`; } return fragments; diff --git a/test/client/js/libs/handlebars/parse.js b/test/client/js/libs/handlebars/parse.js index 7d1e025a..d3737e98 100644 --- a/test/client/js/libs/handlebars/parse.js +++ b/test/client/js/libs/handlebars/parse.js @@ -10,7 +10,7 @@ describe("parse Handlebars helper", () => { expected: "<img onerror='location.href="//youtube.com"'>" }, { input: "#&\">bug", - expected: "#&">bug" + expected: "#&">bug" }]; const actual = testCases.map(testCase => parse(testCase.input)); @@ -35,20 +35,20 @@ describe("parse Handlebars helper", () => { const testCases = [{ input: "irc://freenode.net/thelounge", expected: - "" + + "" + "irc://freenode.net/thelounge" + "" }, { input: "www.nooooooooooooooo.com", expected: - "" + + "" + "www.nooooooooooooooo.com" + "" }, { input: "look at https://thelounge.github.io/ for more information", expected: "look at " + - "" + + "" + "https://thelounge.github.io/" + "" + " for more information", @@ -56,14 +56,14 @@ describe("parse Handlebars helper", () => { input: "use www.duckduckgo.com for privacy reasons", expected: "use " + - "" + + "" + "www.duckduckgo.com" + "" + " for privacy reasons" }, { input: "svn+ssh://example.org", expected: - "" + + "" + "svn+ssh://example.org" + "" }]; @@ -79,7 +79,7 @@ describe("parse Handlebars helper", () => { "bonuspunkt: your URL parser misparses this URL: https://msdn.microsoft.com/en-us/library/windows/desktop/ms644989(v=vs.85).aspx"; const correctResult = "bonuspunkt: your URL parser misparses this URL: " + - "" + + "" + "https://msdn.microsoft.com/en-us/library/windows/desktop/ms644989(v=vs.85).aspx" + ""; @@ -93,7 +93,7 @@ describe("parse 
Handlebars helper", () => { input: "", expected: "<" + - "" + + "" + "https://theos.kyriasis.com/~kyrias/stats/archlinux.html" + "" + ">" @@ -101,20 +101,20 @@ describe("parse Handlebars helper", () => { input: "abc (www.example.com)", expected: "abc (" + - "" + + "" + "www.example.com" + "" + ")" }, { input: "http://example.com/Test_(Page)", expected: - "" + + "" + "http://example.com/Test_(Page)" + "" }, { input: "www.example.com/Test_(Page)", expected: - "" + + "" + "www.example.com/Test_(Page)" + "" }]; @@ -144,40 +144,40 @@ describe("parse Handlebars helper", () => { const testCases = [{ input: "#a", expected: - "" + + "" + "#a" + "" }, { input: "#test", expected: - "" + + "" + "#test" + "" }, { input: "#äöü", expected: - "" + + "" + "#äöü" + "" }, { input: "inline #channel text", expected: "inline " + - "" + + "" + "#channel" + "" + " text" }, { input: "#1,000", expected: - "" + + "" + "#1,000" + "" }, { input: "@#a", expected: "@" + - "" + + "" + "#a" + "" }]; @@ -206,35 +206,35 @@ describe("parse Handlebars helper", () => { it("should style like mirc", () => { const testCases = [{ input: "\x02bold", - expected: "bold" + expected: "bold" }, { input: "\x038yellowText", - expected: "yellowText" + expected: "yellowText" }, { input: "\x030,0white,white", - expected: "white,white" + expected: "white,white" }, { input: "\x034,8yellowBGredText", - expected: "yellowBGredText" + expected: "yellowBGredText" }, { input: "\x1ditalic", - expected: "italic" + expected: "italic" }, { input: "\x1funderline", - expected: "underline" + expected: "underline" }, { input: "\x02bold\x038yellow\x02nonBold\x03default", expected: - "bold" + - "yellow" + - "nonBold" + + "bold" + + "yellow" + + "nonBold" + "default" }, { input: "\x02bold\x02 \x02bold\x02", expected: - "bold" + + "bold" + " " + - "bold" + "bold" }]; const actual = testCases.map(testCase => parse(testCase.input)); @@ -247,19 +247,19 @@ describe("parse Handlebars helper", () => { const testCases = [{ input: 
"\x02irc\x0f://\x1dfreenode.net\x0f/\x034,8thelounge", expected: - "" + - "irc" + + "" + + "irc" + "://" + - "freenode.net" + + "freenode.net" + "/" + - "thelounge" + + "thelounge" + "" }, { input: "\x02#\x038,9thelounge", expected: - "" + - "#" + - "thelounge" + + "" + + "#" + + "thelounge" + "" }]; @@ -274,8 +274,8 @@ describe("parse Handlebars helper", () => { input: "test \x0312#\x0312\x0312\"te\x0312st\x0312\x0312\x0312\x0312\x0312\x0312\x0312\x0312\x0312\x0312\x0312a", expected: "test " + - "" + - "#"testa" + + "" + + "#"testa" + "" }]; @@ -290,14 +290,14 @@ describe("parse Handlebars helper", () => { input: "like..http://example.com", expected: "like.." + - "" + + "" + "http://example.com" + "" }, { input: "like..HTTP://example.com", expected: "like.." + - "" + + "" + "HTTP://example.com" + "" }]; @@ -313,7 +313,7 @@ describe("parse Handlebars helper", () => { input: "http://example.com/#hash", expected: "" + - "" + + "" + "http://example.com/#hash" + "" }]; @@ -329,8 +329,8 @@ describe("parse Handlebars helper", () => { const actual = parse(input); expect(actual).to.equal( - "Url: http://example.com/path " + - "Channel: ##channel" + "Url: http://example.com/path " + + "Channel: ##channel" ); }); }); From 03e3444a352bc5045ff8f4669bc3ed565b369bf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20Astori?= Date: Tue, 4 Apr 2017 00:36:03 -0400 Subject: [PATCH 5/6] Explain the modules of the message parser and add tests - Add comments and descriptions to: - `findChannels.js` - `parseStyle` - `findLinks` - `fill` - `anyIntersection` - `merge` - `parse` - Minor optimizations to `parseStyle` - Add tests for `fill` --- .../ircmessageparser/anyIntersection.js | 2 + .../libs/handlebars/ircmessageparser/fill.js | 17 ++-- .../ircmessageparser/findChannels.js | 19 +++- .../handlebars/ircmessageparser/findLinks.js | 20 +++- .../libs/handlebars/ircmessageparser/merge.js | 15 ++- .../handlebars/ircmessageparser/parseStyle.js | 98 ++++++++++++------- 
client/js/libs/handlebars/parse.js | 17 +++- .../libs/handlebars/ircmessageparser/fill.js | 50 ++++++++++ .../ircmessageparser/findChannels.js | 20 ++-- 9 files changed, 195 insertions(+), 63 deletions(-) create mode 100644 test/client/js/libs/handlebars/ircmessageparser/fill.js diff --git a/client/js/libs/handlebars/ircmessageparser/anyIntersection.js b/client/js/libs/handlebars/ircmessageparser/anyIntersection.js index 4fd0d239..a77e031d 100644 --- a/client/js/libs/handlebars/ircmessageparser/anyIntersection.js +++ b/client/js/libs/handlebars/ircmessageparser/anyIntersection.js @@ -1,5 +1,7 @@ "use strict"; +// Return true if any section of "a" or "b" parts (defined by their start/end +// markers) intersect each other, false otherwise. function anyIntersection(a, b) { return a.start <= b.start && b.start < a.end || a.start < b.end && b.end <= a.end || diff --git a/client/js/libs/handlebars/ircmessageparser/fill.js b/client/js/libs/handlebars/ircmessageparser/fill.js index 2cc9f705..7d90a96c 100644 --- a/client/js/libs/handlebars/ircmessageparser/fill.js +++ b/client/js/libs/handlebars/ircmessageparser/fill.js @@ -1,21 +1,26 @@ "use strict"; +// Create plain text entries corresponding to areas of the text that match no +// existing entries. Returns an empty array if all parts of the text have been +// parsed into recognizable entries already. function fill(existingEntries, text) { let position = 0; - const result = []; - - for (let i = 0; i < existingEntries.length; i++) { - const textSegment = existingEntries[i]; + // Fill inner parts of the text. For example, if text is `foobarbaz` and both + // `foo` and `baz` have matched into an entry, this will return a dummy entry + // corresponding to `bar`. 
+ const result = existingEntries.reduce((acc, textSegment) => { if (textSegment.start > position) { - result.push({ + acc.push({ start: position, end: textSegment.start }); } position = textSegment.end; - } + return acc; + }, []); + // Complete the unmatched end of the text with a dummy entry if (position < text.length) { result.push({ start: position, diff --git a/client/js/libs/handlebars/ircmessageparser/findChannels.js b/client/js/libs/handlebars/ircmessageparser/findChannels.js index b613415c..6edd5dad 100644 --- a/client/js/libs/handlebars/ircmessageparser/findChannels.js +++ b/client/js/libs/handlebars/ircmessageparser/findChannels.js @@ -1,20 +1,31 @@ "use strict"; +// Escapes the RegExp special characters "^", "$", "", ".", "*", "+", "?", "(", +// ")", "[", "]", "{", "}", and "|" in string. +// See https://lodash.com/docs/#escapeRegExp const escapeRegExp = require("lodash/escapeRegExp"); -// NOTE: channel prefixes should be RPL_ISUPPORT.CHANTYPES -// NOTE: userModes should be RPL_ISUPPORT.PREFIX +// Given an array of channel prefixes (such as "#" and "&") and an array of user +// modes (such as "@" and "+"), this function extracts channels and nicks from a +// text. +// It returns an array of objects for each channel found with their start index, +// end index and channel name. function findChannels(text, channelPrefixes, userModes) { + // `userModePattern` is necessary to ignore user modes in /whois responses. + // For example, a voiced user in #thelounge will have a /whois response of: + // > foo is on the following channels: +#thelounge + // We need to explicitly ignore user modes to parse such channels correctly. 
const userModePattern = userModes.map(escapeRegExp).join(""); const channelPrefixPattern = channelPrefixes.map(escapeRegExp).join(""); - - const channelPattern = `(?:^|\\s)[${ userModePattern }]*([${ channelPrefixPattern }][^ \u0007]+)`; + const channelPattern = `(?:^|\\s)[${userModePattern}]*([${channelPrefixPattern}][^ \u0007]+)`; const channelRegExp = new RegExp(channelPattern, "g"); const result = []; let match; do { + // With global ("g") regexes, calling `exec` multiple times will find + // successive matches in the same string. match = channelRegExp.exec(text); if (match) { diff --git a/client/js/libs/handlebars/ircmessageparser/findLinks.js b/client/js/libs/handlebars/ircmessageparser/findLinks.js index 031afa17..9596a5a0 100644 --- a/client/js/libs/handlebars/ircmessageparser/findLinks.js +++ b/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -2,6 +2,9 @@ const URI = require("urijs"); +// Known schemes to detect in a text. If a text contains `foo...bar://foo.com`, +// the parsed scheme should be `foo...bar` but if it contains +// `foo...http://foo.com`, we assume the scheme to extract will be `http`. const commonSchemes = [ "http", "https", "ftp", "sftp", @@ -16,6 +19,10 @@ function findLinks(text) { let result = []; let lastPosition = 0; + // URI.withinString() identifies URIs within text, e.g. to translate them to + // -Tags. + // See https://medialize.github.io/URI.js/docs.html#static-withinString + // In our case, we store each URI encountered in a result array. 
URI.withinString(text, function(url, start, end) { // v-- fix: url was modified and does not match input string -> cant be mapped if (text.indexOf(url, lastPosition) < 0) { @@ -23,19 +30,22 @@ function findLinks(text) { } // ^-- /fix: url was modified and does not match input string -> cant be mapped - // v-- fix: use prefered scheme - const parsed = URI(url); - const parsedScheme = parsed.scheme().toLowerCase(); + // Extract the scheme of the URL detected, if there is one + const parsedScheme = URI(url).scheme().toLowerCase(); + + // Check if the scheme of the detected URL matches a common one above. + // In a URL like `foo..http://example.com`, the scheme would be `foo..http`, + // so we need to clean up the end of the scheme and filter out the rest. const matchedScheme = commonSchemes.find(scheme => parsedScheme.endsWith(scheme)); + // A known scheme was found, extract the unknown part from the URL if (matchedScheme) { const prefix = parsedScheme.length - matchedScheme.length; start += prefix; url = url.slice(prefix); } - // ^-- /fix: use prefered scheme - // URL matched, but does not start with a protocol, add it + // The URL matched but does not start with a scheme (`www.foo.com`), add it if (!parsedScheme.length) { url = "http://" + url; } diff --git a/client/js/libs/handlebars/ircmessageparser/merge.js b/client/js/libs/handlebars/ircmessageparser/merge.js index 3da520e8..893997cc 100644 --- a/client/js/libs/handlebars/ircmessageparser/merge.js +++ b/client/js/libs/handlebars/ircmessageparser/merge.js @@ -16,6 +16,7 @@ if (typeof Object_assign !== "function") { }; } +// Merge text part information within a styling fragment function assign(textPart, fragment) { const fragStart = fragment.start; const start = Math.max(fragment.start, textPart.start); @@ -28,13 +29,25 @@ function assign(textPart, fragment) { }); } +// Merge the style fragments withing the text parts, taking into account +// boundaries and text sections that have not matched to links or channels. 
+// For example, given a string "foobar" where "foo" and "bar" have been +// identified as parts (channels, links, etc.) and "fo", "ob" and "ar" have 3 +// different styles, the first resulting part will contain fragments "fo" and +// "o", and the second resulting part will contain "b" and "ar". "o" and "b" +// fragments will contain duplicate styling attributes. function merge(textParts, styleFragments) { - const cleanText = styleFragments.map(fragment => fragment.text).join(""); + // Re-build the overall text (without control codes) from the style fragments + const cleanText = styleFragments.reduce((acc, frag) => acc + frag.text, ""); + // Every section of the original text that has not been captured in a "part" + // is filled with "text" parts, dummy objects with start/end but no extra + // metadata. const allParts = textParts .concat(fill(textParts, cleanText)) .sort((a, b) => a.start - b.start); + // Distribute the style fragments within the text parts return allParts.map(textPart => { textPart.fragments = styleFragments .filter(fragment => anyIntersection(textPart, fragment)) diff --git a/client/js/libs/handlebars/ircmessageparser/parseStyle.js b/client/js/libs/handlebars/ircmessageparser/parseStyle.js index 54e1c191..d23d5bd6 100644 --- a/client/js/libs/handlebars/ircmessageparser/parseStyle.js +++ b/client/js/libs/handlebars/ircmessageparser/parseStyle.js @@ -1,5 +1,6 @@ "use strict"; +// Styling control codes const BOLD = "\x02"; const COLOR = "\x03"; const RESET = "\x0f"; @@ -7,14 +8,24 @@ const REVERSE = "\x16"; const ITALIC = "\x1d"; const UNDERLINE = "\x1f"; +// Color code matcher, with format `XX,YY` where both `XX` and `YY` are +// integers, `XX` is the text color and `YY` is an optional background color. 
const colorRx = /^(\d{1,2})(?:,(\d{1,2}))?/; + +// Represents all other control codes that to be ignored/filtered from the text const controlCodesRx = /[\u0000-\u001F]/g; +// Converts a given text into an array of objects, each of them representing a +// similarly styled section of the text. Each object carries the `text`, style +// information (`bold`, `textColor`, `bgcolor`, `reverse`, `italic`, +// `underline`), and `start`/`end` cursors. function parseStyle(text) { const result = []; let start = 0; let position = 0; + // At any given time, these carry style information since last time a styling + // control code was met. let colorCodes, bold, textColor, bgColor, reverse, italic, underline; const resetStyle = () => { @@ -27,27 +38,42 @@ function parseStyle(text) { }; resetStyle(); + // When called, this "closes" the current fragment by adding an entry to the + // `result` array using the styling information set last time a control code + // was met. const emitFragment = () => { + // Uses the text fragment starting from the last control code position up to + // the current position const textPart = text.slice(start, position); - start = position + 1; + // Filters out all non-style related control codes present in this text const processedText = textPart.replace(controlCodesRx, ""); - if (!processedText.length) { - return; + if (processedText.length) { + // Current fragment starts where the previous one ends, or at 0 if none + const fragmentStart = result.length ? 
result[result.length - 1].end : 0; + + result.push({ + bold, + textColor, + bgColor, + reverse, + italic, + underline, + text: processedText, + start: fragmentStart, + end: fragmentStart + processedText.length + }); } - result.push({ - bold, - textColor, - bgColor, - reverse, - italic, - underline, - text: processedText - }); + // Now that a fragment has been "closed", the next one will start after that + start = position + 1; }; + // This loop goes through each character of the given text one by one by + // bumping the `position` cursor. Every time a new special "styling" character + // is met, an object gets created (with `emitFragment()`)information on text + // encountered since the previous styling character. while (position < text.length) { switch (text[position]) { @@ -56,6 +82,10 @@ function parseStyle(text) { resetStyle(); break; + // Meeting a BOLD character means that the ongoing text is either going to + // be in bold or that the previous one was in bold and the following one + // must be reset. + // This same behavior applies to COLOR, REVERSE, ITALIC, and UNDERLINE. case BOLD: emitFragment(); bold = !bold; @@ -64,20 +94,23 @@ function parseStyle(text) { case COLOR: emitFragment(); + // Go one step further to find the corresponding color colorCodes = text.slice(position + 1).match(colorRx); if (colorCodes) { textColor = Number(colorCodes[1]); - bgColor = Number(colorCodes[2]); - if (Number.isNaN(bgColor)) { - bgColor = undefined; + if (colorCodes[2]) { + bgColor = Number(colorCodes[2]); } + // Color code length is > 1, so bump the current position cursor by as + // much (and reset the start cursor for the current text block as well) position += colorCodes[0].length; + start = position + 1; } else { + // If no color codes were found, toggles back to no colors (like BOLD). 
textColor = undefined; bgColor = undefined; } - start = position + 1; break; case REVERSE: @@ -95,9 +128,12 @@ function parseStyle(text) { underline = !underline; break; } + + // Evaluate the next character at the next iteration position += 1; } + // The entire text has been parsed, so we finalize the current text fragment. emitFragment(); return result; @@ -107,25 +143,19 @@ const properties = ["bold", "textColor", "bgColor", "italic", "underline", "reve function prepare(text) { return parseStyle(text) - .filter(fragment => fragment.text.length) - .reduce((prev, curr, i) => { - if (i === 0) { - return prev.concat([curr]); + // This optimizes fragments by combining them together when all their values + // for the properties defined above are equal. + .reduce((prev, curr) => { + if (prev.length) { + const lastEntry = prev[prev.length - 1]; + if (properties.every(key => curr[key] === lastEntry[key])) { + lastEntry.text += curr.text; + lastEntry.end += curr.text.length; + return prev; + } } - - const lastEntry = prev[prev.length - 1]; - if (properties.some(key => curr[key] !== lastEntry[key])) { - return prev.concat([curr]); - } - - lastEntry.text += curr.text; - return prev; - }, []) - .map((fragment, i, array) => { - fragment.start = i === 0 ? 
0 : array[i - 1].end; - fragment.end = fragment.start + fragment.text.length; - return fragment; - }); + return prev.concat([curr]); + }, []); } module.exports = prepare; diff --git a/client/js/libs/handlebars/parse.js b/client/js/libs/handlebars/parse.js index 7e9aebb8..915a432c 100644 --- a/client/js/libs/handlebars/parse.js +++ b/client/js/libs/handlebars/parse.js @@ -6,6 +6,7 @@ const findChannels = require("./ircmessageparser/findChannels"); const findLinks = require("./ircmessageparser/findLinks"); const merge = require("./ircmessageparser/merge"); +// Create an HTML `span` with styling information for a given fragment function createFragment(fragment) { let classes = []; if (fragment.bold) { @@ -30,23 +31,33 @@ function createFragment(fragment) { return escapedText; } +// Transform an IRC message potentially filled with styling control codes, URLs +// and channels into a string of HTML elements to display on the client. module.exports = function parse(text) { + // Extract the styling information and get the plain text version from it const styleFragments = parseStyle(text); const cleanText = styleFragments.map(fragment => fragment.text).join(""); - const channelPrefixes = ["#", "&"]; // RPL_ISUPPORT.CHANTYPES - const userModes = ["!", "@", "%", "+"]; // RPL_ISUPPORT.PREFIX + // On the plain text, find channels and URLs, returned as "parts". Parts are + // arrays of objects containing start and end markers, as well as metadata + // depending on what was found (channel or link). 
+ const channelPrefixes = ["#", "&"]; // TODO Channel prefixes should be RPL_ISUPPORT.CHANTYPES + const userModes = ["!", "@", "%", "+"]; // TODO User modes should be RPL_ISUPPORT.PREFIX const channelParts = findChannels(cleanText, channelPrefixes, userModes); - const linkParts = findLinks(cleanText); + // Sort all parts identified based on their position in the original text const parts = channelParts .concat(linkParts) .sort((a, b) => a.start - b.start); + // Merge the styling information with the channels / URLs / text objects and + // generate HTML strings with the resulting fragments return merge(parts, styleFragments).map(textPart => { + // Create HTML strings with styling information const fragments = textPart.fragments.map(createFragment).join(""); + // Wrap these potentially styled fragments with links and channel buttons if (textPart.link) { const escapedLink = Handlebars.Utils.escapeExpression(textPart.link); return `${fragments}`; diff --git a/test/client/js/libs/handlebars/ircmessageparser/fill.js b/test/client/js/libs/handlebars/ircmessageparser/fill.js new file mode 100644 index 00000000..8723ad52 --- /dev/null +++ b/test/client/js/libs/handlebars/ircmessageparser/fill.js @@ -0,0 +1,50 @@ +"use strict"; + +const expect = require("chai").expect; +const fill = require("../../../../../../client/js/libs/handlebars/ircmessageparser/fill"); + +describe("fill", () => { + const text = "01234567890123456789"; + + it("should return an entry for the unmatched end of string", () => { + const existingEntries = [ + {start: 0, end: 10}, + {start: 5, end: 15}, + ]; + + const expected = [ + {start: 15, end: 20}, + ]; + + const actual = fill(existingEntries, text); + + expect(actual).to.deep.equal(expected); + }); + + it("should return an entry per unmatched areas of the text", () => { + const existingEntries = [ + {start: 0, end: 5}, + {start: 10, end: 15}, + ]; + + const expected = [ + {start: 5, end: 10}, + {start: 15, end: 20}, + ]; + + const actual = 
fill(existingEntries, text); + + expect(actual).to.deep.equal(expected); + }); + + it("should not return anything when entries match all text", () => { + const existingEntries = [ + {start: 0, end: 10}, + {start: 10, end: 20}, + ]; + + const actual = fill(existingEntries, text); + + expect(actual).to.be.empty; + }); +}); diff --git a/test/client/js/libs/handlebars/ircmessageparser/findChannels.js b/test/client/js/libs/handlebars/ircmessageparser/findChannels.js index 93c119ee..4c676e57 100644 --- a/test/client/js/libs/handlebars/ircmessageparser/findChannels.js +++ b/test/client/js/libs/handlebars/ircmessageparser/findChannels.js @@ -1,7 +1,7 @@ "use strict"; const expect = require("chai").expect; -const analyseText = require("../../../../../../client/js/libs/handlebars/ircmessageparser/findChannels"); +const findChannels = require("../../../../../../client/js/libs/handlebars/ircmessageparser/findChannels"); describe("findChannels", () => { it("should find single letter channel", () => { @@ -12,7 +12,7 @@ describe("findChannels", () => { end: 2 }]; - const actual = analyseText(input, ["#"], ["@", "+"]); + const actual = findChannels(input, ["#"], ["@", "+"]); expect(actual).to.deep.equal(expected); }); @@ -25,7 +25,7 @@ describe("findChannels", () => { end: 4 }]; - const actual = analyseText(input, ["#"], ["@", "+"]); + const actual = findChannels(input, ["#"], ["@", "+"]); expect(actual).to.deep.equal(expected); }); @@ -38,7 +38,7 @@ describe("findChannels", () => { end: 15 }]; - const actual = analyseText(input, ["#"], ["@", "+"]); + const actual = findChannels(input, ["#"], ["@", "+"]); expect(actual).to.deep.equal(expected); }); @@ -51,7 +51,7 @@ describe("findChannels", () => { end: 5 }]; - const actual = analyseText(input, ["#"], ["@", "+"]); + const actual = findChannels(input, ["#"], ["@", "+"]); expect(actual).to.deep.equal(expected); }); @@ -64,7 +64,7 @@ describe("findChannels", () => { end: 6 }]; - const actual = analyseText(input, ["#"], ["@", "+"]); + 
const actual = findChannels(input, ["#"], ["@", "+"]); expect(actual).to.deep.equal(expected); }); @@ -77,7 +77,7 @@ describe("findChannels", () => { end: 3 }]; - const actual = analyseText(input, ["#"], ["@", "+"]); + const actual = findChannels(input, ["#"], ["@", "+"]); expect(actual).to.deep.equal(expected); }); @@ -90,7 +90,7 @@ describe("findChannels", () => { end: 6 }]; - const actual = analyseText(input, ["#"], ["!", "@", "%", "+"]); + const actual = findChannels(input, ["#"], ["!", "@", "%", "+"]); expect(actual).to.deep.equal(expected); }); @@ -103,7 +103,7 @@ describe("findChannels", () => { end: 2 }]; - const actual = analyseText(input, ["@"], ["#", "+"]); + const actual = findChannels(input, ["@"], ["#", "+"]); expect(actual).to.deep.equal(expected); }); @@ -116,7 +116,7 @@ describe("findChannels", () => { end: 6 }]; - const actual = analyseText(input, ["#"], ["@", "+"]); + const actual = findChannels(input, ["#"], ["@", "+"]); expect(actual).to.deep.equal(expected); }); From fa1aecdd9e9e0cab27e2f5f92f485e99fe69a376 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20Astori?= Date: Thu, 20 Apr 2017 01:37:10 -0400 Subject: [PATCH 6/6] Remove URI.js monkey-patch as fix landed in v1.18.5 See https://github.com/medialize/URI.js/issues/325 --- client/js/libs/handlebars/ircmessageparser/findLinks.js | 7 ------- 1 file changed, 7 deletions(-) diff --git a/client/js/libs/handlebars/ircmessageparser/findLinks.js b/client/js/libs/handlebars/ircmessageparser/findLinks.js index 9596a5a0..1bd989b2 100644 --- a/client/js/libs/handlebars/ircmessageparser/findLinks.js +++ b/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -17,19 +17,12 @@ const commonSchemes = [ function findLinks(text) { let result = []; - let lastPosition = 0; // URI.withinString() identifies URIs within text, e.g. to translate them to // <a>-Tags. // See https://medialize.github.io/URI.js/docs.html#static-withinString // In our case, we store each URI encountered in a result array.
URI.withinString(text, function(url, start, end) { - // v-- fix: url was modified and does not match input string -> cant be mapped - if (text.indexOf(url, lastPosition) < 0) { - return; - } - // ^-- /fix: url was modified and does not match input string -> cant be mapped - // Extract the scheme of the URL detected, if there is one const parsedScheme = URI(url).scheme().toLowerCase();