From a812f25c2f7107f15f3e19067bc55bd31d4184db Mon Sep 17 00:00:00 2001 From: joshua stein Date: Thu, 3 Dec 2015 15:56:52 -0600 Subject: [PATCH] Markdowner: use Nokogiri for html rewriting brute forcing changes by regexps gets things wrong sometimes, so use nokogiri to parse the html output of rdiscount and do changes on individual nodes, then turn back into a string we lose the ability to move punctuation inside of auto-generated links, but i don't see any easy/definitive way to do this properly. closes #242 closes #209 --- extras/markdowner.rb | 45 ++++++++++++++++++---------------- spec/models/markdowner_spec.rb | 24 +++++++----------- 2 files changed, 33 insertions(+), 36 deletions(-) diff --git a/extras/markdowner.rb b/extras/markdowner.rb index 82b750f..597ba70 100644 --- a/extras/markdowner.rb +++ b/extras/markdowner.rb @@ -13,35 +13,38 @@ class Markdowner args.push :no_image end - html = RDiscount.new(text.to_s, *args).to_html + ng = Nokogiri::HTML(RDiscount.new(text.to_s, *args).to_html) - # change <h1> headings to just emphasis tags - html.gsub!(/<(\/)?h(\d)>/) {|_| "<#{$1}strong>" } - - # fix links that got the trailing punctuation appended to move it outside - # the link - html.gsub!(/]+)([\.\!\,])">([^>]+)([\.\!\,])<\/a>/) {|_| - if $2.to_s == $4.to_s - "#{$3}#{$2}" - else - _ - end - } + # change <h1>, <h2>, etc. headings to just bold tags + ng.css("h1, h2, h3, h4, h5, h6").each do |h| + h.name = "strong" + end # make links have rel=nofollow - html.gsub!(/ u[1 .. -1]) - "#{u}" - else - u + ng.search("//text()").each do |t| + if t.parent && t.parent.name.downcase == "a" + # don't replace inside <a>s + next end + + tx = t.text.gsub(/\B\@([\w\-]+)/) do |u| + if User.exists?(:username => u[1 .. -1]) + "#{u}" + else + u + end + end + + t.replace(tx) end end - html + ng.at_css("body").inner_html end end diff --git a/spec/models/markdowner_spec.rb b/spec/models/markdowner_spec.rb index 3408520..96d66b8 100644 --- a/spec/models/markdowner_spec.rb +++ b/spec/models/markdowner_spec.rb @@ -3,30 +3,24 @@ require "spec_helper" describe Markdowner do it "parses simple markdown" do Markdowner.to_html("hello there *italics* and **bold**!").should == - "

hello there italics and bold!

\n" + "

hello there italics and bold!

" end it "turns @username into a link if @username exists" do User.make!(:username => "blahblah") Markdowner.to_html("hi @blahblah test").should == - "

hi @blahblah test

\n" + "

hi @blahblah test

" Markdowner.to_html("hi @flimflam test").should == - "

hi @flimflam test

\n" - end - - it "moves punctuation outside of auto-generated links" do - Markdowner.to_html("hi http://example.com/a! test").should == - "

hi http://example.com/a! test

\n" + "

hi @flimflam test

" end # bug#209 it "keeps punctuation inside of auto-generated links when using brackets" do Markdowner.to_html("hi test").should == - "

hi http://example.com/a. test

\n" + "

hi " << + "http://example.com/a. test

" end # bug#242 @@ -34,11 +28,11 @@ describe Markdowner do User.make!(:username => "blahblah") Markdowner.to_html("hi http://example.com/@blahblah/ test").should == - "

hi http://example.com/@blahblah

\n" + "

hi " << + "http://example.com/@blahblah/ test

" Markdowner.to_html("hi [test](http://example.com/@blahblah/)").should == - "

hi test

\n" + "

hi " << + "test

" end end