Markdowner: use Nokogiri for html rewriting

Brute-forcing changes with regexps sometimes gets things wrong, so use
Nokogiri to parse the HTML output of RDiscount, make the changes on
individual nodes, and then serialize the result back into a string.

We lose the ability to move trailing punctuation outside of auto-generated
links, but I don't see any easy/definitive way to do this properly.

closes #242
closes #209
This commit is contained in:
joshua stein 2015-12-03 15:56:52 -06:00
parent abd662fc7e
commit a812f25c2f
2 changed files with 33 additions and 36 deletions

View file

@ -13,35 +13,38 @@ class Markdowner
args.push :no_image args.push :no_image
end end
html = RDiscount.new(text.to_s, *args).to_html ng = Nokogiri::HTML(RDiscount.new(text.to_s, *args).to_html)
# change <h1> headings to just emphasis tags # change <h1>, <h2>, etc. headings to just bold tags
html.gsub!(/<(\/)?h(\d)>/) {|_| "<#{$1}strong>" } ng.css("h1, h2, h3, h4, h5, h6").each do |h|
h.name = "strong"
# fix links that got the trailing punctuation appended to move it outside end
# the link
html.gsub!(/<a ([^>]+)([\.\!\,])">([^>]+)([\.\!\,])<\/a>/) {|_|
if $2.to_s == $4.to_s
"<a #{$1}\">#{$3}</a>#{$2}"
else
_
end
}
# make links have rel=nofollow # make links have rel=nofollow
html.gsub!(/<a href/, "<a rel=\"nofollow\" href") ng.css("a").each do |h|
h[:rel] = "nofollow"
end
if !opts[:disable_profile_links] unless opts[:disable_profile_links]
# make @username link to that user's profile # make @username link to that user's profile
html.gsub!(/\B\@([\w\-]+)/) do |u| ng.search("//text()").each do |t|
if User.exists?(:username => u[1 .. -1]) if t.parent && t.parent.name.downcase == "a"
"<a href=\"/u/#{u[1 .. -1]}\">#{u}</a>" # don't replace inside <a>s
else next
u
end end
tx = t.text.gsub(/\B\@([\w\-]+)/) do |u|
if User.exists?(:username => u[1 .. -1])
"<a href=\"/u/#{u[1 .. -1]}\">#{u}</a>"
else
u
end
end
t.replace(tx)
end end
end end
html ng.at_css("body").inner_html
end end
end end

View file

@ -3,30 +3,24 @@ require "spec_helper"
describe Markdowner do describe Markdowner do
it "parses simple markdown" do it "parses simple markdown" do
Markdowner.to_html("hello there *italics* and **bold**!").should == Markdowner.to_html("hello there *italics* and **bold**!").should ==
"<p>hello there <em>italics</em> and <strong>bold</strong>!</p>\n" "<p>hello there <em>italics</em> and <strong>bold</strong>!</p>"
end end
it "turns @username into a link if @username exists" do it "turns @username into a link if @username exists" do
User.make!(:username => "blahblah") User.make!(:username => "blahblah")
Markdowner.to_html("hi @blahblah test").should == Markdowner.to_html("hi @blahblah test").should ==
"<p>hi <a href=\"/u/blahblah\">@blahblah</a> test</p>\n" "<p>hi <a href=\"/u/blahblah\">@blahblah</a> test</p>"
Markdowner.to_html("hi @flimflam test").should == Markdowner.to_html("hi @flimflam test").should ==
"<p>hi @flimflam test</p>\n" "<p>hi @flimflam test</p>"
end
it "moves punctuation outside of auto-generated links" do
Markdowner.to_html("hi http://example.com/a! test").should ==
"<p>hi <a rel=\"nofollow\" " <<
"href=\"http://example.com/a\">http://example.com/a</a>! test</p>\n"
end end
# bug#209 # bug#209
it "keeps punctuation inside of auto-generated links when using brackets" do it "keeps punctuation inside of auto-generated links when using brackets" do
Markdowner.to_html("hi <http://example.com/a.> test").should == Markdowner.to_html("hi <http://example.com/a.> test").should ==
"<p>hi <a rel=\"nofollow\" " << "<p>hi <a href=\"http://example.com/a.\" rel=\"nofollow\">" <<
"href=\"http://example.com/a.\">http://example.com/a.</a> test</p>\n" "http://example.com/a.</a> test</p>"
end end
# bug#242 # bug#242
@ -34,11 +28,11 @@ describe Markdowner do
User.make!(:username => "blahblah") User.make!(:username => "blahblah")
Markdowner.to_html("hi http://example.com/@blahblah/ test").should == Markdowner.to_html("hi http://example.com/@blahblah/ test").should ==
"<p>hi <a rel=\"nofollow\" " << "<p>hi <a href=\"http://example.com/@blahblah/\" rel=\"nofollow\">" <<
"href=\"http://example.com/@blahblah/\">http://example.com/@blahblah</a></p>\n" "http://example.com/@blahblah/</a> test</p>"
Markdowner.to_html("hi [test](http://example.com/@blahblah/)").should == Markdowner.to_html("hi [test](http://example.com/@blahblah/)").should ==
"<p>hi <a rel=\"nofollow\" " << "<p>hi <a href=\"http://example.com/@blahblah/\" rel=\"nofollow\">" <<
"href=\"http://example.com/@blahblah/\">test</a></p>\n" "test</a></p>"
end end
end end