Markdowner: use Nokogiri for html rewriting

brute-forcing changes with regexps gets things wrong sometimes, so use
nokogiri to parse the html output of rdiscount, make changes on
individual nodes, then turn the result back into a string

we lose the ability to move trailing punctuation out of auto-generated
links (e.g. the "!" in "http://example.com/a!"), but i don't see any
easy/definitive way to do this properly.

closes #242
closes #209
This commit is contained in:
joshua stein 2015-12-03 15:56:52 -06:00
parent abd662fc7e
commit a812f25c2f
2 changed files with 33 additions and 36 deletions

View file

@ -13,35 +13,38 @@ class Markdowner
args.push :no_image
end
html = RDiscount.new(text.to_s, *args).to_html
ng = Nokogiri::HTML(RDiscount.new(text.to_s, *args).to_html)
# change <h1> headings to just emphasis tags
html.gsub!(/<(\/)?h(\d)>/) {|_| "<#{$1}strong>" }
# fix links that got the trailing punctuation appended to move it outside
# the link
html.gsub!(/<a ([^>]+)([\.\!\,])">([^>]+)([\.\!\,])<\/a>/) {|_|
if $2.to_s == $4.to_s
"<a #{$1}\">#{$3}</a>#{$2}"
else
_
end
}
# change <h1>, <h2>, etc. headings to just bold tags
ng.css("h1, h2, h3, h4, h5, h6").each do |h|
h.name = "strong"
end
# make links have rel=nofollow
html.gsub!(/<a href/, "<a rel=\"nofollow\" href")
ng.css("a").each do |h|
h[:rel] = "nofollow"
end
if !opts[:disable_profile_links]
unless opts[:disable_profile_links]
# make @username link to that user's profile
html.gsub!(/\B\@([\w\-]+)/) do |u|
if User.exists?(:username => u[1 .. -1])
"<a href=\"/u/#{u[1 .. -1]}\">#{u}</a>"
else
u
ng.search("//text()").each do |t|
if t.parent && t.parent.name.downcase == "a"
# don't replace inside <a>s
next
end
tx = t.text.gsub(/\B\@([\w\-]+)/) do |u|
if User.exists?(:username => u[1 .. -1])
"<a href=\"/u/#{u[1 .. -1]}\">#{u}</a>"
else
u
end
end
t.replace(tx)
end
end
html
ng.at_css("body").inner_html
end
end

View file

@ -3,30 +3,24 @@ require "spec_helper"
describe Markdowner do
it "parses simple markdown" do
Markdowner.to_html("hello there *italics* and **bold**!").should ==
"<p>hello there <em>italics</em> and <strong>bold</strong>!</p>\n"
"<p>hello there <em>italics</em> and <strong>bold</strong>!</p>"
end
it "turns @username into a link if @username exists" do
User.make!(:username => "blahblah")
Markdowner.to_html("hi @blahblah test").should ==
"<p>hi <a href=\"/u/blahblah\">@blahblah</a> test</p>\n"
"<p>hi <a href=\"/u/blahblah\">@blahblah</a> test</p>"
Markdowner.to_html("hi @flimflam test").should ==
"<p>hi @flimflam test</p>\n"
end
it "moves punctuation outside of auto-generated links" do
Markdowner.to_html("hi http://example.com/a! test").should ==
"<p>hi <a rel=\"nofollow\" " <<
"href=\"http://example.com/a\">http://example.com/a</a>! test</p>\n"
"<p>hi @flimflam test</p>"
end
# bug#209
it "keeps punctuation inside of auto-generated links when using brackets" do
Markdowner.to_html("hi <http://example.com/a.> test").should ==
"<p>hi <a rel=\"nofollow\" " <<
"href=\"http://example.com/a.\">http://example.com/a.</a> test</p>\n"
"<p>hi <a href=\"http://example.com/a.\" rel=\"nofollow\">" <<
"http://example.com/a.</a> test</p>"
end
# bug#242
@ -34,11 +28,11 @@ describe Markdowner do
User.make!(:username => "blahblah")
Markdowner.to_html("hi http://example.com/@blahblah/ test").should ==
"<p>hi <a rel=\"nofollow\" " <<
"href=\"http://example.com/@blahblah/\">http://example.com/@blahblah</a></p>\n"
"<p>hi <a href=\"http://example.com/@blahblah/\" rel=\"nofollow\">" <<
"http://example.com/@blahblah/</a> test</p>"
Markdowner.to_html("hi [test](http://example.com/@blahblah/)").should ==
"<p>hi <a rel=\"nofollow\" " <<
"href=\"http://example.com/@blahblah/\">test</a></p>\n"
"<p>hi <a href=\"http://example.com/@blahblah/\" rel=\"nofollow\">" <<
"test</a></p>"
end
end