add similar-url check for www\d*\., rewrite checker to be simpler
This commit is contained in:
parent
70db7a5879
commit
c945f29040
|
@ -67,18 +67,38 @@ class Story < ActiveRecord::Base
|
|||
|
||||
def self.find_recent_similar_by_url(url)
|
||||
urls = [ url ]
|
||||
urls.push url.gsub(/^http:\/\//, "https://")
|
||||
urls.push url.gsub(/^https:\/\//, "http://")
|
||||
urls.push url.gsub(/^http:\/\//, "https://").gsub(/\/+\z/, "")
|
||||
urls.push url.gsub(/^https:\/\//, "http://").gsub(/\/+\z/, "")
|
||||
urls.push url.gsub(/^http:\/\//, "https://") << "/"
|
||||
urls.push url.gsub(/^https:\/\//, "http://") << "/"
|
||||
urls2 = [ url ]
|
||||
|
||||
urls.uniq.each do |url|
|
||||
if s = Story.find(:first, :conditions => [ "created_at >= ? AND url = ?",
|
||||
(Time.now - 30.days), url ])
|
||||
return s
|
||||
end
|
||||
# https
|
||||
urls.each do |u|
|
||||
urls2.push u.gsub(/^http:\/\//i, "https://")
|
||||
urls2.push u.gsub(/^https:\/\//i, "http://")
|
||||
end
|
||||
urls = urls2.clone
|
||||
|
||||
# trailing slash
|
||||
urls.each do |u|
|
||||
urls2.push u.gsub(/\/+\z/, "")
|
||||
urls2.push (u << "/")
|
||||
end
|
||||
urls = urls2.clone
|
||||
|
||||
# www prefix
|
||||
urls.each do |u|
|
||||
urls2.push u.gsub(/^(https?:\/\/)www\d*\./i) {|_| $1 }
|
||||
urls2.push u.gsub(/^(https?:\/\/)/i) {|_| "#{$1}www." }
|
||||
end
|
||||
urls = urls2.clone
|
||||
|
||||
conds = [ "created_at >= ? AND (", (Time.now - 30.days) ]
|
||||
urls.uniq.each_with_index do |url,x|
|
||||
conds[0] << (x == 0 ? "" : " OR ") << "url = ?"
|
||||
conds.push url
|
||||
end
|
||||
conds[0] << ")"
|
||||
|
||||
if s = Story.find(:first, :conditions => conds)
|
||||
return s
|
||||
end
|
||||
|
||||
false
|
||||
|
|
|
@ -34,59 +34,35 @@ describe Story do
|
|||
end
|
||||
|
||||
# Exercises URL checking when creating a Story: creation with an http://
# URL must not raise, and creation with an ftp:// URL must raise. Each case
# is tried both with only a :url and with an explicit :title plus :url.
it "checks for invalid urls" do
  # http:// URLs: creating the story should not raise.
  expect { Story.make!(:url => "http://gooses.com/") }.to_not raise_error
  expect { Story.make!(:title => "test", :url => "http://gooses.com/")
    }.to_not raise_error

  # ftp:// URLs: creating the story should raise.
  expect { Story.make!(:url => "ftp://gooses/") }.to raise_error
  # The hash key must be the :url symbol, as in the calls above. There is no
  # `url` local in this example, so a bare `url` would presumably raise
  # NameError inside the block; the unqualified raise_error matcher accepts
  # any exception, which would let this expectation pass without the ftp
  # URL ever reaching Story.make!.
  expect { Story.make!(:title => "test", :url => "ftp://gooses/")
    }.to raise_error
end
|
||||
|
||||
it "removes crap from urls" do
|
||||
Story.make!(:url => "http://www.example.com/").
|
||||
url.should == "http://www.example.com/"
|
||||
Story.delete_all
|
||||
it "checks for a previously posted story with same url" do
|
||||
Story.count.should == 0
|
||||
|
||||
Story.make!(:url => "http://www.example.com/?utm_campaign=Spam").
|
||||
url.should == "http://www.example.com/"
|
||||
Story.delete_all
|
||||
|
||||
Story.make!(:url => "http://www.example.com/?utm_campaign=Spam&hello=hi").
|
||||
url.should == "http://www.example.com/?hello=hi"
|
||||
Story.delete_all
|
||||
end
|
||||
|
||||
it "finds similar urls" do
|
||||
s = Story.make!(:url => "https://example.com/something")
|
||||
Story.make!(:title => "flim flam", :url => "http://example.com/")
|
||||
Story.count.should == 1
|
||||
|
||||
new_s = Story.make(:url => "http://example.com/something")
|
||||
new_s.save.should == false
|
||||
new_s.already_posted_story.should == s
|
||||
|
||||
new_s = Story.make(:url => "http://example.com/something/")
|
||||
new_s.save.should == false
|
||||
new_s.already_posted_story.should == s
|
||||
|
||||
new_s = Story.make(:url => "http://example.com/something/")
|
||||
new_s.save.should == false
|
||||
new_s.already_posted_story.should == s
|
||||
|
||||
Story.count.should == 1
|
||||
end
|
||||
expect { Story.make!(:title => "flim flam 2",
|
||||
:url => "http://example.com/") }.to raise_error
|
||||
|
||||
it "ignores similar urls from long ago" do
|
||||
new_s = Story.make(:created_at => 31.days.ago,
|
||||
:url => "http://example.com/something")
|
||||
new_s.save.should == true
|
||||
Story.count.should == 1
|
||||
|
||||
expect { Story.make!(:title => "flim flam 2",
|
||||
:url => "http://www.example.com/") }.to raise_error
|
||||
|
||||
new_s = Story.make(:url => "http://example.com/something")
|
||||
new_s.save.should == true
|
||||
Story.count.should == 1
|
||||
end
|
||||
|
||||
it "parses domain properly" do
|
||||
s = Story.make!(:url => "http://example.com")
|
||||
s.domain.should == "example.com"
|
||||
|
||||
s = Story.make!(:url => "http://www3.example.com")
|
||||
s = Story.make!(:url => "http://www3.example.com/goose")
|
||||
s.domain.should == "example.com"
|
||||
|
||||
s = Story.make!(:url => "http://flub.example.com")
|
||||
|
|
Loading…
Reference in a new issue