do better at finding near-similar urls already posted recently
http -> https, trailing slash, etc.
This commit is contained in:
parent
6f3f018598
commit
389b4c61ec
|
@ -28,8 +28,7 @@ class Story < ActiveRecord::Base
|
|||
# URI.parse is not very lenient, so we can't use it
|
||||
|
||||
if self.url.match(/\Ahttps?:\/\/([^\.]+\.)+[a-z]+(\/|\z)/)
|
||||
if self.new_record? && (s = Story.find_by_url(self.url)) &&
|
||||
(Time.now - s.created_at) < 30.days
|
||||
if self.new_record? && (s = Story.find_recent_similar_by_url(self.url))
|
||||
errors.add(:url, "has already been submitted recently")
|
||||
self.already_posted_story = s
|
||||
end
|
||||
|
@ -46,6 +45,25 @@ class Story < ActiveRecord::Base
|
|||
end
|
||||
end
|
||||
|
||||
def self.find_recent_similar_by_url(url)
|
||||
urls = [ url ]
|
||||
urls.push url.gsub(/^http:\/\//, "https://")
|
||||
urls.push url.gsub(/^https:\/\//, "http://")
|
||||
urls.push url.gsub(/^http:\/\//, "https://").gsub(/\/+\z/, "")
|
||||
urls.push url.gsub(/^https:\/\//, "http://").gsub(/\/+\z/, "")
|
||||
urls.push url.gsub(/^http:\/\//, "https://") << "/"
|
||||
urls.push url.gsub(/^https:\/\//, "http://") << "/"
|
||||
|
||||
urls.uniq.each do |url|
|
||||
if s = Story.find(:first, :conditions => [ "created_at >= ? AND url = ?",
|
||||
(Time.now - 30.days), url ])
|
||||
return s
|
||||
end
|
||||
end
|
||||
|
||||
false
|
||||
end
|
||||
|
||||
def assign_short_id
|
||||
10.times do |try|
|
||||
if try == 10
|
||||
|
@ -194,6 +212,19 @@ class Story < ActiveRecord::Base
|
|||
end
|
||||
end
|
||||
|
||||
def url=(u)
|
||||
# strip out stupid google analytics parameters
|
||||
if u && (m = u.match(/\A([^\?]+)\?(.+)\z/))
|
||||
params = m[2].split("&")
|
||||
params.reject!{|p|
|
||||
p.match(/^utm_(source|medium|campaign|term|content)=/) }
|
||||
|
||||
u = m[1] << (params.any?? "?" << params.join("&") : "")
|
||||
end
|
||||
|
||||
self[:url] = u
|
||||
end
|
||||
|
||||
def title=(t)
|
||||
# change unicode whitespace characters into real spaces
|
||||
self[:title] = t.strip
|
||||
|
|
|
@ -34,25 +34,54 @@ describe Story do
|
|||
end
|
||||
|
||||
it "checks for invalid urls" do
|
||||
expect { Story.make!(:title => "test", :url => "http://gooses.com/")
|
||||
}.to_not raise_error
|
||||
expect { Story.make!(:url => "http://gooses.com/") }.to_not raise_error
|
||||
|
||||
expect { Story.make!(:title => "test", url => "ftp://gooses/")
|
||||
}.to raise_error
|
||||
expect { Story.make!(:url => "ftp://gooses/") }.to raise_error
|
||||
end
|
||||
|
||||
it "checks for a previously posted story with same url" do
|
||||
Story.count.should == 0
|
||||
it "removes crap from urls" do
|
||||
Story.make!(:url => "http://www.example.com/").
|
||||
url.should == "http://www.example.com/"
|
||||
Story.delete_all
|
||||
|
||||
Story.make!(:title => "flim flam", :url => "http://example.com/")
|
||||
Story.make!(:url => "http://www.example.com/?utm_campaign=Spam").
|
||||
url.should == "http://www.example.com/"
|
||||
Story.delete_all
|
||||
|
||||
Story.make!(:url => "http://www.example.com/?utm_campaign=Spam&hello=hi").
|
||||
url.should == "http://www.example.com/?hello=hi"
|
||||
Story.delete_all
|
||||
end
|
||||
|
||||
it "finds similar urls" do
|
||||
s = Story.make!(:url => "https://example.com/something")
|
||||
Story.count.should == 1
|
||||
|
||||
expect { Story.make!(:title => "flim flam 2",
|
||||
:url => "http://example.com/") }.to raise_error
|
||||
new_s = Story.make(:url => "http://example.com/something")
|
||||
new_s.save.should == false
|
||||
new_s.already_posted_story.should == s
|
||||
|
||||
new_s = Story.make(:url => "http://example.com/something/")
|
||||
new_s.save.should == false
|
||||
new_s.already_posted_story.should == s
|
||||
|
||||
new_s = Story.make(:url => "http://example.com/something/")
|
||||
new_s.save.should == false
|
||||
new_s.already_posted_story.should == s
|
||||
|
||||
Story.count.should == 1
|
||||
end
|
||||
|
||||
it "ignores similar urls from long ago" do
|
||||
new_s = Story.make(:created_at => 31.days.ago,
|
||||
:url => "http://example.com/something")
|
||||
new_s.save.should == true
|
||||
Story.count.should == 1
|
||||
|
||||
new_s = Story.make(:url => "http://example.com/something")
|
||||
new_s.save.should == true
|
||||
end
|
||||
|
||||
it "parses domain properly" do
|
||||
s = Story.make!(:url => "http://example.com")
|
||||
s.domain.should == "example.com"
|
||||
|
|
Loading…
Reference in a new issue