diff --git a/app/models/story.rb b/app/models/story.rb
index edebd20..47a9aa8 100644
--- a/app/models/story.rb
+++ b/app/models/story.rb
@@ -67,18 +67,38 @@ class Story < ActiveRecord::Base
 
   def self.find_recent_similar_by_url(url)
     urls = [ url ]
-    urls.push url.gsub(/^http:\/\//, "https://")
-    urls.push url.gsub(/^https:\/\//, "http://")
-    urls.push url.gsub(/^http:\/\//, "https://").gsub(/\/+\z/, "")
-    urls.push url.gsub(/^https:\/\//, "http://").gsub(/\/+\z/, "")
-    urls.push url.gsub(/^http:\/\//, "https://") << "/"
-    urls.push url.gsub(/^https:\/\//, "http://") << "/"
+    urls2 = [ url ]
 
-    urls.uniq.each do |url|
-      if s = Story.find(:first, :conditions => [ "created_at >= ? AND url = ?",
-      (Time.now - 30.days), url ])
-        return s
-      end
+    # https: for every candidate so far, also try the opposite scheme
+    urls.each do |u|
+      urls2.push u.gsub(/^http:\/\//i, "https://")
+      urls2.push u.gsub(/^https:\/\//i, "http://")
+    end
+    urls = urls2.clone
+
+    # trailing slash: try every candidate both without and with one
+    urls.each do |u|
+      urls2.push u.gsub(/\/+\z/, "")
+      urls2.push(u + "/") # not <<: u is shared with urls2 and the caller's url
+    end
+    urls = urls2.clone
+
+    # www prefix: try every candidate both without and with a www. host prefix
+    urls.each do |u|
+      urls2.push u.gsub(/^(https?:\/\/)www\d*\./i) {|_| $1 }
+      urls2.push u.gsub(/^(https?:\/\/)/i) {|_| "#{$1}www." }
+    end
+    urls = urls2.clone
+
+    conds = [ "created_at >= ? AND (", (Time.now - 30.days) ]
+    urls.uniq.each_with_index do |url,x|
+      conds[0] << (x == 0 ? "" : " OR ") << "url = ?"
+      conds.push url
+    end
+    conds[0] << ")"
+
+    if s = Story.find(:first, :conditions => conds)
+      return s
     end
 
     false
diff --git a/spec/models/story_spec.rb b/spec/models/story_spec.rb
index 57eae7f..34d81fc 100644
--- a/spec/models/story_spec.rb
+++ b/spec/models/story_spec.rb
@@ -34,59 +34,35 @@ describe Story do
   end
 
   it "checks for invalid urls" do
-    expect { Story.make!(:url => "http://gooses.com/") }.to_not raise_error
+    expect { Story.make!(:title => "test", :url => "http://gooses.com/")
+      }.to_not raise_error
 
-    expect { Story.make!(:url => "ftp://gooses/") }.to raise_error
+    expect { Story.make!(:title => "test", :url => "ftp://gooses/")
+      }.to raise_error
   end
 
-  it "removes crap from urls" do
-    Story.make!(:url => "http://www.example.com/").
-      url.should == "http://www.example.com/"
-    Story.delete_all
+  it "checks for a previously posted story with same url" do
+    Story.count.should == 0
 
-    Story.make!(:url => "http://www.example.com/?utm_campaign=Spam").
-      url.should == "http://www.example.com/"
-    Story.delete_all
-
-    Story.make!(:url => "http://www.example.com/?utm_campaign=Spam&hello=hi").
-      url.should == "http://www.example.com/?hello=hi"
-    Story.delete_all
-  end
-
-  it "finds similar urls" do
-    s = Story.make!(:url => "https://example.com/something")
+    Story.make!(:title => "flim flam", :url => "http://example.com/")
     Story.count.should == 1
 
-    new_s = Story.make(:url => "http://example.com/something")
-    new_s.save.should == false
-    new_s.already_posted_story.should == s
-
-    new_s = Story.make(:url => "http://example.com/something/")
-    new_s.save.should == false
-    new_s.already_posted_story.should == s
-
-    new_s = Story.make(:url => "http://example.com/something/")
-    new_s.save.should == false
-    new_s.already_posted_story.should == s
-
-    Story.count.should == 1
-  end
+    expect { Story.make!(:title => "flim flam 2",
+      :url => "http://example.com/") }.to raise_error
 
-  it "ignores similar urls from long ago" do
-    new_s = Story.make(:created_at => 31.days.ago,
-      :url => "http://example.com/something")
-    new_s.save.should == true
     Story.count.should == 1
+
+    expect { Story.make!(:title => "flim flam 2",
+      :url => "http://www.example.com/") }.to raise_error
 
-    new_s = Story.make(:url => "http://example.com/something")
-    new_s.save.should == true
+    Story.count.should == 1
   end
 
   it "parses domain properly" do
     s = Story.make!(:url => "http://example.com")
     s.domain.should == "example.com"
 
-    s = Story.make!(:url => "http://www3.example.com")
+    s = Story.make!(:url => "http://www3.example.com/goose")
     s.domain.should == "example.com"
 
     s = Story.make!(:url => "http://flub.example.com")