When auto-detecting story titles, also try to find a canonical URL and use it.
This should remedy duplicate stories being submitted under each of their blogspot.* domains instead of the canonical URL given in the page's <link rel="canonical"> tag.
This commit is contained in:
parent
491a3d57e8
commit
6695480fdb
|
@ -238,12 +238,16 @@ var _Lobsters = Class.extend({
|
|||
button.prop("disabled", true);
|
||||
button.val("Fetching...");
|
||||
|
||||
$.post("/stories/fetch_url_title", {
|
||||
$.post("/stories/fetch_url_attributes", {
|
||||
fetch_url: url_field.val(),
|
||||
})
|
||||
.success(function(data) {
|
||||
if (data && data.title)
|
||||
title_field.val(data.title.substr(0, title_field.maxLength));
|
||||
if (data) {
|
||||
if (data.title)
|
||||
title_field.val(data.title.substr(0, title_field.maxLength));
|
||||
if (data.url)
|
||||
url_field.val(data.url);
|
||||
}
|
||||
|
||||
button.val(old_value);
|
||||
button.prop("disabled", false);
|
||||
|
|
|
@ -58,15 +58,22 @@ class StoriesController < ApplicationController
|
|||
end
|
||||
end
|
||||
|
||||
def fetch_url_title
|
||||
def fetch_url_attributes
|
||||
s = Story.new
|
||||
s.fetching_ip = request.remote_ip
|
||||
s.url = params[:fetch_url]
|
||||
|
||||
if (title = s.fetched_title(request.remote_ip)).present?
|
||||
return render :json => { :title => title }
|
||||
else
|
||||
return render :json => "error"
|
||||
oattrs = { :url => params[:fetch_url], :title => nil }
|
||||
|
||||
if (title = s.fetched_title).present?
|
||||
oattrs[:title] = title
|
||||
end
|
||||
|
||||
if (cu = s.fetched_canonical_url).present?
|
||||
oattrs[:url] = cu
|
||||
end
|
||||
|
||||
return render :json => oattrs
|
||||
end
|
||||
|
||||
def new
|
||||
|
@ -74,10 +81,17 @@ class StoriesController < ApplicationController
|
|||
@cur_url = "/stories/new"
|
||||
|
||||
@story = Story.new
|
||||
@story.fetching_ip = request.remote_ip
|
||||
|
||||
if params[:url].present?
|
||||
@story.url = params[:url]
|
||||
|
||||
if (cu = @story.fetched_canonical_url).present? && @story.url != cu
|
||||
flash.now[:notice] = "Note: URL has been changed to fetched " <<
|
||||
"canonicalized version"
|
||||
@story.url = cu
|
||||
end
|
||||
|
||||
if s = Story.find_similar_by_url(@story.url)
|
||||
if s.is_recent?
|
||||
# user won't be able to submit this story as new, so just redirect
|
||||
|
@ -91,7 +105,7 @@ class StoriesController < ApplicationController
|
|||
end
|
||||
|
||||
# ignore what the user brought unless we need it as a fallback
|
||||
@story.title = @story.fetched_title(request.remote_ip)
|
||||
@story.title = @story.fetched_title
|
||||
if !@story.title.present? && params[:title].present?
|
||||
@story.title = params[:title]
|
||||
end
|
||||
|
|
|
@ -30,9 +30,10 @@ class Story < ActiveRecord::Base
|
|||
# days a story is considered recent, for resubmitting
|
||||
RECENT_DAYS = 30
|
||||
|
||||
attr_accessor :vote, :already_posted_story, :fetched_content, :previewing,
|
||||
:seen_previous, :is_hidden_by_cur_user
|
||||
attr_accessor :vote, :already_posted_story, :previewing, :seen_previous,
|
||||
:is_hidden_by_cur_user
|
||||
attr_accessor :editor, :moderation_reason, :merge_story_short_id
|
||||
attr_accessor :fetching_ip
|
||||
|
||||
before_validation :assign_short_id_and_upvote,
|
||||
:on => :create
|
||||
|
@ -237,52 +238,6 @@ class Story < ActiveRecord::Base
|
|||
end
|
||||
end
|
||||
|
||||
def fetched_content(for_remote_ip = nil)
|
||||
return @fetched_content if @fetched_content
|
||||
|
||||
begin
|
||||
s = Sponge.new
|
||||
s.timeout = 3
|
||||
@fetched_content = s.fetch(self.url, :get, nil, nil,
|
||||
{ "User-agent" => "#{Rails.application.domain} for #{for_remote_ip}" },
|
||||
3)
|
||||
rescue
|
||||
end
|
||||
|
||||
@fetched_content
|
||||
end
|
||||
|
||||
def fetched_title(for_remote_ip = nil)
|
||||
title = ""
|
||||
|
||||
if !(doc = Nokogiri::HTML(fetched_content(for_remote_ip).to_s))
|
||||
return title
|
||||
end
|
||||
|
||||
# try <meta property="og:title"> first, it probably won't have the site
|
||||
# name
|
||||
begin
|
||||
title = doc.at_css("meta[property='og:title']").
|
||||
attributes["content"].text
|
||||
rescue
|
||||
end
|
||||
|
||||
# then try <meta name="title">
|
||||
if title.to_s == ""
|
||||
begin
|
||||
title = doc.at_css("meta[name='title']").attributes["content"].text
|
||||
rescue
|
||||
end
|
||||
end
|
||||
|
||||
# then try plain old <title>
|
||||
if title.to_s == ""
|
||||
title = doc.at_css("title").try(:text).to_s
|
||||
end
|
||||
|
||||
return title
|
||||
end
|
||||
|
||||
# Renders this story's description through Markdowner.to_html with the
# :allow_images option enabled, and returns whatever that call returns.
def generated_markeddown_description
  render_options = { :allow_images => true }
  Markdowner.to_html(self.description, render_options)
end
|
||||
|
@ -576,4 +531,73 @@ class Story < ActiveRecord::Base
|
|||
end
|
||||
}.join(", ")
|
||||
end
|
||||
|
||||
# Fetches the document at self.url with Sponge and caches the result in
# @fetched_content, so repeated calls reuse the first successful fetch.
#
# The request carries a User-agent header naming the application domain and
# self.fetching_ip.  Any error raised while fetching is deliberately
# swallowed (best effort); in that case the cached value is left untouched
# and the method returns whatever @fetched_content currently holds (nil if
# nothing was ever fetched).
def fetched_content
  unless @fetched_content
    begin
      sponge = Sponge.new
      sponge.timeout = 3

      headers = {
        "User-agent" => "#{Rails.application.domain} for #{self.fetching_ip}"
      }

      # NOTE(review): the two nils and the trailing 3 are positional
      # arguments of Sponge#fetch whose meaning is not visible from here.
      @fetched_content = sponge.fetch(self.url, :get, nil, nil, headers, 3)
    rescue
      # best effort: a failed fetch simply leaves the cache empty
    end
  end

  @fetched_content
end
|
||||
|
||||
# Returns the Nokogiri::HTML parse of fetched_content (coerced to a string,
# so a failed fetch is parsed as ""), building it on first use and caching
# it in @parsed_content.
def parsed_content
  @parsed_content ||= Nokogiri::HTML(self.fetched_content.to_s)
end
|
||||
|
||||
# Extracts a title from the parsed page, returning "" when parsed_content is
# falsy or no title can be found.
#
# Sources are tried in order, and the first non-empty one wins:
#   1. <meta property="og:title"> (probably won't include the site name)
#   2. <meta name="title">
#   3. the plain <title> element
def fetched_title
  doc = self.parsed_content
  return "" unless doc

  ["meta[property='og:title']", "meta[name='title']"].each do |selector|
    candidate = begin
      doc.at_css(selector).attributes["content"].text
    rescue
      # tag or its content attribute is missing; fall through to next source
      nil
    end

    return candidate unless candidate.to_s == ""
  end

  doc.at_css("title").try(:text).to_s
end
|
||||
|
||||
# Returns the canonical URL advertised by the fetched page, or self.url when
# none can be used.
#
# The page's <link rel="canonical"> href is accepted only if it is non-blank
# and URI.parse gives it both a scheme and a host, so relative or malformed
# values are ignored.  An accepted value is cached in
# @fetched_canonical_url; the self.url fallback is intentionally not cached
# so it always reflects the story's current url.
def fetched_canonical_url
  return @fetched_canonical_url if @fetched_canonical_url

  begin
    doc = self.parsed_content
    link = doc && doc.at_css("link[rel='canonical']")
    href_attr = link && link.attributes["href"]
    href = href_attr ? href_attr.text : nil

    if href.present?
      parsed = URI.parse(href)

      if parsed.scheme.present? && parsed.host.present?
        @fetched_canonical_url = href
      end
    end
  rescue
    # best effort: an unparseable href (or any parser hiccup) just means we
    # keep the URL we already have
  end

  @fetched_canonical_url || self.url
end
|
||||
end
|
||||
|
|
|
@ -58,7 +58,7 @@ Lobsters::Application.routes.draw do
|
|||
post "hide"
|
||||
post "unhide"
|
||||
end
|
||||
post "/stories/fetch_url_title", :format => "json"
|
||||
post "/stories/fetch_url_attributes", :format => "json"
|
||||
post "/stories/preview" => "stories#preview"
|
||||
|
||||
resources :comments do
|
||||
|
|
Loading…
Reference in a new issue