When auto-detecting story titles, also try to find a canonical URL and use it.
This should remedy duplicate stories being submitted under each of their stupid blogspot.* domains instead of under the canonical URL given in the page's <link rel="canonical"> tag.
This commit is contained in:
parent
491a3d57e8
commit
6695480fdb
|
@ -238,12 +238,16 @@ var _Lobsters = Class.extend({
|
||||||
button.prop("disabled", true);
|
button.prop("disabled", true);
|
||||||
button.val("Fetching...");
|
button.val("Fetching...");
|
||||||
|
|
||||||
$.post("/stories/fetch_url_title", {
|
$.post("/stories/fetch_url_attributes", {
|
||||||
fetch_url: url_field.val(),
|
fetch_url: url_field.val(),
|
||||||
})
|
})
|
||||||
.success(function(data) {
|
.success(function(data) {
|
||||||
if (data && data.title)
|
if (data) {
|
||||||
title_field.val(data.title.substr(0, title_field.maxLength));
|
if (data.title)
|
||||||
|
title_field.val(data.title.substr(0, title_field.maxLength));
|
||||||
|
if (data.url)
|
||||||
|
url_field.val(data.url);
|
||||||
|
}
|
||||||
|
|
||||||
button.val(old_value);
|
button.val(old_value);
|
||||||
button.prop("disabled", false);
|
button.prop("disabled", false);
|
||||||
|
|
|
@ -58,15 +58,22 @@ class StoriesController < ApplicationController
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def fetch_url_title
|
def fetch_url_attributes
|
||||||
s = Story.new
|
s = Story.new
|
||||||
|
s.fetching_ip = request.remote_ip
|
||||||
s.url = params[:fetch_url]
|
s.url = params[:fetch_url]
|
||||||
|
|
||||||
if (title = s.fetched_title(request.remote_ip)).present?
|
oattrs = { :url => params[:fetch_url], :title => nil }
|
||||||
return render :json => { :title => title }
|
|
||||||
else
|
if (title = s.fetched_title).present?
|
||||||
return render :json => "error"
|
oattrs[:title] = title
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if (cu = s.fetched_canonical_url).present?
|
||||||
|
oattrs[:url] = cu
|
||||||
|
end
|
||||||
|
|
||||||
|
return render :json => oattrs
|
||||||
end
|
end
|
||||||
|
|
||||||
def new
|
def new
|
||||||
|
@ -74,10 +81,17 @@ class StoriesController < ApplicationController
|
||||||
@cur_url = "/stories/new"
|
@cur_url = "/stories/new"
|
||||||
|
|
||||||
@story = Story.new
|
@story = Story.new
|
||||||
|
@story.fetching_ip = request.remote_ip
|
||||||
|
|
||||||
if params[:url].present?
|
if params[:url].present?
|
||||||
@story.url = params[:url]
|
@story.url = params[:url]
|
||||||
|
|
||||||
|
if (cu = @story.fetched_canonical_url).present? && @story.url != cu
|
||||||
|
flash.now[:notice] = "Note: URL has been changed to fetched " <<
|
||||||
|
"canonicalized version"
|
||||||
|
@story.url = cu
|
||||||
|
end
|
||||||
|
|
||||||
if s = Story.find_similar_by_url(@story.url)
|
if s = Story.find_similar_by_url(@story.url)
|
||||||
if s.is_recent?
|
if s.is_recent?
|
||||||
# user won't be able to submit this story as new, so just redirect
|
# user won't be able to submit this story as new, so just redirect
|
||||||
|
@ -91,7 +105,7 @@ class StoriesController < ApplicationController
|
||||||
end
|
end
|
||||||
|
|
||||||
# ignore what the user brought unless we need it as a fallback
|
# ignore what the user brought unless we need it as a fallback
|
||||||
@story.title = @story.fetched_title(request.remote_ip)
|
@story.title = @story.fetched_title
|
||||||
if !@story.title.present? && params[:title].present?
|
if !@story.title.present? && params[:title].present?
|
||||||
@story.title = params[:title]
|
@story.title = params[:title]
|
||||||
end
|
end
|
||||||
|
|
|
@ -30,9 +30,10 @@ class Story < ActiveRecord::Base
|
||||||
# days a story is considered recent, for resubmitting
|
# days a story is considered recent, for resubmitting
|
||||||
RECENT_DAYS = 30
|
RECENT_DAYS = 30
|
||||||
|
|
||||||
attr_accessor :vote, :already_posted_story, :fetched_content, :previewing,
|
attr_accessor :vote, :already_posted_story, :previewing, :seen_previous,
|
||||||
:seen_previous, :is_hidden_by_cur_user
|
:is_hidden_by_cur_user
|
||||||
attr_accessor :editor, :moderation_reason, :merge_story_short_id
|
attr_accessor :editor, :moderation_reason, :merge_story_short_id
|
||||||
|
attr_accessor :fetching_ip
|
||||||
|
|
||||||
before_validation :assign_short_id_and_upvote,
|
before_validation :assign_short_id_and_upvote,
|
||||||
:on => :create
|
:on => :create
|
||||||
|
@ -237,52 +238,6 @@ class Story < ActiveRecord::Base
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def fetched_content(for_remote_ip = nil)
|
|
||||||
return @fetched_content if @fetched_content
|
|
||||||
|
|
||||||
begin
|
|
||||||
s = Sponge.new
|
|
||||||
s.timeout = 3
|
|
||||||
@fetched_content = s.fetch(self.url, :get, nil, nil,
|
|
||||||
{ "User-agent" => "#{Rails.application.domain} for #{for_remote_ip}" },
|
|
||||||
3)
|
|
||||||
rescue
|
|
||||||
end
|
|
||||||
|
|
||||||
@fetched_content
|
|
||||||
end
|
|
||||||
|
|
||||||
def fetched_title(for_remote_ip = nil)
|
|
||||||
title = ""
|
|
||||||
|
|
||||||
if !(doc = Nokogiri::HTML(fetched_content(for_remote_ip).to_s))
|
|
||||||
return title
|
|
||||||
end
|
|
||||||
|
|
||||||
# try <meta property="og:title"> first, it probably won't have the site
|
|
||||||
# name
|
|
||||||
begin
|
|
||||||
title = doc.at_css("meta[property='og:title']").
|
|
||||||
attributes["content"].text
|
|
||||||
rescue
|
|
||||||
end
|
|
||||||
|
|
||||||
# then try <meta name="title">
|
|
||||||
if title.to_s == ""
|
|
||||||
begin
|
|
||||||
title = doc.at_css("meta[name='title']").attributes["content"].text
|
|
||||||
rescue
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# then try plain old <title>
|
|
||||||
if title.to_s == ""
|
|
||||||
title = doc.at_css("title").try(:text).to_s
|
|
||||||
end
|
|
||||||
|
|
||||||
return title
|
|
||||||
end
|
|
||||||
|
|
||||||
def generated_markeddown_description
|
def generated_markeddown_description
|
||||||
Markdowner.to_html(self.description, { :allow_images => true })
|
Markdowner.to_html(self.description, { :allow_images => true })
|
||||||
end
|
end
|
||||||
|
@ -576,4 +531,73 @@ class Story < ActiveRecord::Base
|
||||||
end
|
end
|
||||||
}.join(", ")
|
}.join(", ")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def fetched_content
|
||||||
|
return @fetched_content if @fetched_content
|
||||||
|
|
||||||
|
begin
|
||||||
|
s = Sponge.new
|
||||||
|
s.timeout = 3
|
||||||
|
@fetched_content = s.fetch(self.url, :get, nil, nil,
|
||||||
|
{ "User-agent" => "#{Rails.application.domain} for #{self.fetching_ip}" },
|
||||||
|
3)
|
||||||
|
rescue
|
||||||
|
end
|
||||||
|
|
||||||
|
@fetched_content
|
||||||
|
end
|
||||||
|
|
||||||
|
def parsed_content
|
||||||
|
return @parsed_content if @parsed_content
|
||||||
|
|
||||||
|
@parsed_content = Nokogiri::HTML(self.fetched_content.to_s)
|
||||||
|
end
|
||||||
|
|
||||||
|
def fetched_title
|
||||||
|
title = ""
|
||||||
|
|
||||||
|
if !(doc = self.parsed_content)
|
||||||
|
return title
|
||||||
|
end
|
||||||
|
|
||||||
|
# try <meta property="og:title"> first, it probably won't have the site
|
||||||
|
# name
|
||||||
|
begin
|
||||||
|
title = doc.at_css("meta[property='og:title']").
|
||||||
|
attributes["content"].text
|
||||||
|
rescue
|
||||||
|
end
|
||||||
|
|
||||||
|
# then try <meta name="title">
|
||||||
|
if title.to_s == ""
|
||||||
|
begin
|
||||||
|
title = doc.at_css("meta[name='title']").attributes["content"].text
|
||||||
|
rescue
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# then try plain old <title>
|
||||||
|
if title.to_s == ""
|
||||||
|
title = doc.at_css("title").try(:text).to_s
|
||||||
|
end
|
||||||
|
|
||||||
|
return title
|
||||||
|
end
|
||||||
|
|
||||||
|
def fetched_canonical_url
|
||||||
|
return @fetched_canonical_url if @fetched_canonical_url
|
||||||
|
|
||||||
|
if doc = self.parsed_content
|
||||||
|
begin
|
||||||
|
if (cu = doc.at_css("link[rel='canonical']").attributes["href"].
|
||||||
|
text).present? && (ucu = URI.parse(cu)) && ucu.scheme.present? &&
|
||||||
|
ucu.host.present?
|
||||||
|
return cu
|
||||||
|
end
|
||||||
|
rescue
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return self.url
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -58,7 +58,7 @@ Lobsters::Application.routes.draw do
|
||||||
post "hide"
|
post "hide"
|
||||||
post "unhide"
|
post "unhide"
|
||||||
end
|
end
|
||||||
post "/stories/fetch_url_title", :format => "json"
|
post "/stories/fetch_url_attributes", :format => "json"
|
||||||
post "/stories/preview" => "stories#preview"
|
post "/stories/preview" => "stories#preview"
|
||||||
|
|
||||||
resources :comments do
|
resources :comments do
|
||||||
|
|
Loading…
Reference in a new issue