diff --git a/app/assets/javascripts/application.js.erb b/app/assets/javascripts/application.js.erb index 8f05e90..1ddff7f 100644 --- a/app/assets/javascripts/application.js.erb +++ b/app/assets/javascripts/application.js.erb @@ -238,12 +238,16 @@ var _Lobsters = Class.extend({ button.prop("disabled", true); button.val("Fetching..."); - $.post("/stories/fetch_url_title", { + $.post("/stories/fetch_url_attributes", { fetch_url: url_field.val(), }) .success(function(data) { - if (data && data.title) - title_field.val(data.title.substr(0, title_field.maxLength)); + if (data) { + if (data.title) + title_field.val(data.title.substr(0, title_field.maxLength)); + if (data.url) + url_field.val(data.url); + } button.val(old_value); button.prop("disabled", false); diff --git a/app/controllers/stories_controller.rb b/app/controllers/stories_controller.rb index 6c83312..dddafd1 100644 --- a/app/controllers/stories_controller.rb +++ b/app/controllers/stories_controller.rb @@ -58,15 +58,22 @@ class StoriesController < ApplicationController end end - def fetch_url_title + def fetch_url_attributes s = Story.new + s.fetching_ip = request.remote_ip s.url = params[:fetch_url] - if (title = s.fetched_title(request.remote_ip)).present? - return render :json => { :title => title } - else - return render :json => "error" + oattrs = { :url => params[:fetch_url], :title => nil } + + if (title = s.fetched_title).present? + oattrs[:title] = title end + + if (cu = s.fetched_canonical_url).present? + oattrs[:url] = cu + end + + return render :json => oattrs end def new @@ -74,10 +81,17 @@ class StoriesController < ApplicationController @cur_url = "/stories/new" @story = Story.new + @story.fetching_ip = request.remote_ip if params[:url].present? @story.url = params[:url] + if (cu = @story.fetched_canonical_url).present? && @story.url != cu + flash.now[:notice] = "Note: URL has been changed to fetched " << + "canonicalized version" + @story.url = cu + end + if s = Story.find_similar_by_url(@story.url) if s.is_recent? # user won't be able to submit this story as new, so just redirect @@ -91,7 +105,7 @@ class StoriesController < ApplicationController end # ignore what the user brought unless we need it as a fallback - @story.title = @story.fetched_title(request.remote_ip) + @story.title = @story.fetched_title if !@story.title.present? && params[:title].present? @story.title = params[:title] end diff --git a/app/models/story.rb b/app/models/story.rb index b3da7d4..758b7b6 100644 --- a/app/models/story.rb +++ b/app/models/story.rb @@ -30,9 +30,10 @@ class Story < ActiveRecord::Base # days a story is considered recent, for resubmitting RECENT_DAYS = 30 - attr_accessor :vote, :already_posted_story, :fetched_content, :previewing, - :seen_previous, :is_hidden_by_cur_user + attr_accessor :vote, :already_posted_story, :previewing, :seen_previous, + :is_hidden_by_cur_user attr_accessor :editor, :moderation_reason, :merge_story_short_id + attr_accessor :fetching_ip before_validation :assign_short_id_and_upvote, :on => :create @@ -237,52 +238,6 @@ class Story < ActiveRecord::Base end end - def fetched_content(for_remote_ip = nil) - return @fetched_content if @fetched_content - - begin - s = Sponge.new - s.timeout = 3 - @fetched_content = s.fetch(self.url, :get, nil, nil, - { "User-agent" => "#{Rails.application.domain} for #{for_remote_ip}" }, - 3) - rescue - end - - @fetched_content - end - - def fetched_title(for_remote_ip = nil) - title = "" - - if !(doc = Nokogiri::HTML(fetched_content(for_remote_ip).to_s)) - return title - end - - # try first, it probably won't have the site - # name - begin - title = doc.at_css("meta[property='og:title']"). - attributes["content"].text - rescue - end - - # then try - if title.to_s == "" - begin - title = doc.at_css("meta[name='title']").attributes["content"].text - rescue - end - end - - # then try plain old - if title.to_s == "" - title = doc.at_css("title").try(:text).to_s - end - - return title - end - def generated_markeddown_description Markdowner.to_html(self.description, { :allow_images => true }) end @@ -576,4 +531,73 @@ class Story < ActiveRecord::Base end }.join(", ") end + + def fetched_content + return @fetched_content if @fetched_content + + begin + s = Sponge.new + s.timeout = 3 + @fetched_content = s.fetch(self.url, :get, nil, nil, + { "User-agent" => "#{Rails.application.domain} for #{self.fetching_ip}" }, + 3) + rescue + end + + @fetched_content + end + + def parsed_content + return @parsed_content if @parsed_content + + @parsed_content = Nokogiri::HTML(self.fetched_content.to_s) + end + + def fetched_title + title = "" + + if !(doc = self.parsed_content) + return title + end + + # try <meta property="og:title"> first, it probably won't have the site + # name + begin + title = doc.at_css("meta[property='og:title']"). + attributes["content"].text + rescue + end + + # then try <meta name="title"> + if title.to_s == "" + begin + title = doc.at_css("meta[name='title']").attributes["content"].text + rescue + end + end + + # then try plain old <title> + if title.to_s == "" + title = doc.at_css("title").try(:text).to_s + end + + return title + end + + def fetched_canonical_url + return @fetched_canonical_url if @fetched_canonical_url + + if doc = self.parsed_content + begin + if (cu = doc.at_css("link[rel='canonical']").attributes["href"]. + text).present? && (ucu = URI.parse(cu)) && ucu.scheme.present? && + ucu.host.present? + return cu + end + rescue + end + end + + return self.url + end end diff --git a/config/routes.rb b/config/routes.rb index 0f265c1..7023169 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -58,7 +58,7 @@ Lobsters::Application.routes.draw do post "hide" post "unhide" end - post "/stories/fetch_url_title", :format => "json" + post "/stories/fetch_url_attributes", :format => "json" post "/stories/preview" => "stories#preview" resources :comments do