2013-06-22 03:38:37 +02:00
|
|
|
# Fetches the extracted article text of a story through the Diffbot article
# API and, when no text could be obtained, requests the story's archive URL.
class StoryCacher
  cattr_accessor :DIFFBOT_API_KEY

  # this needs to be overridden in config/initializers/production.rb
  @@DIFFBOT_API_KEY = nil

  DIFFBOT_API_URL = "http://www.diffbot.com/api/article"

  # Returns the article text for +story+ as reported by Diffbot, with every
  # run of newlines normalized to exactly one blank line so that each line
  # becomes its own paragraph.
  #
  # Returns nil when no API key is configured, when the story URL ends in
  # ".pdf", when the Diffbot response is blank, or when fetching/parsing
  # fails (failures are logged, never raised).  In the blank/failed cases the
  # story's archive URL is requested before returning nil.
  #
  # story - object responding to +url+ and +archive_url+.
  def self.get_story_text(story)
    return nil unless @@DIFFBOT_API_KEY

    # XXX: diffbot tries to read pdfs as text, so disable for now
    return nil if story.url.to_s.match(/\.pdf$/i)

    # to_s keeps a nil url from raising TypeError in CGI.escape, matching the
    # defensive to_s used in the pdf check above.
    # NOTE(review): db_url embeds the API token and is written to the log on
    # failure below -- consider logging story.url instead.
    db_url = "#{DIFFBOT_API_URL}?token=#{@@DIFFBOT_API_KEY}&url=" \
      "#{CGI.escape(story.url.to_s)}"

    begin
      s = Sponge.new
      # we're not doing this interactively, so take a while
      s.timeout = 45
      res = s.fetch(db_url)

      if res.present?
        j = JSON.parse(res)

        # turn each run of newlines into exactly one blank line, so single
        # newlines become paragraph breaks and longer runs are collapsed
        return j["text"].to_s.gsub(/\n+/, "\n\n")
      end
    rescue => e
      Rails.logger.error "error fetching #{db_url}: #{e.message}"
    end

    # No text was obtained: request the archive URL.  The response is
    # discarded; presumably the request itself prompts the archive service to
    # take a snapshot -- TODO confirm against Story#archive_url.
    archive_url = nil
    begin
      archive_url = story.archive_url
      s = Sponge.new
      s.timeout = 45
      s.fetch(archive_url)
    rescue => e
      # log the URL that was actually requested, not the diffbot one
      Rails.logger.error "error caching #{archive_url}: #{e.message}"
    end

    nil
  end
end
|