09a92040ed
We force a connection to a pre-resolved IP to avoid being tricked into connecting to a local host, but hosts that use SNI need the hostname, not an IP, in the SSL negotiation. This fixes the problem of CloudFlare blocking requests to medium.com. That block prevented the URL canonicalization code from being run when a medium URL was submitted, which in turn prevented us from stripping off those stupid referral codes medium attaches to URLs. (Those could be stripped out in Story#url=, but medium allows custom domains, so that wouldn't work for things like m.signalvnoise.com.) Code from Pushover.
252 lines
5.5 KiB
Ruby
252 lines
5.5 KiB
Ruby
require "uri"
|
|
require "net/https"
|
|
require "resolv"
|
|
require "ipaddr"
|
|
|
|
module Net
  # Reopens Net::HTTP so a caller can connect to one address (a pre-resolved
  # IP) while keeping a different value in +address+ (the hostname).
  class HTTP
    # address             - made writable here so the hostname can be restored
    #                       after constructing the object with an IP.
    # custom_conn_address - when non-blank, returned by #conn_address instead
    #                       of +address+.
    # skip_close          - when truthy, #start ignores a given block: it
    #                       opens the session, does not yield, does not
    #                       finish the session, and returns self.
    attr_accessor :address, :custom_conn_address, :skip_close

    # Opens the session.
    #
    # With a block (and skip_close unset) the session is yielded, finished
    # afterwards, and the block's value is returned. Otherwise the session is
    # left open and self is returned.
    #
    # Raises IOError if the session is already open; starting it again would
    # open a second connection and orphan the first one.
    def start # :yield: http
      raise IOError, "HTTP session already opened" if @started

      if block_given? && !skip_close
        begin
          do_start
          return yield(self)
        ensure
          do_finish
        end
      end

      do_start
      self
    end

    private

    # Address to connect to: custom_conn_address when it is set to a
    # non-blank value, otherwise the regular address.
    # NOTE(review): relies on Net::HTTP's own connect code calling
    # conn_address for the socket - confirm against the Ruby version in use.
    def conn_address
      custom_conn_address.to_s.empty? ? address : custom_conn_address
    end
  end
end
|
|
|
|
# Small HTTP client that resolves the target host itself, refuses to talk to
# private/reserved networks, keeps a per-host cookie jar, and follows
# redirects up to a limit.
class Sponge
  # Default read timeout (seconds) applied to each HTTP connection.
  MAX_TIME = 60
  # Maximum time (seconds) allowed for one DNS resolution attempt.
  MAX_DNS_TIME = 5

  # debug    - when truthy, log progress to stdout and enable Net::HTTP
  #            debug output.
  # last_res - the response object of the most recent request made by #fetch.
  # timeout  - read timeout in seconds used for new connections.
  attr_accessor :debug, :last_res, :timeout

  # rfc3330
  BAD_NETS = [
    "0.0.0.0/8",
    "10.0.0.0/8",
    "127.0.0.0/8",
    "169.254.0.0/16",
    "172.16.0.0/12",
    "192.0.2.0/24",
    "192.88.99.0/24",
    "192.168.0.0/16",
    "198.18.0.0/15",
    "224.0.0.0/4",
    "240.0.0.0/4"
  ]

  # old api
  #
  # One-shot GET of +url+ with a fresh Sponge; see #fetch for the return
  # value and errors.
  def self.fetch(url, headers = {}, limit = 10)
    new.fetch(url, :get, nil, nil, headers, limit)
  end

  def initialize
    @cookies = {}
    @timeout = MAX_TIME
  end

  # Stores cookie +name+ = +val+ for +host+. A blank +val+ removes the
  # cookie instead.
  def set_cookie(host, name, val)
    dputs "setting cookie #{name} on domain #{host} to #{val}"

    jar = (@cookies[host] ||= {})

    if val.to_s == ""
      jar.delete(name)
    else
      jar[name] = val
    end
  end

  # Returns the Cookie header value for +host+: its own cookies merged with
  # those stored for any parent domain ("" when there are none).
  def cookies(host)
    host = host.to_s
    cooks = @cookies[host] || {}

    # check for domain cookies
    @cookies.each do |dom, jar|
      next unless parent_domain?(dom, host)

      dputs "adding domain keys from #{dom}"
      cooks = cooks.merge(jar)
    end

    cooks.map { |k, v| "#{k}=#{v};" }.join(" ")
  end

  # Fetches +url+ and returns the response body for a 2xx response.
  #
  # url           - absolute URL string.
  # method        - :get or :post (symbol or string, case-insensitive);
  #                 anything other than post is sent as a GET.
  # fields        - hash of form fields for a POST, joined as "k=v&k=v"
  #                 without further encoding.
  # raw_post_data - pre-built body for a POST; for a GET without a query
  #                 string it is escaped and used as the query.
  # headers       - extra request headers; they override the defaults.
  # limit         - remaining number of requests allowed while following
  #                 redirects.
  #
  # Redirects are followed with GET. Returns nil for any other response
  # class, and for a redirect that carries no Location header; the response
  # is available through #last_res either way.
  #
  # Raises ArgumentError when +limit+ is exhausted, and RuntimeError when
  # the host cannot be resolved or resolves into one of BAD_NETS.
  def fetch(url, method = :get, fields = nil, raw_post_data = nil,
            headers = {}, limit = 10)
    raise ArgumentError, "http redirection too deep" if limit <= 0

    # callers (including #get and #post) pass either symbols or strings
    verb = method.to_s.downcase
    uri = URI.parse(url)

    # we'll manually resolve the ip so we can verify it's not local
    ip = resolve_ip(uri.host)

    if BAD_NETS.any? { |net| IPAddr.new(net).include?(ip) }
      raise "refusing to talk to IP #{ip}"
    end

    host = Net::HTTP.new(ip.to_s, uri.port)
    host.read_timeout = self.timeout
    host.set_debug_output($stdout) if debug

    if uri.scheme == "https"
      host.use_ssl = true
      # connect to the verified IP, but keep the hostname as the address so
      # it is what gets used in the SSL negotiation (SNI)
      host.address = uri.host
      host.custom_conn_address = ip.to_s
      # NOTE(review): certificate verification is disabled here, so the
      # peer's identity is never checked. Left unchanged because enabling it
      # would alter behavior for existing callers - confirm whether
      # VERIFY_PEER can be used instead.
      host.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end

    send_headers = (headers || {}).dup

    path = uri.path.to_s.empty? ? "/" : uri.path
    if uri.query
      path += "?" + uri.query
    elsif verb == "get" && raw_post_data
      # URI.encode no longer exists in Ruby 3.0+
      path += "?" + URI::DEFAULT_PARSER.escape(raw_post_data)
      send_headers["Content-type"] = "application/x-www-form-urlencoded"
    end

    post_data = nil
    if verb == "post"
      if raw_post_data
        post_data = raw_post_data
        send_headers["Content-type"] = "application/x-www-form-urlencoded"
      else
        post_data = (fields || {}).map { |k, v| "#{k}=#{v}" }.join("&")
      end

      send_headers["Content-Length"] = post_data.bytesize.to_s
    end

    # collapse a leading "//" (relative redirects below can produce one)
    path = path.sub(%r{\A//}, "/")

    cookie_header = cookies(uri.host)

    auth_note = ""
    if uri.user
      mask = "*" * uri.password.to_s.length
      auth_note = "with http auth #{uri.user}/#{mask} "
    end
    dputs "fetching #{url} (#{ip}) #{auth_note}by #{verb} with cookies " +
      cookie_header

    send_headers = {
      "Host" => uri.host,
      "Cookie" => cookie_header,
      "Referer" => url.to_s,
      "User-Agent" => "Mozilla/5.0 (compatible)",
    }.merge(send_headers)

    if uri.user
      send_headers["Authorization"] = "Basic " +
        ["#{uri.user}:#{uri.password}"].pack("m").delete("\r\n")
    end

    res = if verb == "post"
      host.post(path, post_data, send_headers)
    else
      host.get(path, send_headers)
    end

    (res.get_fields("Set-Cookie") || []).each do |cook|
      if (pair = /\A([^=]+)=([^;]*)/.match(cook))
        set_cookie(uri.host, pair[1], pair[2])
      else
        dputs "unable to match cookie line #{cook}"
      end
    end

    self.last_res = res

    case res
    when Net::HTTPSuccess
      res.body
    when Net::HTTPRedirection
      location = res["location"]
      # e.g. 304 Not Modified: nothing to follow
      return nil if location.to_s.empty?

      # follow
      newuri = URI.parse(location)
      if newuri.host
        dputs "following redirection to " + location
      else
        # relative path
        newuri.host = uri.host
        newuri.scheme = uri.scheme
        newuri.port = uri.port
        newuri.path = "/#{newuri.path}"

        dputs "following relative redirection to " + newuri.to_s
      end

      fetch(newuri.to_s, :get, nil, nil, headers, limit - 1)
    end
  end

  # GETs +url+; see #fetch.
  def get(url)
    fetch(url, :get)
  end

  # POSTs +fields+ (a hash) to +url+; see #fetch.
  def post(url, fields)
    fetch(url, :post, fields)
  end

  private

  # Prints +string+ to stdout when debugging is enabled.
  def dputs(string)
    puts string if debug
  end

  # True when +dom+ is a proper parent domain of +host+: +host+ ends with
  # +dom+ and the match starts on a label boundary, so "example.com" covers
  # "www.example.com" but not "evilexample.com".
  def parent_domain?(dom, host)
    dom = dom.to_s
    return false unless dom.length < host.length && host.end_with?(dom)

    dom.start_with?(".") || host[-dom.length - 1] == "."
  end

  # Resolves +hostname+ and returns one of its IPv4 addresses, picked at
  # random, as an IPAddr. A DNS timeout is retried once. Raises RuntimeError
  # when resolution fails or yields no IPv4 address.
  def resolve_ip(hostname)
    retried = false

    begin
      Timeout.timeout(MAX_DNS_TIME) do
        # reject ipv6 addresses
        candidates = Resolv.getaddresses(hostname).reject { |a| a.include?(":") }
        raise "no IPv4 addresses found" if candidates.empty?

        # pick a random one
        IPAddr.new(candidates.sample)
      end
    rescue Timeout::Error
      raise "couldn't resolve #{hostname} (DNS timeout)" if retried

      retried = true
      retry
    rescue StandardError => e
      raise "couldn't resolve #{hostname} (#{e.inspect})"
    end
  end
end
|