move inbound e-mail parsing guts into an extra, add tests for it
This commit is contained in:
parent
2f55605645
commit
b75468a2c9
|
@ -0,0 +1,110 @@
|
|||
class EmailParser
|
||||
attr_reader :sender, :recipient, :email_text, :email
|
||||
|
||||
def initialize(sender, recipient, email_text)
|
||||
@sender = sender
|
||||
@recipient = recipient
|
||||
@email_text = email_text.forcibly_convert_to_utf8
|
||||
|
||||
@email = nil
|
||||
begin
|
||||
# the mail gem stupidly spams STDERR while parsing e-mail, so silence
|
||||
# that stream to avoid anything getting back to postfix
|
||||
Utils.silence_stream(STDERR) do
|
||||
@email = Mail.read_from_string(email_text)
|
||||
end
|
||||
rescue
|
||||
end
|
||||
|
||||
@sending_user = nil
|
||||
@parent = nil
|
||||
@body = nil
|
||||
end
|
||||
|
||||
def user_token
|
||||
@recipient.gsub(/^#{Rails.application.shortname}-/, "").gsub(/@.*/, "")
|
||||
end
|
||||
|
||||
def been_here?
|
||||
!!@email_text.match(/^X-BeenThere: #{Rails.application.shortname}-/i)
|
||||
end
|
||||
|
||||
def sending_user
|
||||
return @sending_user if @sending_user
|
||||
|
||||
if (user = User.where(:mailing_list_enabled => true,
|
||||
:mailing_list_token => user_token).first) && user.is_active?
|
||||
@sending_user = user
|
||||
return user
|
||||
end
|
||||
end
|
||||
|
||||
def parent
|
||||
return @parent if @parent
|
||||
|
||||
irt = self.email[:in_reply_to].to_s.gsub(/[^A-Za-z0-9@\.]/, "")
|
||||
|
||||
if m = irt.match(/^comment\.([^\.]+)\.\d+@/)
|
||||
@parent = Comment.where(:short_id => m[1]).first
|
||||
elsif m = irt.match(/^story\.([^\.]+)\.\d+@/)
|
||||
@parent = Story.where(:short_id => m[1]).first
|
||||
end
|
||||
|
||||
@parent
|
||||
end
|
||||
|
||||
def body
|
||||
return @body if @body
|
||||
|
||||
@possible_charset = nil
|
||||
|
||||
if self.email.multipart?
|
||||
# parts[0] - multipart/alternative
|
||||
# parts[0].parts[0] - text/plain
|
||||
# parts[0].parts[1] - text/html
|
||||
if (p = self.email.parts.first.parts.select{|p|
|
||||
p.content_type.match(/text\/plain/i) }).any?
|
||||
@body = p.first.body.to_s
|
||||
|
||||
begin
|
||||
@possible_charset = p.first.content_type_parameters["charset"]
|
||||
rescue
|
||||
end
|
||||
|
||||
# parts[0] - text/plain
|
||||
elsif (p = self.email.parts.select{|p|
|
||||
p.content_type.match(/text\/plain/i) }).any?
|
||||
@body = p.first.body.to_s
|
||||
|
||||
begin
|
||||
@possible_charset = p.first.content_type_parameters["charset"]
|
||||
rescue
|
||||
end
|
||||
end
|
||||
|
||||
# simple one-part
|
||||
elsif self.email.content_type.to_s.match(/text\/plain/)
|
||||
@body = self.email.body.to_s
|
||||
|
||||
begin
|
||||
@possible_charset = self.email.content_type_parameters["charset"]
|
||||
rescue
|
||||
end
|
||||
|
||||
elsif !self.email.content_type.to_s.present?
|
||||
# no content-type header, assume it's text/plain
|
||||
@body = self.email.body.to_s
|
||||
end
|
||||
|
||||
# TODO: use @possible_charset, but did previously forcing the entire
|
||||
# email_text to utf8 screw this up already?
|
||||
|
||||
# try to remove sig lines
|
||||
@body.gsub!(/^-- \n.+\z/m, "")
|
||||
|
||||
# TODO: try to strip out attribution line, followed by an optional blank
|
||||
# line, and then lines prefixed with >
|
||||
|
||||
@body.strip!
|
||||
end
|
||||
end
|
|
@ -24,125 +24,47 @@ EX_NOUSER = 67
|
|||
EX_TEMPFAIL = 75
|
||||
EX_UNAVAILABLE = 69
|
||||
|
||||
recipient = ARGV[0]
|
||||
user_token = recipient.gsub(/^#{Rails.application.shortname}-/, "").
|
||||
gsub(/@.*/, "")
|
||||
sender = ARGV[1]
|
||||
message = ""
|
||||
email = nil
|
||||
|
||||
while !STDIN.eof?
|
||||
message += STDIN.gets.to_s
|
||||
end
|
||||
|
||||
message = message.forcibly_convert_to_utf8
|
||||
parser = EmailParser.new(sender = ARGV[1], recipient = ARGV[0], message)
|
||||
|
||||
if message.match(/^X-BeenThere: #{Rails.application.shortname}-/i)
|
||||
# avoid looping
|
||||
if parser.been_here?
|
||||
# avoid looping, quietly
|
||||
exit
|
||||
end
|
||||
|
||||
sending_user = User.where(:mailing_list_enabled => true,
|
||||
:mailing_list_token => user_token).first
|
||||
|
||||
if !sending_user || !sending_user.is_active?
|
||||
STDERR.puts "no user with mailing list token #{user_token}"
|
||||
elsif !parser.sending_user
|
||||
STDERR.puts "no active user with mailing list token #{parser.user_token}"
|
||||
|
||||
# if this looks like a user token but invalid, generate a bounce to be
|
||||
# helpful. otherwise supress it to avoid talking back to spammers
|
||||
exit(recipient.match(/^#{Rails.application.shortname}-/) ? EX_NOUSER : 0)
|
||||
end
|
||||
exit(parser.user_token ? EX_NOUSER : 0)
|
||||
|
||||
# the mail gem stupidly spams STDERR while parsing e-mail, so silence that
|
||||
# stream to avoid anything getting back to postfix
|
||||
begin
|
||||
Utils.silence_stream(STDERR) do
|
||||
email = Mail.read_from_string(message)
|
||||
end
|
||||
|
||||
if !email
|
||||
raise
|
||||
end
|
||||
rescue
|
||||
elsif !parser.email
|
||||
STDERR.puts "error parsing e-mail"
|
||||
exit EX_UNAVAILABLE
|
||||
end
|
||||
|
||||
# figure out what this reply is to
|
||||
irt = email[:in_reply_to].to_s.gsub(/[^A-Za-z0-9@\.]/, "")
|
||||
|
||||
if m = irt.match(/^comment\.([^\.]+)\.\d+@/)
|
||||
parent = Comment.where(:short_id => m[1]).first
|
||||
elsif m = irt.match(/^story\.([^\.]+)\.\d+@/)
|
||||
parent = Story.where(:short_id => m[1]).first
|
||||
end
|
||||
|
||||
if !parent
|
||||
elsif !parser.parent
|
||||
STDERR.puts "no valid comment or story being replied to"
|
||||
exit EX_NOUSER
|
||||
end
|
||||
|
||||
body = nil
|
||||
possible_charset = nil
|
||||
|
||||
if email.multipart?
|
||||
# parts[0] - multipart/alternative
|
||||
# parts[0].parts[0] - text/plain
|
||||
# parts[0].parts[1] - text/html
|
||||
if (p = email.parts.first.parts.select{|p|
|
||||
p.content_type.match(/text\/plain/) }).any?
|
||||
begin
|
||||
possible_charset = p.first.content_type_parameters["charset"]
|
||||
rescue
|
||||
end
|
||||
|
||||
# parts[0] - text/plain
|
||||
elsif (p = email.parts.select{|p|
|
||||
p.content_type.match(/text\/plain/) }).any?
|
||||
body = p.first.body.to_s
|
||||
|
||||
begin
|
||||
possible_charset = p.first.content_type_parameters["charset"]
|
||||
rescue
|
||||
end
|
||||
end
|
||||
elsif email.content_type.to_s.match(/text\/plain/)
|
||||
body = email.body.to_s
|
||||
|
||||
begin
|
||||
possible_charset = email.content_type_parameters["charset"]
|
||||
rescue
|
||||
end
|
||||
|
||||
elsif !email.content_type.to_s.present?
|
||||
# no content-type header, assume it's text/plain
|
||||
body = email.body.to_s
|
||||
end
|
||||
|
||||
if !body.present?
|
||||
# oh well
|
||||
elsif !parser.body.present?
|
||||
STDERR.puts "no valid text/plain body found"
|
||||
exit EX_UNAVAILABLE
|
||||
end
|
||||
|
||||
# try to remove sig lines
|
||||
body.gsub!(/^-- \n.+\z/, "")
|
||||
|
||||
# TODO: try to strip out attribution line, followed by an optional blank line,
|
||||
# and then lines prefixed with >
|
||||
|
||||
body.strip!
|
||||
|
||||
c = Comment.new
|
||||
c.user_id = sending_user.id
|
||||
c.comment = body
|
||||
c.user_id = parser.sending_user.id
|
||||
c.comment = parser.body
|
||||
c.is_from_email = true
|
||||
|
||||
if parent.is_a?(Comment)
|
||||
c.story_id = parent.story_id
|
||||
c.parent_comment_id = parent.id
|
||||
if parser.parent.is_a?(Comment)
|
||||
c.story_id = parser.parent.story_id
|
||||
c.parent_comment_id = parser.parent.id
|
||||
else
|
||||
c.story_id = parent.id
|
||||
c.story_id = parser.parent.id
|
||||
end
|
||||
|
||||
if c.save
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
Date: Tue, 16 Jul 2013 10:37:11 -0500
|
||||
From: joshua stein <jcs@jcs.org>
|
||||
To: ##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs
|
||||
Subject: Re: Lobsters by mail [announce]
|
||||
Message-ID: <20130716103519.78e3be260a@5d7607215bd9704>
|
||||
References: <story.jg3eet.1372275536@lobste.rs>
|
||||
<comment.nh0wig.1373986371@lobste.rs>
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=us-ascii
|
||||
Content-Disposition: inline
|
||||
In-Reply-To: <comment.##COMMENT_ID##.1373986371@lobste.rs>
|
||||
X-No-Archive: Yes
|
||||
|
||||
It hasn't decreased any measurable amount but since the traffic to
|
||||
the site is increasing a bit each week, it's hard to tell.
|
|
@ -0,0 +1,36 @@
|
|||
Return-Path: <nobody@lobste.rs>
|
||||
X-Original-To: jcs@jcs.org
|
||||
Delivered-To: superblock.net-jcs@filter
|
||||
Received: from vmail.superblock.net (localhost.superblock.net [127.0.0.1])
|
||||
by vmail.superblock.net (Postfix) with ESMTP id 1657A391FB
|
||||
for <jcs@jcs.org>; Tue, 16 Jul 2013 10:40:08 -0500 (CDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=lobste.rs; h=from:reply-to
|
||||
:to:list-id:list-unsubscribe:content-type:message-id:in-reply-to
|
||||
:references:date:subject; s=20120304; bh=wfuy7M5Juh21GniDc9U1TGw
|
||||
mqLo=; b=1UFXtiD7QtaaoXfqnVXFMperUlfxsPV8HbPVrkwiB8Yzi5L2AkRdlpD
|
||||
hunv1x7u1U/VXhHI1eOkp0LkSUc6Zi7legiyPXnWvOye3cTbOmp29KUhLls9gjGD
|
||||
7inNgqR52uRB6NTxdDtrMZYDjUXJtHZdlLwgvcB6BKj7WPtvWx/A=
|
||||
DomainKey-Signature: a=rsa-sha1; c=nofws; d=lobste.rs; h=from:reply-to
|
||||
:to:list-id:list-unsubscribe:content-type:message-id:in-reply-to
|
||||
:references:date:subject; q=dns; s=20120304; b=WGF2WkJDYzLWJ8jhS
|
||||
TNERxmtU0qQRqI04yIuIJNHDsr0pwGR01phyvdoZfrJP+RJFDQ7SbTCl+qvXjdcJ
|
||||
XRn+8zLEd1Mg8Hy2PzZuBVLMxXcJ+WGFTxUbArupByqp9qJnCPGusJmrCEIQCC+N
|
||||
KqOAQotiWz9B5x5oFaBj97ZbSM=
|
||||
To: ##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs
|
||||
List-Id: Lobsters <##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs>
|
||||
List-Unsubscribe: <https://lobste.rs/settings>
|
||||
Precedence: list
|
||||
Content-Type: text/plain; charset="us-ascii"
|
||||
Message-ID: <comment.jow5ro.1373989038@lobste.rs>
|
||||
In-Reply-To: <comment.nh0wig.1373986371@lobste.rs>
|
||||
References: <story.jg3eet.1372275536@lobste.rs>
|
||||
<comment.nh0wig.1373986371@lobste.rs>
|
||||
Date: Tue, 16 Jul 2013 10:37:18 -0500
|
||||
Subject: Re: Lobsters by mail [announce]
|
||||
X-BeenThere: ##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs
|
||||
|
||||
It hasn't decreased any measurable amount but since the traffic to
|
||||
the site is increasing a bit each week, it's hard to tell.
|
||||
|
||||
--
|
||||
Vote: https://lobste.rs/s/jg3eet/_/comments/jow5ro
|
|
@ -0,0 +1,18 @@
|
|||
Date: Tue, 16 Jul 2013 10:37:11 -0500
|
||||
From: joshua stein <jcs@jcs.org>
|
||||
To: ##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs
|
||||
Subject: Re: Lobsters by mail [announce]
|
||||
Message-ID: <20130716103519.78e3be260a@5d7607215bd9704>
|
||||
References: <story.jg3eet.1372275536@lobste.rs>
|
||||
<comment.nh0wig.1373986371@lobste.rs>
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=us-ascii
|
||||
Content-Disposition: inline
|
||||
In-Reply-To: <comment.##COMMENT_ID##.1373986371@lobste.rs>
|
||||
X-No-Archive: Yes
|
||||
|
||||
It hasn't decreased any measurable amount but since the traffic to
|
||||
the site is increasing a bit each week, it's hard to tell.
|
||||
|
||||
--
|
||||
this is my signature
|
|
@ -0,0 +1,24 @@
|
|||
Date: Tue, 16 Jul 2013 10:37:11 -0500
|
||||
From: joshua stein <jcs@jcs.org>
|
||||
To: ##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs
|
||||
Subject: Re: Lobsters by mail [announce]
|
||||
Message-ID: <20130716103519.78e3be260a@5d7607215bd9704>
|
||||
References: <story.jg3eet.1372275536@lobste.rs>
|
||||
<comment.nh0wig.1373986371@lobste.rs>
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=us-ascii
|
||||
Content-Disposition: inline
|
||||
In-Reply-To: <comment.##COMMENT_ID##.1373986371@lobste.rs>
|
||||
X-No-Archive: Yes
|
||||
|
||||
On Sun, Feb 2, 2014 at 11:51 PM, blah <blah@lobste.rs> wrote:
|
||||
|
||||
> This is some quoted text.
|
||||
> With an attribution line
|
||||
|
||||
It hasn't decreased any measurable amount but since the traffic to
|
||||
the site is increasing a bit each week, it's hard to tell.
|
||||
|
||||
--
|
||||
this is my signature
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
require "spec_helper"
|
||||
|
||||
describe EmailParser do
|
||||
before(:each) do
|
||||
@user = User.make!
|
||||
@story = Story.make!(:user => @user)
|
||||
|
||||
@commentor = User.make!
|
||||
@comment = Comment.make!(:story => @story, :user => @commentor)
|
||||
|
||||
@emailer = User.make!(:mailing_list_enabled => true)
|
||||
|
||||
@emails = {}
|
||||
Dir.glob("#{Rails.root}/spec/fixtures/inbound_emails/*.eml").
|
||||
each do |f|
|
||||
@emails[File.basename(f).gsub(/\..*/, "")] = File.read(f).
|
||||
gsub(/##SHORTNAME##/, Rails.application.shortname).
|
||||
gsub(/##MAILING_LIST_TOKEN##/, @emailer.mailing_list_token).
|
||||
gsub(/##COMMENT_ID##/, @comment.short_id)
|
||||
end
|
||||
end
|
||||
|
||||
it "can parse a valid e-mail" do
|
||||
parser = EmailParser.new(
|
||||
"user@example.com",
|
||||
Rails.application.shortname +
|
||||
"-#{@emailer.mailing_list_token}@example.org",
|
||||
@emails["1"])
|
||||
|
||||
parser.should_not == nil
|
||||
parser.email.should_not == nil
|
||||
|
||||
parser.user_token.should == @emailer.mailing_list_token
|
||||
parser.been_here?.should == false
|
||||
parser.sending_user.id.should == @emailer.id
|
||||
|
||||
parser.parent.class.should == Comment
|
||||
end
|
||||
|
||||
it "rejects mailing loops" do
|
||||
parser = EmailParser.new(
|
||||
"user@example.com",
|
||||
Rails.application.shortname +
|
||||
"-#{@emailer.mailing_list_token}@example.org",
|
||||
@emails["2"])
|
||||
|
||||
parser.email.should_not == nil
|
||||
parser.been_here?.should == true
|
||||
end
|
||||
|
||||
it "strips signatures" do
|
||||
parser = EmailParser.new(
|
||||
"user@example.com",
|
||||
Rails.application.shortname +
|
||||
"-#{@emailer.mailing_list_token}@example.org",
|
||||
@emails["3"])
|
||||
|
||||
parser.email.should_not == nil
|
||||
parser.body.should == "It hasn't decreased any measurable amount but since the traffic to\nthe site is increasing a bit each week, it's hard to tell."
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue