move inbound e-mail parsing guts into an extra, add tests for it
This commit is contained in:
parent
2f55605645
commit
b75468a2c9
110
extras/email_parser.rb
Normal file
110
extras/email_parser.rb
Normal file
|
@ -0,0 +1,110 @@
|
||||||
|
# Parses one inbound mailing-list e-mail and exposes the pieces needed to
# turn it into a comment: the sending user, the story/comment being replied
# to, and the cleaned-up text/plain body.
#
# Construction never raises on a malformed message; a message that could not
# be parsed is signalled by #email being nil.
class EmailParser
  attr_reader :sender, :recipient, :email_text, :email

  # sender     - envelope sender address
  # recipient  - envelope recipient address ("<shortname>-<token>@host")
  # email_text - raw message text; it is converted to UTF-8 before use
  def initialize(sender, recipient, email_text)
    @sender = sender
    @recipient = recipient
    @email_text = email_text.forcibly_convert_to_utf8

    @email = nil
    begin
      # the mail gem stupidly spams STDERR while parsing e-mail, so silence
      # that stream to avoid anything getting back to postfix
      Utils.silence_stream(STDERR) do
        # parse the converted text so the parsed message and #email_text
        # always describe the same bytes
        @email = Mail.read_from_string(@email_text)
      end
    rescue
      # deliberately swallowed: callers detect a parse failure via a nil
      # #email
      @email = nil
    end

    @sending_user = nil
    @parent = nil
    @body = nil
  end

  # Returns the recipient's local part with the leading "<shortname>-"
  # prefix removed, i.e. the candidate mailing list token.
  def user_token
    @recipient.gsub(/\A#{escaped_shortname}-/, "").gsub(/@.*/, "")
  end

  # Returns true when the message carries an X-BeenThere header for this
  # site, meaning it is one of our own list mails looping back.
  def been_here?
    !!@email_text.match(/^X-BeenThere: #{escaped_shortname}-/i)
  end

  # Returns the active, mailing-list-enabled User whose token matches
  # #user_token, or nil when there is none.  A successful lookup is cached.
  def sending_user
    return @sending_user if @sending_user

    user = User.where(:mailing_list_enabled => true,
      :mailing_list_token => user_token).first
    @sending_user = user if user && user.is_active?

    @sending_user
  end

  # Returns the Comment or Story named by the In-Reply-To header, or nil when
  # the message was not parsed, the header does not look like one of our
  # message ids, or no such record exists.  A successful lookup is cached.
  def parent
    return @parent if @parent
    return nil unless email

    irt = email[:in_reply_to].to_s.gsub(/[^A-Za-z0-9@\.]/, "")

    if (m = irt.match(/^comment\.([^\.]+)\.\d+@/))
      @parent = Comment.where(:short_id => m[1]).first
    elsif (m = irt.match(/^story\.([^\.]+)\.\d+@/))
      @parent = Story.where(:short_id => m[1]).first
    end

    @parent
  end

  # Returns the text/plain body with any trailing signature removed and
  # surrounding whitespace stripped.  Returns nil when the message was not
  # parsed or contains no usable text/plain content.
  def body
    return @body if @body
    return nil unless email

    text = nil
    @possible_charset = nil

    if email.multipart?
      if (part = plain_text_part(email))
        text = part.body.to_s
        @possible_charset = charset_of(part)
      end

    # simple one-part
    elsif text_plain?(email)
      text = email.body.to_s
      @possible_charset = charset_of(email)

    elsif email.content_type.to_s.blank?
      # no content-type header, assume it's text/plain
      text = email.body.to_s
    end

    return nil unless text

    # TODO: use @possible_charset, but did previously forcing the entire
    # email_text to utf8 screw this up already?

    # TODO: try to strip out attribution line, followed by an optional blank
    # line, and then lines prefixed with >

    # try to remove sig lines, then trim.  the non-bang forms are used on
    # purpose: gsub!/strip! return nil when they change nothing, which would
    # make this method return nil for a perfectly good body
    @body = text.gsub(/^-- \n.+\z/m, "").strip
  end

  private

  # The site shortname, escaped for safe interpolation into a regexp.
  def escaped_shortname
    Regexp.escape(Rails.application.shortname.to_s)
  end

  # True when the message or part declares a text/plain content type.
  def text_plain?(part)
    !!part.content_type.to_s.match(/text\/plain/i)
  end

  # Picks the text/plain part of a multipart message, preferring:
  #   parts[0]          - multipart/alternative
  #   parts[0].parts[0] - text/plain
  #   parts[0].parts[1] - text/html
  # and falling back to a top-level text/plain part.  Returns nil when
  # neither layout yields one.
  def plain_text_part(message)
    first = message.parts.first
    nested = first ? first.parts : []

    candidates = nested.select {|part| text_plain?(part) }
    if candidates.empty?
      candidates = message.parts.select {|part| text_plain?(part) }
    end

    candidates.first
  end

  # Best-effort lookup of the declared charset; nil when it is missing or the
  # content-type parameters cannot be read.
  def charset_of(part)
    part.content_type_parameters["charset"]
  rescue
    nil
  end
end
|
|
@ -24,125 +24,47 @@ EX_NOUSER = 67
|
||||||
EX_TEMPFAIL = 75
|
EX_TEMPFAIL = 75
|
||||||
EX_UNAVAILABLE = 69
|
EX_UNAVAILABLE = 69
|
||||||
|
|
||||||
recipient = ARGV[0]
|
|
||||||
user_token = recipient.gsub(/^#{Rails.application.shortname}-/, "").
|
|
||||||
gsub(/@.*/, "")
|
|
||||||
sender = ARGV[1]
|
|
||||||
message = ""
|
message = ""
|
||||||
email = nil
|
|
||||||
|
|
||||||
while !STDIN.eof?
|
while !STDIN.eof?
|
||||||
message += STDIN.gets.to_s
|
message += STDIN.gets.to_s
|
||||||
end
|
end
|
||||||
|
|
||||||
message = message.forcibly_convert_to_utf8
|
parser = EmailParser.new(sender = ARGV[1], recipient = ARGV[0], message)
|
||||||
|
|
||||||
if message.match(/^X-BeenThere: #{Rails.application.shortname}-/i)
|
if parser.been_here?
|
||||||
# avoid looping
|
# avoid looping, quietly
|
||||||
exit
|
exit
|
||||||
end
|
|
||||||
|
|
||||||
sending_user = User.where(:mailing_list_enabled => true,
|
elsif !parser.sending_user
|
||||||
:mailing_list_token => user_token).first
|
STDERR.puts "no active user with mailing list token #{parser.user_token}"
|
||||||
|
|
||||||
if !sending_user || !sending_user.is_active?
|
|
||||||
STDERR.puts "no user with mailing list token #{user_token}"
|
|
||||||
|
|
||||||
# if this looks like a user token but invalid, generate a bounce to be
|
# if this looks like a user token but invalid, generate a bounce to be
|
||||||
# helpful. otherwise suppress it to avoid talking back to spammers
|
# helpful. otherwise suppress it to avoid talking back to spammers
|
||||||
exit(recipient.match(/^#{Rails.application.shortname}-/) ? EX_NOUSER : 0)
|
exit(parser.user_token ? EX_NOUSER : 0)
|
||||||
end
|
|
||||||
|
|
||||||
# the mail gem stupidly spams STDERR while parsing e-mail, so silence that
|
elsif !parser.email
|
||||||
# stream to avoid anything getting back to postfix
|
|
||||||
begin
|
|
||||||
Utils.silence_stream(STDERR) do
|
|
||||||
email = Mail.read_from_string(message)
|
|
||||||
end
|
|
||||||
|
|
||||||
if !email
|
|
||||||
raise
|
|
||||||
end
|
|
||||||
rescue
|
|
||||||
STDERR.puts "error parsing e-mail"
|
STDERR.puts "error parsing e-mail"
|
||||||
exit EX_UNAVAILABLE
|
exit EX_UNAVAILABLE
|
||||||
end
|
|
||||||
|
|
||||||
# figure out what this reply is to
|
elsif !parser.parent
|
||||||
irt = email[:in_reply_to].to_s.gsub(/[^A-Za-z0-9@\.]/, "")
|
|
||||||
|
|
||||||
if m = irt.match(/^comment\.([^\.]+)\.\d+@/)
|
|
||||||
parent = Comment.where(:short_id => m[1]).first
|
|
||||||
elsif m = irt.match(/^story\.([^\.]+)\.\d+@/)
|
|
||||||
parent = Story.where(:short_id => m[1]).first
|
|
||||||
end
|
|
||||||
|
|
||||||
if !parent
|
|
||||||
STDERR.puts "no valid comment or story being replied to"
|
STDERR.puts "no valid comment or story being replied to"
|
||||||
exit EX_NOUSER
|
exit EX_NOUSER
|
||||||
end
|
|
||||||
|
|
||||||
body = nil
|
elsif !parser.body.present?
|
||||||
possible_charset = nil
|
|
||||||
|
|
||||||
if email.multipart?
|
|
||||||
# parts[0] - multipart/alternative
|
|
||||||
# parts[0].parts[0] - text/plain
|
|
||||||
# parts[0].parts[1] - text/html
|
|
||||||
if (p = email.parts.first.parts.select{|p|
|
|
||||||
p.content_type.match(/text\/plain/) }).any?
|
|
||||||
begin
|
|
||||||
possible_charset = p.first.content_type_parameters["charset"]
|
|
||||||
rescue
|
|
||||||
end
|
|
||||||
|
|
||||||
# parts[0] - text/plain
|
|
||||||
elsif (p = email.parts.select{|p|
|
|
||||||
p.content_type.match(/text\/plain/) }).any?
|
|
||||||
body = p.first.body.to_s
|
|
||||||
|
|
||||||
begin
|
|
||||||
possible_charset = p.first.content_type_parameters["charset"]
|
|
||||||
rescue
|
|
||||||
end
|
|
||||||
end
|
|
||||||
elsif email.content_type.to_s.match(/text\/plain/)
|
|
||||||
body = email.body.to_s
|
|
||||||
|
|
||||||
begin
|
|
||||||
possible_charset = email.content_type_parameters["charset"]
|
|
||||||
rescue
|
|
||||||
end
|
|
||||||
|
|
||||||
elsif !email.content_type.to_s.present?
|
|
||||||
# no content-type header, assume it's text/plain
|
|
||||||
body = email.body.to_s
|
|
||||||
end
|
|
||||||
|
|
||||||
if !body.present?
|
|
||||||
# oh well
|
|
||||||
STDERR.puts "no valid text/plain body found"
|
STDERR.puts "no valid text/plain body found"
|
||||||
exit EX_UNAVAILABLE
|
exit EX_UNAVAILABLE
|
||||||
end
|
end
|
||||||
|
|
||||||
# try to remove sig lines
|
|
||||||
body.gsub!(/^-- \n.+\z/, "")
|
|
||||||
|
|
||||||
# TODO: try to strip out attribution line, followed by an optional blank line,
|
|
||||||
# and then lines prefixed with >
|
|
||||||
|
|
||||||
body.strip!
|
|
||||||
|
|
||||||
c = Comment.new
|
c = Comment.new
|
||||||
c.user_id = sending_user.id
|
c.user_id = parser.sending_user.id
|
||||||
c.comment = body
|
c.comment = parser.body
|
||||||
c.is_from_email = true
|
c.is_from_email = true
|
||||||
|
|
||||||
if parent.is_a?(Comment)
|
if parser.parent.is_a?(Comment)
|
||||||
c.story_id = parent.story_id
|
c.story_id = parser.parent.story_id
|
||||||
c.parent_comment_id = parent.id
|
c.parent_comment_id = parser.parent.id
|
||||||
else
|
else
|
||||||
c.story_id = parent.id
|
c.story_id = parser.parent.id
|
||||||
end
|
end
|
||||||
|
|
||||||
if c.save
|
if c.save
|
||||||
|
|
15
spec/fixtures/inbound_emails/1.eml
vendored
Normal file
15
spec/fixtures/inbound_emails/1.eml
vendored
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
Date: Tue, 16 Jul 2013 10:37:11 -0500
|
||||||
|
From: joshua stein <jcs@jcs.org>
|
||||||
|
To: ##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs
|
||||||
|
Subject: Re: Lobsters by mail [announce]
|
||||||
|
Message-ID: <20130716103519.78e3be260a@5d7607215bd9704>
|
||||||
|
References: <story.jg3eet.1372275536@lobste.rs>
|
||||||
|
<comment.nh0wig.1373986371@lobste.rs>
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=us-ascii
|
||||||
|
Content-Disposition: inline
|
||||||
|
In-Reply-To: <comment.##COMMENT_ID##.1373986371@lobste.rs>
|
||||||
|
X-No-Archive: Yes
|
||||||
|
|
||||||
|
It hasn't decreased any measurable amount but since the traffic to
|
||||||
|
the site is increasing a bit each week, it's hard to tell.
|
36
spec/fixtures/inbound_emails/2.eml
vendored
Normal file
36
spec/fixtures/inbound_emails/2.eml
vendored
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
Return-Path: <nobody@lobste.rs>
|
||||||
|
X-Original-To: jcs@jcs.org
|
||||||
|
Delivered-To: superblock.net-jcs@filter
|
||||||
|
Received: from vmail.superblock.net (localhost.superblock.net [127.0.0.1])
|
||||||
|
by vmail.superblock.net (Postfix) with ESMTP id 1657A391FB
|
||||||
|
for <jcs@jcs.org>; Tue, 16 Jul 2013 10:40:08 -0500 (CDT)
|
||||||
|
DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=lobste.rs; h=from:reply-to
|
||||||
|
:to:list-id:list-unsubscribe:content-type:message-id:in-reply-to
|
||||||
|
:references:date:subject; s=20120304; bh=wfuy7M5Juh21GniDc9U1TGw
|
||||||
|
mqLo=; b=1UFXtiD7QtaaoXfqnVXFMperUlfxsPV8HbPVrkwiB8Yzi5L2AkRdlpD
|
||||||
|
hunv1x7u1U/VXhHI1eOkp0LkSUc6Zi7legiyPXnWvOye3cTbOmp29KUhLls9gjGD
|
||||||
|
7inNgqR52uRB6NTxdDtrMZYDjUXJtHZdlLwgvcB6BKj7WPtvWx/A=
|
||||||
|
DomainKey-Signature: a=rsa-sha1; c=nofws; d=lobste.rs; h=from:reply-to
|
||||||
|
:to:list-id:list-unsubscribe:content-type:message-id:in-reply-to
|
||||||
|
:references:date:subject; q=dns; s=20120304; b=WGF2WkJDYzLWJ8jhS
|
||||||
|
TNERxmtU0qQRqI04yIuIJNHDsr0pwGR01phyvdoZfrJP+RJFDQ7SbTCl+qvXjdcJ
|
||||||
|
XRn+8zLEd1Mg8Hy2PzZuBVLMxXcJ+WGFTxUbArupByqp9qJnCPGusJmrCEIQCC+N
|
||||||
|
KqOAQotiWz9B5x5oFaBj97ZbSM=
|
||||||
|
To: ##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs
|
||||||
|
List-Id: Lobsters <##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs>
|
||||||
|
List-Unsubscribe: <https://lobste.rs/settings>
|
||||||
|
Precedence: list
|
||||||
|
Content-Type: text/plain; charset="us-ascii"
|
||||||
|
Message-ID: <comment.jow5ro.1373989038@lobste.rs>
|
||||||
|
In-Reply-To: <comment.nh0wig.1373986371@lobste.rs>
|
||||||
|
References: <story.jg3eet.1372275536@lobste.rs>
|
||||||
|
<comment.nh0wig.1373986371@lobste.rs>
|
||||||
|
Date: Tue, 16 Jul 2013 10:37:18 -0500
|
||||||
|
Subject: Re: Lobsters by mail [announce]
|
||||||
|
X-BeenThere: ##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs
|
||||||
|
|
||||||
|
It hasn't decreased any measurable amount but since the traffic to
|
||||||
|
the site is increasing a bit each week, it's hard to tell.
|
||||||
|
|
||||||
|
--
|
||||||
|
Vote: https://lobste.rs/s/jg3eet/_/comments/jow5ro
|
18
spec/fixtures/inbound_emails/3.eml
vendored
Normal file
18
spec/fixtures/inbound_emails/3.eml
vendored
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
Date: Tue, 16 Jul 2013 10:37:11 -0500
|
||||||
|
From: joshua stein <jcs@jcs.org>
|
||||||
|
To: ##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs
|
||||||
|
Subject: Re: Lobsters by mail [announce]
|
||||||
|
Message-ID: <20130716103519.78e3be260a@5d7607215bd9704>
|
||||||
|
References: <story.jg3eet.1372275536@lobste.rs>
|
||||||
|
<comment.nh0wig.1373986371@lobste.rs>
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=us-ascii
|
||||||
|
Content-Disposition: inline
|
||||||
|
In-Reply-To: <comment.##COMMENT_ID##.1373986371@lobste.rs>
|
||||||
|
X-No-Archive: Yes
|
||||||
|
|
||||||
|
It hasn't decreased any measurable amount but since the traffic to
|
||||||
|
the site is increasing a bit each week, it's hard to tell.
|
||||||
|
|
||||||
|
--
|
||||||
|
this is my signature
|
24
spec/fixtures/inbound_emails/4.eml
vendored
Normal file
24
spec/fixtures/inbound_emails/4.eml
vendored
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
Date: Tue, 16 Jul 2013 10:37:11 -0500
|
||||||
|
From: joshua stein <jcs@jcs.org>
|
||||||
|
To: ##SHORTNAME##-##MAILING_LIST_TOKEN##@lobste.rs
|
||||||
|
Subject: Re: Lobsters by mail [announce]
|
||||||
|
Message-ID: <20130716103519.78e3be260a@5d7607215bd9704>
|
||||||
|
References: <story.jg3eet.1372275536@lobste.rs>
|
||||||
|
<comment.nh0wig.1373986371@lobste.rs>
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=us-ascii
|
||||||
|
Content-Disposition: inline
|
||||||
|
In-Reply-To: <comment.##COMMENT_ID##.1373986371@lobste.rs>
|
||||||
|
X-No-Archive: Yes
|
||||||
|
|
||||||
|
On Sun, Feb 2, 2014 at 11:51 PM, blah <blah@lobste.rs> wrote:
|
||||||
|
|
||||||
|
> This is some quoted text.
|
||||||
|
> With an attribution line
|
||||||
|
|
||||||
|
It hasn't decreased any measurable amount but since the traffic to
|
||||||
|
the site is increasing a bit each week, it's hard to tell.
|
||||||
|
|
||||||
|
--
|
||||||
|
this is my signature
|
||||||
|
|
61
spec/models/email_parser_spec.rb
Normal file
61
spec/models/email_parser_spec.rb
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
require "spec_helper"
|
||||||
|
|
||||||
|
describe EmailParser do
  # Builds a parser for the named fixture, addressed to the mailing list
  # address of @emailer and sent from a fixed envelope sender.
  def parser_for(fixture)
    list_address = Rails.application.shortname +
      "-#{@emailer.mailing_list_token}@example.org"

    EmailParser.new("user@example.com", list_address, @emails[fixture])
  end

  before(:each) do
    @user = User.make!
    @story = Story.make!(:user => @user)

    @commentor = User.make!
    @comment = Comment.make!(:story => @story, :user => @commentor)

    @emailer = User.make!(:mailing_list_enabled => true)

    # load every fixture keyed by its basename ("1", "2", ...), filling in
    # the placeholders with values from the records created above
    fixtures = Dir.glob("#{Rails.root}/spec/fixtures/inbound_emails/*.eml")
    @emails = fixtures.each_with_object({}) do |path, loaded|
      key = File.basename(path).gsub(/\..*/, "")
      template = File.read(path)

      loaded[key] = template.
        gsub(/##SHORTNAME##/, Rails.application.shortname).
        gsub(/##MAILING_LIST_TOKEN##/, @emailer.mailing_list_token).
        gsub(/##COMMENT_ID##/, @comment.short_id)
    end
  end

  it "can parse a valid e-mail" do
    parser = parser_for("1")

    parser.should_not == nil
    parser.email.should_not == nil

    parser.user_token.should == @emailer.mailing_list_token
    parser.been_here?.should == false
    parser.sending_user.id.should == @emailer.id

    parser.parent.class.should == Comment
  end

  it "rejects mailing loops" do
    parser = parser_for("2")

    parser.email.should_not == nil
    parser.been_here?.should == true
  end

  it "strips signatures" do
    parser = parser_for("3")

    parser.email.should_not == nil
    parser.body.should == "It hasn't decreased any measurable amount but since the traffic to\nthe site is increasing a bit each week, it's hard to tell."
  end
end
|
Loading…
Reference in a new issue