module Redwood
module MBox
- BREAK_RE = /^From \S+@\S+ /
+ BREAK_RE = /^From \S+ (.+)$/
+
+ def is_break_line? l
+ l =~ BREAK_RE or return false
+ time = $1
+ begin
+ ## hack -- make Time.parse fail when trying to substitute values from Time.now
+ Time.parse time, 0
+ true
+ rescue NoMethodError
+ Redwood::log "found invalid date in potential mbox split line, not splitting: #{l.inspect}"
+ false
+ end
+ end
+ module_function :is_break_line?
end
end
attr_accessor :labels
## uri_or_fp is horrific. need to refactor.
- def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil, labels=[]
+ def initialize uri_or_fp, start_offset=0, usual=true, archived=false, id=nil, labels=[]
@mutex = Mutex.new
@labels = ((labels || []) - LabelManager::RESERVED_LABELS).uniq.freeze
@mutex.synchronize do
@f.seek offset
l = @f.gets
- unless l =~ BREAK_RE
+ unless MBox::is_break_line? l
raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}."
end
header = parse_raw_email_header @f
## "From" at the start of a message body line.
string = ""
l = @f.gets
- string << l until @f.eof? || (l = @f.gets) =~ BREAK_RE
+ string << l until @f.eof? || MBox::is_break_line?(l = @f.gets)
RMail::Parser.read string
rescue RMail::Parser::Error => e
raise FatalSourceError, "error parsing mbox file: #{e.message}"
@mutex.synchronize do
@f.seek cur_offset
string = ""
- until @f.eof? || (l = @f.gets) =~ BREAK_RE
+ until @f.eof? || MBox::is_break_line?(l = @f.gets)
string << l
end
self.cur_offset += string.length
@mutex.synchronize do
@f.seek offset
yield @f.gets
- until @f.eof? || (l = @f.gets) =~ BREAK_RE
+ until @f.eof? || MBox::is_break_line?(l = @f.gets)
yield l
end
end
## 2. at the beginning of an mbox separator (in all other
## cases).
- l = @f.gets or raise "next while at EOF"
+ l = @f.gets or return nil
if l =~ /^\s*$/ # case 1
returned_offset = @f.tell
@f.gets # now we're at a BREAK_RE, so skip past it
end
while(line = @f.gets)
- break if line =~ BREAK_RE
+ break if MBox::is_break_line? line
next_offset = @f.tell
end
end
assert_equal "Bob <bob@bob.com>", h["from"]
assert_nil h["to"]
end
+
+ def test_from_line_splitting
+ l = MBox::Loader.new StringIO.new(<<EOS)
+From sup-talk-bounces@rubyforge.org Mon Apr 27 12:56:18 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello there friend. How are you?
+
+From sea to shining sea
+
+From bob@bob.com I get only spam.
+
+From bob@bob.com
+
+From bob@bob.com
+
+(that second one has spaces at the endj
+
+This is the end of the email.
+EOS
+ offset, labels = l.next
+ assert_equal 0, offset
+ offset, labels = l.next
+ assert_nil offset
+ end
+
+ def test_more_from_line_splitting
+ l = MBox::Loader.new StringIO.new(<<EOS)
+From sup-talk-bounces@rubyforge.org Mon Apr 27 12:56:18 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello there friend. How are you?
+
+From bob@bob.com Mon Apr 27 12:56:19 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello again! Would you like to buy my products?
+EOS
+ offset, labels = l.next
+ assert_not_nil offset
+
+ offset, labels = l.next
+ assert_not_nil offset
+
+ offset, labels = l.next
+ assert_nil offset
+ end
end