From d06b80220e17580c61750b6b29f02c0ca2a7bd57 Mon Sep 17 00:00:00 2001 From: William Morgan Date: Tue, 28 Apr 2009 09:34:27 -0400 Subject: [PATCH] fix mbox splitting regexp I dunno. This helps with the "From problem", but at the expense of being more specific than the mbox spec really demands. I don't think there's a really right solution, in general (due to the mbox format being a fundamentally broken one), but I'm hoping this will work with all modern mbox files. --- lib/sup/mbox.rb | 2 +- lib/sup/mbox/loader.rb | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/sup/mbox.rb b/lib/sup/mbox.rb index 8497a37..33a8adb 100644 --- a/lib/sup/mbox.rb +++ b/lib/sup/mbox.rb @@ -10,7 +10,7 @@ module Redwood ## ## TODO: move functionality to somewhere better, like message.rb module MBox - BREAK_RE = /^From \S+/ + BREAK_RE = /^From \S+@\S+ / HEADER_RE = /\s*(.*?)\s*/ def read_header f diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb index 7fe9129..086510d 100644 --- a/lib/sup/mbox/loader.rb +++ b/lib/sup/mbox/loader.rb @@ -68,13 +68,12 @@ class Loader < Source @mutex.synchronize do @f.seek offset begin - RMail::Mailbox::MBoxReader.new(@f).each_message do |input| - m = RMail::Parser.read(input) - if m.body && m.body.is_a?(String) - m.body.gsub!(/^>From /, "From ") - end - return m - end + ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore + ## "From" at the start of a message body line. + string = "" + l = @f.gets + string << l until @f.eof? || (l = @f.gets) =~ BREAK_RE + RMail::Parser.read string rescue RMail::Parser::Error => e raise FatalSourceError, "error parsing mbox file: #{e.message}" end -- 2.45.2