From: William Morgan Date: Sun, 26 Apr 2009 16:10:56 +0000 (-0400) Subject: move MBox.parse_header -> Source.parse_raw_email_header X-Git-Url: https://git.notmuchmail.org/git?a=commitdiff_plain;h=47801067362e6bb2ba503da7c6bdea4a16153ac8;p=sup move MBox.parse_header -> Source.parse_raw_email_header --- diff --git a/lib/sup/draft.rb b/lib/sup/draft.rb index 35fac30..32266b5 100644 --- a/lib/sup/draft.rb +++ b/lib/sup/draft.rb @@ -79,9 +79,7 @@ class DraftLoader < Source def fn_for_offset o; File.join(@dir, o.to_s); end def load_header offset - File.open fn_for_offset(offset) do |f| - return MBox::read_header(f) - end + File.open(fn_for_offset(offset)) { |f| parse_raw_email_header f } end def load_message offset diff --git a/lib/sup/imap.rb b/lib/sup/imap.rb index 4eb13f4..7508c2c 100644 --- a/lib/sup/imap.rb +++ b/lib/sup/imap.rb @@ -93,7 +93,7 @@ class IMAP < Source def == o; o.is_a?(IMAP) && o.uri == self.uri && o.username == self.username; end def load_header id - MBox::read_header StringIO.new(raw_header(id)) + parse_raw_email_header StringIO.new(raw_header(id)) end def load_message id diff --git a/lib/sup/maildir.rb b/lib/sup/maildir.rb index 3d584f7..a9ae05c 100644 --- a/lib/sup/maildir.rb +++ b/lib/sup/maildir.rb @@ -56,7 +56,7 @@ class Maildir < Source def load_header id scan_mailbox - with_file_for(id) { |f| MBox::read_header f } + with_file_for(id) { |f| parse_raw_email_header f } end def load_message id diff --git a/lib/sup/mbox.rb b/lib/sup/mbox.rb index 223bb7c..5dd89b7 100644 --- a/lib/sup/mbox.rb +++ b/lib/sup/mbox.rb @@ -5,49 +5,7 @@ require "sup/rfc2047" module Redwood -## some utility functions. actually these are not mbox-specific at all -## and should be moved somewhere else. -## -## TODO: move functionality to somewhere better, like message.rb module MBox BREAK_RE = /^From \S+/ ######### TODO REMOVE ME - - ## WARNING! THIS IS A SPEED-CRITICAL SECTION. Everything you do here will have - ## a significant effect on Sup's processing speed of email from ALL sources. - ## Little things like string interpolation, regexp interpolation, += vs <<, - ## all have DRAMATIC effects. BE CAREFUL WHAT YOU DO! - def read_header f - header = {} - last = nil - - while(line = f.gets) - case line - ## these three can occur multiple times, and we want the first one - when /^(Delivered-To|X-Original-To|Envelope-To):\s*(.*?)\s*$/i; header[last = $1.downcase] ||= $2 - ## mark this guy specially. not sure why i care. - when /^([^:\s]+):\s*(.*?)\s*$/i; header[last = $1.downcase] = $2 - when /^\r*$/; break - else - if last - header[last] << " " unless header[last].empty? - header[last] << line.strip - end - end - end - - %w(subject from to cc bcc).each do |k| - v = header[k] or next - next unless Rfc2047.is_encoded? v - header[k] = begin - Rfc2047.decode_to $encoding, v - rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e - Redwood::log "warning: error decoding RFC 2047 header (#{e.class.name}): #{e.message}" - v - end - end - header - end - - module_function :read_header end end diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb index fbf31ae..c623239 100644 --- a/lib/sup/mbox/loader.rb +++ b/lib/sup/mbox/loader.rb @@ -59,7 +59,7 @@ class Loader < Source unless l =~ BREAK_RE raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}." end - header = MBox::read_header @f + header = parse_raw_email_header @f end header end diff --git a/lib/sup/modes/edit-message-mode.rb b/lib/sup/modes/edit-message-mode.rb index 31aa897..51f0824 100644 --- a/lib/sup/modes/edit-message-mode.rb +++ b/lib/sup/modes/edit-message-mode.rb @@ -212,7 +212,7 @@ protected def parse_file fn File.open(fn) do |f| - header = MBox::read_header f + header = Source.parse_raw_email_header f body = f.readlines.map { |l| l.chomp } header.delete_if { |k, v| NON_EDITABLE_HEADERS.member? k } diff --git a/lib/sup/source.rb b/lib/sup/source.rb index 6510aae..91cd71f 100644 --- a/lib/sup/source.rb +++ b/lib/sup/source.rb @@ -99,7 +99,49 @@ class Source end end + ## read a raw email header from a filehandle (or anything that responds to + ## #gets), and turn it into a hash of key-value pairs. + ## + ## WARNING! THIS IS A SPEED-CRITICAL SECTION. Everything you do here will have + ## a significant effect on Sup's processing speed of email from ALL sources. + ## Little things like string interpolation, regexp interpolation, += vs <<, + ## all have DRAMATIC effects. BE CAREFUL WHAT YOU DO! + def self.parse_raw_email_header f + header = {} + last = nil + + while(line = f.gets) + case line + ## these three can occur multiple times, and we want the first one + when /^(Delivered-To|X-Original-To|Envelope-To):\s*(.*?)\s*$/i; header[last = $1.downcase] ||= $2 + ## mark this guy specially. not sure why i care. + when /^([^:\s]+):\s*(.*?)\s*$/i; header[last = $1.downcase] = $2 + when /^\r*$/; break + else + if last + header[last] << " " unless header[last].empty? + header[last] << line.strip + end + end + end + + %w(subject from to cc bcc).each do |k| + v = header[k] or next + next unless Rfc2047.is_encoded? v + header[k] = begin + Rfc2047.decode_to $encoding, v + rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e + #Redwood::log "warning: error decoding RFC 2047 header (#{e.class.name}): #{e.message}" + v + end + end + header + end + protected + + ## convenience function + def parse_raw_email_header f; self.class.parse_raw_email_header f end def Source.expand_filesystem_uri uri uri.gsub "~", File.expand_path("~") diff --git a/test/dummy_source.rb b/test/dummy_source.rb index f3afa31..b84e64e 100644 --- a/test/dummy_source.rb +++ b/test/dummy_source.rb @@ -26,7 +26,7 @@ class DummySource < Source end def load_header offset - MBox::read_header StringIO.new(raw_header(offset)) + Source.parse_raw_email_header StringIO.new(raw_header(offset)) end def load_message offset diff --git a/test/test_header_parsing.rb b/test/test_header_parsing.rb new file mode 100644 index 0000000..7368d81 --- /dev/null +++ b/test/test_header_parsing.rb @@ -0,0 +1,107 @@ +#!/usr/bin/ruby + +require 'test/unit' +require 'sup' +require 'stringio' + +include Redwood + +class TestMBoxParsing < Test::Unit::TestCase + def setup + end + + def teardown + end + + def test_normal_headers + h = Source.parse_raw_email_header StringIO.new(< +To: Sally +EOS + + assert_equal "Bob ", h["from"] + assert_equal "Sally ", h["to"] + assert_nil h["message-id"] + end + + def test_multiline + h = Source.parse_raw_email_header StringIO.new(< +Subject: one two three + four five six +To: Sally +References: + +Seven: Eight +EOS + + assert_equal "one two three four five six", h["subject"] + assert_equal "Sally ", h["to"] + assert_equal " ", h["references"] + end + + def test_ignore_spacing + variants = [ + "Subject:one two three end\n", + "Subject: one two three end\n", + "Subject: one two three end \n", + ] + variants.each do |s| + h = Source.parse_raw_email_header StringIO.new(s) + assert_equal "one two three end", h["subject"] + end + end + + def test_message_id_ignore_spacing + variants = [ + "Message-Id: \n", + "Message-Id: \n", + ] + variants.each do |s| + h = Source.parse_raw_email_header StringIO.new(s) + assert_equal "", h["message-id"] + end + end + + def test_blank_lines + h = Source.parse_raw_email_header StringIO.new("") + assert_equal nil, h["message-id"] + end + + def test_empty_headers + variants = [ + "Message-Id: \n", + "Message-Id:\n", + ] + variants.each do |s| + h = Source.parse_raw_email_header StringIO.new(s) + assert_equal "", h["message-id"] + end + end + + def test_detect_end_of_headers + h = Source.parse_raw_email_header StringIO.new(< + +To: a dear friend +EOS + assert_equal "Bob ", h["from"] + assert_nil h["to"] + + h = Source.parse_raw_email_header StringIO.new(< +\r +To: a dear friend +EOS + assert_equal "Bob ", h["from"] + assert_nil h["to"] + + h = Source.parse_raw_email_header StringIO.new(< +\r\n\r +To: a dear friend +EOS + assert_equal "Bob ", h["from"] + assert_nil h["to"] + end +end diff --git a/test/test_mbox_parsing.rb b/test/test_mbox_parsing.rb deleted file mode 100644 index 3486f1b..0000000 --- a/test/test_mbox_parsing.rb +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/ruby - -require 'test/unit' -require 'sup' -require 'stringio' - -include Redwood - -class TestMBoxParsing < Test::Unit::TestCase - def setup - end - - def teardown - end - - def test_normal_headers - h = MBox.read_header StringIO.new(< -To: Sally -EOS - - assert_equal "Bob ", h["from"] - assert_equal "Sally ", h["to"] - assert_nil h["message-id"] - end - - def test_multiline - h = MBox.read_header StringIO.new(< -Subject: one two three - four five six -To: Sally -References: - -Seven: Eight -EOS - - assert_equal "one two three four five six", h["subject"] - assert_equal "Sally ", h["to"] - assert_equal " ", h["references"] - end - - def test_ignore_spacing - variants = [ - "Subject:one two three end\n", - "Subject: one two three end\n", - "Subject: one two three end \n", - ] - variants.each do |s| - h = MBox.read_header StringIO.new(s) - assert_equal "one two three end", h["subject"] - end - end - - def test_message_id_ignore_spacing - variants = [ - "Message-Id: \n", - "Message-Id: \n", - ] - variants.each do |s| - h = MBox.read_header StringIO.new(s) - assert_equal "", h["message-id"] - end - end - - def test_blank_lines - h = MBox.read_header StringIO.new("") - assert_equal nil, h["message-id"] - end - - def test_empty_headers - variants = [ - "Message-Id: \n", - "Message-Id:\n", - ] - variants.each do |s| - h = MBox.read_header StringIO.new(s) - assert_equal "", h["message-id"] - end - end - - def test_detect_end_of_headers - h = MBox.read_header StringIO.new(< - -To: a dear friend -EOS - assert_equal "Bob ", h["from"] - assert_nil h["to"] - - h = MBox.read_header StringIO.new(< -\r -To: a dear friend -EOS - assert_equal "Bob ", h["from"] - assert_nil h["to"] - - h = MBox.read_header StringIO.new(< -\r\n\r -To: a dear friend -EOS - assert_equal "Bob ", h["from"] - assert_nil h["to"] - end -end