X-Git-Url: https://git.notmuchmail.org/git?a=blobdiff_plain;f=lib%2Fsup%2Fmessage.rb;h=8ec8a673730312d5942e2512184d32a001d3481c;hb=881bac62392d86edf46bcb0d269a7195e41ff8c1;hp=efa2e73b4464e4910e5b31129eb1991801ee0b17;hpb=7105b6ea2722b94d9a53f69f986aaf3dac62c9a4;p=sup diff --git a/lib/sup/message.rb b/lib/sup/message.rb index efa2e73..8ec8a67 100644 --- a/lib/sup/message.rb +++ b/lib/sup/message.rb @@ -1,5 +1,6 @@ require 'tempfile' require 'time' +require 'iconv' module Redwood @@ -13,12 +14,13 @@ class MessageFormatError < StandardError; end ## specific module that would detect and link to /ruby-talk:\d+/ ## sequences in the text of an email. (how sweet would that be?) ## -## TODO: integrate with user's addressbook to render names -## appropriately. +## this class cathces all source exceptions. if the underlying source throws +## an error, it is caught and handled. + class Message SNIPPET_LEN = 80 RE_PATTERN = /^((re|re[\[\(]\d[\]\)]):\s*)+/i - + ## some utility methods class << self def normalize_subj s; s.gsub(RE_PATTERN, ""); end @@ -26,126 +28,106 @@ class Message def reify_subj s; subj_is_reply?(s) ? s : "Re: " + s; end end - class Attachment - attr_reader :content_type, :desc, :filename - def initialize content_type, desc, part - @content_type = content_type - @desc = desc - @part = part - @file = nil - desc =~ /filename="(.*?)"/ && @filename = $1 - end - - def view! - unless @file - @file = Tempfile.new "redwood.attachment" - @file.print self - @file.close - end - - ## TODO: handle unknown mime-types - system "/usr/bin/run-mailcap --action=view #{@content_type}:#{@file.path}" - end - - def to_s; @part.decode; end - end - - class Text - attr_reader :lines - def initialize lines - ## do some wrapping - @lines = lines.map { |l| l.chomp.wrap 80 }.flatten - end - end - - class Quote - attr_reader :lines - def initialize lines - @lines = lines - end - end - - class Signature - attr_reader :lines - def initialize lines - @lines = lines - end - end - QUOTE_PATTERN = /^\s{0,4}[>|\}]/ BLOCK_QUOTE_PATTERN = /^-----\s*Original Message\s*----+$/ QUOTE_START_PATTERN = /(^\s*Excerpts from)|(^\s*In message )|(^\s*In article )|(^\s*Quoting )|((wrote|writes|said|says)\s*:\s*$)/ - SIG_PATTERN = /(^-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)/ + SIG_PATTERN = /(^-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)|(^\s*--~--~-)|(^\s*--\+\+\*\*==)/ + MAX_SIG_DISTANCE = 15 # lines from the end - DEFAULT_SUBJECT = "(missing subject)" + DEFAULT_SUBJECT = "" DEFAULT_SENDER = "(missing sender)" attr_reader :id, :date, :from, :subj, :refs, :replytos, :to, :source, :cc, :bcc, :labels, :list_address, :recipient_email, :replyto, - :source_info, :status + :source_info, :chunks, :list_subscribe, :list_unsubscribe - bool_reader :dirty + bool_reader :dirty, :source_marked_read - ## if you specify a :header, will use values from that. otherwise, will try and - ## load the header from the source. + ## if you specify a :header, will use values from that. otherwise, + ## will try and load the header from the source. def initialize opts @source = opts[:source] or raise ArgumentError, "source can't be nil" @source_info = opts[:source_info] or raise ArgumentError, "source_info can't be nil" @snippet = opts[:snippet] || "" - @labels = opts[:labels] || [] + @have_snippet = !opts[:snippet].nil? + @labels = [] + (opts[:labels] || []) @dirty = false + @chunks = nil - read_header(opts[:header] || @source.load_header(@source_info)) + parse_header(opts[:header] || @source.load_header(@source_info)) end - def read_header header + def parse_header header header.each { |k, v| header[k.downcase] = v } + + @id = + if header["message-id"] + sanitize_message_id header["message-id"] + else + returning("sup-faked-" + Digest::MD5.hexdigest(raw_header)) do |id| + Redwood::log "faking message-id for message from #@from: #{id}" + end + end + + @from = + if header["from"] + PersonManager.person_for header["from"] + else + name = "Sup Auto-generated Fake Sender " + Redwood::log "faking from for message #@id: #{name}" + PersonManager.person_for name + end - %w(message-id date).each do |f| - raise MessageFormatError, "no #{f} field in header #{header.inspect} (source #@source offset #@source_info)" unless header.include? f - raise MessageFormatError, "nil #{f} field in header #{header.inspect} (source #@source offset #@source_info)" unless header[f] - end - - begin - date = header["date"] - @date = Time === date ? date : Time.parse(header["date"]) - rescue ArgumentError => e - raise MessageFormatError, "unparsable date #{header['date']}: #{e.message}" - end + date = header["date"] + @date = + case date + when Time + date + when String + begin + Time.parse date + rescue ArgumentError => e + raise MessageFormatError, "unparsable date #{header['date']}: #{e.message}" + end + else + Redwood::log "faking date header for #{@id}" + Time.now + end @subj = header.member?("subject") ? header["subject"].gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT - @from = Person.for header["from"] - @to = Person.for_several header["to"] - @cc = Person.for_several header["cc"] - @bcc = Person.for_several header["bcc"] - @id = header["message-id"] - @refs = (header["references"] || "").gsub(/[<>]/, "").split(/\s+/).flatten - @replytos = (header["in-reply-to"] || "").scan(/<(.*?)>/).flatten - @replyto = Person.for header["reply-to"] + @to = PersonManager.people_for header["to"] + @cc = PersonManager.people_for header["cc"] + @bcc = PersonManager.people_for header["bcc"] + @refs = (header["references"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first } + @replytos = (header["in-reply-to"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first } + + @replyto = PersonManager.person_for header["reply-to"] @list_address = if header["list-post"] - @list_address = Person.for header["list-post"].gsub(/^$/, "") + @list_address = PersonManager.person_for header["list-post"].gsub(/^$/, "") else nil end - @recipient_email = header["delivered-to"] - @status = header["status"] + @recipient_email = header["envelope-to"] || header["x-original-to"] || header["delivered-to"] + @source_marked_read = header["status"] == "RO" + @list_subscribe = header["list-subscribe"] + @list_unsubscribe = header["list-unsubscribe"] end - private :read_header + private :parse_header - def broken?; @source.broken?; end - def snippet; @snippet || to_chunks && @snippet; end + def snippet; @snippet || chunks && @snippet; end def is_list_message?; !@list_address.nil?; end - def is_draft?; DraftLoader === @source; end + def is_draft?; @source.is_a? DraftLoader; end def draft_filename raise "not a draft" unless is_draft? @source.fn_for_offset @source_info end + def sanitize_message_id mid; mid.gsub(/\s/, "") end + def save index - return if broken? - index.update_message self if @dirty + index.sync_message self if @dirty @dirty = false end @@ -171,16 +153,29 @@ class Message end ## this is called when the message body needs to actually be loaded. - def to_chunks + def load_from_source! @chunks ||= - if @source.broken? - [Text.new(error_message(@source.broken_msg.split("\n")))] + if @source.has_errors? + [Chunk::Text.new(error_message(@source.error.message.split("\n")))] else begin - read_header @source.load_header(@source_info) + ## we need to re-read the header because it contains information + ## that we don't store in the index. actually i think it's just + ## the mailing list address (if any), so this is kinda overkill. + ## i could just store that in the index, but i think there might + ## be other things like that in the future, and i'd rather not + ## bloat the index. + ## actually, it's also the differentiation between to/cc/bcc, + ## so i will keep this. + parse_header @source.load_header(@source_info) message_to_chunks @source.load_message(@source_info) - rescue SourceError, SocketError => e - [Text.new(error_message(e.message))] + rescue SourceError, SocketError, MessageFormatError => e + Redwood::log "problem getting messages from #{@source}: #{e.message}" + ## we need force_to_top here otherwise this window will cover + ## up the error message one + @source.error ||= e + Redwood::report_broken_sources :force_to_top => true + [Chunk::Text.new(error_message(e.message))] end end end @@ -190,8 +185,13 @@ class Message #@snippet... *********************************************************************** -* An error occurred while loading this message. It is possible that * -* the source has changed, or (in the case of remote sources) is down. * + An error occurred while loading this message. It is possible that + the source has changed, or (in the case of remote sources) is down. + You can check the log for errors, though hopefully an error window + should have popped up at some point. + + The message location was: + #@source##@source_info *********************************************************************** The error message was: @@ -199,38 +199,48 @@ The error message was: EOS end - def raw_header + ## wrap any source methods that might throw sourceerrors + def with_source_errors_handled begin - @source.raw_header @source_info + yield rescue SourceError => e + Redwood::log "problem getting messages from #{@source}: #{e.message}" + @source.error ||= e + Redwood::report_broken_sources :force_to_top => true error_message e.message end end - def raw_full_message - begin - @source.raw_full_message @source_info - rescue SourceError => e - error_message(e.message) - end + def raw_header + with_source_errors_handled { @source.raw_header @source_info } + end + + def raw_message + with_source_errors_handled { @source.raw_message @source_info } + end + + ## much faster than raw_message + def each_raw_message_line &b + with_source_errors_handled { @source.each_raw_message_line(@source_info, &b) } end def content + load_from_source! [ from && "#{from.name} #{from.email}", to.map { |p| "#{p.name} #{p.email}" }, cc.map { |p| "#{p.name} #{p.email}" }, bcc.map { |p| "#{p.name} #{p.email}" }, - to_chunks.select { |c| c.is_a? Text }.map { |c| c.lines }, + chunks.select { |c| c.is_a? Chunk::Text }.map { |c| c.lines }, Message.normalize_subj(subj), ].flatten.compact.join " " end - def basic_body_lines - to_chunks.find_all { |c| c.is_a?(Text) || c.is_a?(Quote) }.map { |c| c.lines }.flatten + def quotable_body_lines + chunks.find_all { |c| c.quotable? }.map { |c| c.lines }.flatten end - def basic_header_lines + def quotable_header_lines ["From: #{@from.full_address}"] + (@to.empty? ? [] : ["To: " + @to.map { |p| p.full_address }.join(", ")]) + (@cc.empty? ? [] : ["Cc: " + @cc.map { |p| p.full_address }.join(", ")]) + @@ -241,24 +251,142 @@ EOS private - ## everything RubyMail-specific goes here. - def message_to_chunks m - ret = [] << - case m.header.content_type - when "text/plain", nil - raise MessageFormatError, "no message body before decode (source #@source info #@source_info)" unless - m.body - body = m.decode or raise MessageFormatError, "no message body" - text_to_chunks body.normalize_whitespace.split("\n") - when /^multipart\// - nil + ## here's where we handle decoding mime attachments. unfortunately + ## but unsurprisingly, the world of mime attachments is a bit of a + ## mess. as an empiricist, i'm basing the following behavior on + ## observed mail rather than on interpretations of rfcs, so probably + ## this will have to be tweaked. + ## + ## the general behavior i want is: ignore content-disposition, at + ## least in so far as it suggests something being inline vs being an + ## attachment. (because really, that should be the recipient's + ## decision to make.) if a mime part is text/plain, OR if the user + ## decoding hook converts it, then decode it and display it + ## inline. for these decoded attachments, if it has associated + ## filename, then make it collapsable and individually saveable; + ## otherwise, treat it as regular body text. + ## + ## everything else is just an attachment and is not displayed + ## inline. + ## + ## so, in contrast to mutt, the user is not exposed to the workings + ## of the gruesome slaughterhouse and sausage factory that is a + ## mime-encoded message, but need only see the delicious end + ## product. + + def multipart_signed_to_chunks m +# Redwood::log ">> multipart SIGNED: #{m.header['Content-Type']}: #{m.body.size}" + if m.body.size != 2 + Redwood::log "warning: multipart/signed with #{m.body.size} parts (expecting 2)" + return + end + + payload, signature = m.body + if signature.multipart? + Redwood::log "warning: multipart/signed with payload multipart #{payload.multipart?} and signature multipart #{signature.multipart?}" + return + end + + if payload.header.content_type == "application/pgp-signature" + Redwood::log "warning: multipart/signed with payload content type #{payload.header.content_type}" + return + end + + if signature.header.content_type != "application/pgp-signature" + Redwood::log "warning: multipart/signed with signature content type #{signature.header.content_type}" + return + end + + [CryptoManager.verify(payload, signature), message_to_chunks(payload)].flatten.compact + end + + def multipart_encrypted_to_chunks m + Redwood::log ">> multipart ENCRYPTED: #{m.header['Content-Type']}: #{m.body.size}" + if m.body.size != 2 + Redwood::log "warning: multipart/encrypted with #{m.body.size} parts (expecting 2)" + return + end + + control, payload = m.body + if control.multipart? + Redwood::log "warning: multipart/encrypted with control multipart #{control.multipart?} and payload multipart #{payload.multipart?}" + return + end + + if payload.header.content_type != "application/octet-stream" + Redwood::log "warning: multipart/encrypted with payload content type #{payload.header.content_type}" + return + end + + if control.header.content_type != "application/pgp-encrypted" + Redwood::log "warning: multipart/encrypted with control content type #{signature.header.content_type}" + return + end + + decryptedm, sig, notice = CryptoManager.decrypt payload + children = message_to_chunks(decryptedm) if decryptedm + [notice, sig, children].flatten.compact + end + + def message_to_chunks m, sibling_types=[] + if m.multipart? + chunks = + case m.header.content_type + when "multipart/signed" + multipart_signed_to_chunks m + when "multipart/encrypted" + multipart_encrypted_to_chunks m + end + + unless chunks + sibling_types = m.body.map { |p| p.header.content_type } + chunks = m.body.map { |p| message_to_chunks p, sibling_types }.flatten.compact + end + + chunks + elsif m.header.content_type == "message/rfc822" + payload = RMail::Parser.read(m.body) + from = payload.header.from.first + from_person = from ? PersonManager.person_for(from.format) : nil + [Chunk::EnclosedMessage.new(from_person, payload.to_s)] + else + filename = + ## first, paw through the headers looking for a filename + if m.header["Content-Disposition"] && + m.header["Content-Disposition"] =~ /filename="?(.*?[^\\])("|;|$)/ + $1 + elsif m.header["Content-Type"] && + m.header["Content-Type"] =~ /name=(.*?)(;|$)/ + $1 + + ## haven't found one, but it's a non-text message. fake + ## it. + elsif m.header["Content-Type"] && m.header["Content-Type"] !~ /^text\/plain/ + "sup-attachment-#{Time.now.to_i}-#{rand 10000}" + end + + ## if there's a filename, we'll treat it as an attachment. + if filename + [Chunk::Attachment.new(m.header.content_type, filename, m, sibling_types)] + + ## otherwise, it's body text else - disp = m.header["Content-Disposition"] || "" - Attachment.new m.header.content_type, disp.gsub(/[\s\n]+/, " "), m + body = Message.convert_from m.decode, m.charset + text_to_chunks body.normalize_whitespace.split("\n") end - - m.each_part { |p| ret << message_to_chunks(p) } if m.multipart? - ret.compact.flatten + end + end + + def self.convert_from body, charset + begin + raise MessageFormatError, "RubyMail decode returned a null body" unless body + return body unless charset + Iconv.iconv($encoding, charset, body).join + rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence, MessageFormatError => e + Redwood::log "warning: error (#{e.class.name}) decoding message body from #{charset}: #{e.message}" + File.open("sup-unable-to-decode.txt", "w") { |f| f.write body } + body + end end ## parse the lines of text into chunk objects. the heuristics here @@ -285,7 +413,7 @@ private end if newstate - chunks << Text.new(chunk_lines) unless chunk_lines.empty? + chunks << Chunk::Text.new(chunk_lines) unless chunk_lines.empty? chunk_lines = [line] state = newstate else @@ -295,7 +423,7 @@ private when :quote newstate = nil - if line =~ QUOTE_PATTERN || line =~ QUOTE_START_PATTERN || line =~ /^\s*$/ + if line =~ QUOTE_PATTERN || line =~ QUOTE_START_PATTERN #|| line =~ /^\s*$/ chunk_lines << line elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE newstate = :sig @@ -306,24 +434,18 @@ private if newstate if chunk_lines.empty? # nothing - elsif chunk_lines.size == 1 - chunks << Text.new(chunk_lines) # forget about one-line quotes else - chunks << Quote.new(chunk_lines) + chunks << Chunk::Quote.new(chunk_lines) end chunk_lines = [line] state = newstate end - when :block_quote - chunk_lines << line - - when :sig + when :block_quote, :sig chunk_lines << line end - if state == :text && (@snippet.nil? || @snippet.length < SNIPPET_LEN) && - line !~ /[=\*#_-]{3,}/ && line !~ /^\s*$/ + if !@have_snippet && state == :text && (@snippet.nil? || @snippet.length < SNIPPET_LEN) && line !~ /[=\*#_-]{3,}/ && line !~ /^\s*$/ @snippet += " " unless @snippet.empty? @snippet += line.gsub(/^\s+/, "").gsub(/[\r\n]/, "").gsub(/\s+/, " ") @snippet = @snippet[0 ... SNIPPET_LEN].chomp @@ -333,11 +455,11 @@ private ## final object case state when :quote, :block_quote - chunks << Quote.new(chunk_lines) unless chunk_lines.empty? + chunks << Chunk::Quote.new(chunk_lines) unless chunk_lines.empty? when :text - chunks << Text.new(chunk_lines) unless chunk_lines.empty? + chunks << Chunk::Text.new(chunk_lines) unless chunk_lines.empty? when :sig - chunks << Signature.new(chunk_lines) unless chunk_lines.empty? + chunks << Chunk::Signature.new(chunk_lines) unless chunk_lines.empty? end chunks end