From: wmorgan Date: Fri, 29 Dec 2006 17:28:29 +0000 (+0000) Subject: major improvements in broken source handling X-Git-Url: https://git.notmuchmail.org/git?a=commitdiff_plain;h=66983394cb5d972eb8836ff52729d22a0d7c54e5;p=sup major improvements in broken source handling git-svn-id: svn://rubyforge.org/var/svn/sup/trunk@117 5c8cc53c-5e98-4d25-b20a-d8db53a31250 --- diff --git a/bin/sup b/bin/sup index 1c602ac..c4e59b6 100644 --- a/bin/sup +++ b/bin/sup @@ -169,7 +169,7 @@ begin mode.edit when :poll bm.raise_to_front PollManager.buffer - PollManager.poll + reporting_thread { PollManager.poll } when :nothing when :redraw bm.completely_redraw_screen diff --git a/lib/sup/imap.rb b/lib/sup/imap.rb index 8f93458..e856d99 100644 --- a/lib/sup/imap.rb +++ b/lib/sup/imap.rb @@ -7,7 +7,7 @@ module Redwood class IMAP < Source attr_reader :labels - def initialize uri, username, password, last_uid=nil, usual=true, archived=false, id=nil + def initialize uri, username, password, uid_validity=nil, last_uid=nil, usual=true, archived=false, id=nil raise ArgumentError, "username and password must be specified" unless username && password raise ArgumentError, "not an imap uri" unless uri =~ %r!imaps?://! @@ -16,12 +16,11 @@ class IMAP < Source @parsed_uri = URI(uri) @username = username @password = password + @uid_validity = uid_validity @imap = nil @labels = [:unread] @labels << :inbox unless archived? @labels << mailbox.intern unless mailbox =~ /inbox/i || mailbox.nil? - - connect end def connect @@ -41,19 +40,24 @@ class IMAP < Source ## problem. ## ## FUCK!!!!!!!!! - ::Thread.new do - begin - #raise Net::IMAP::ByeResponseError, "simulated imap failure" - @imap = Net::IMAP.new host, ssl? ? 993 : 143, ssl? - @imap.authenticate 'LOGIN', @username, @password - @imap.examine mailbox - Redwood::log "successfully connected to #{@parsed_uri}, mailbox #{mailbox}" - rescue Exception => e - self.broken_msg = e.message.chomp # fucking chomp! fuck!!! - @imap = nil - Redwood::log "error connecting to IMAP server: #{self.broken_msg}" - end - end.join + + BufferManager.say "Connecting to IMAP server #{host}..." do + ::Thread.new do + begin + raise Net::IMAP::ByeResponseError, "simulated imap failure" + @imap = Net::IMAP.new host, ssl? ? 993 : 143, ssl? + @imap.authenticate 'LOGIN', @username, @password + @imap.examine mailbox + Redwood::log "successfully connected to #{@parsed_uri}, mailbox #{mailbox}" + @uid_validity ||= @imap.responses["UIDVALIDITY"][-1] + raise SourceError, "Your shitty IMAP server has kindly invalidated all 'unique' ids for the folder '#{mailbox}'. You will have to rescan this folder manually." if @imap.responses["UIDVALIDITY"][-1] != @uid_validity + rescue Exception => e + self.broken_msg = e.message.chomp # fucking chomp! fuck!!! + @imap = nil + Redwood::log "error connecting to IMAP server: #{self.broken_msg}" + end + end.join + end !!@imap end @@ -73,12 +77,12 @@ class IMAP < Source ## load the full header text def raw_header uid - connect or return broken_msg + connect or raise SourceError, broken_msg get_imap_field(uid, 'RFC822.HEADER').gsub(/\r\n/, "\n") end def raw_full_message uid - connect or return broken_msg + connect or raise SourceError, broken_msg get_imap_field(uid, 'RFC822').gsub(/\r\n/, "\n") end @@ -90,7 +94,7 @@ class IMAP < Source private :get_imap_field def each - connect or return broken_msg + connect or raise SourceError, broken_msg uids = @imap.uid_search ['UID', "#{cur_offset}:#{end_offset}"] uids.each do |uid| @last_uid = uid @@ -107,6 +111,6 @@ class IMAP < Source end end -Redwood::register_yaml(IMAP, %w(uri username password cur_offset usual archived id)) +Redwood::register_yaml(IMAP, %w(uri username password uid_validity cur_offset usual archived id)) end diff --git a/lib/sup/index.rb b/lib/sup/index.rb index 5c1e2e4..4e98cae 100644 --- a/lib/sup/index.rb +++ b/lib/sup/index.rb @@ -125,12 +125,13 @@ class Index end def num_results_for opts={} - query = build_query opts - x = @index.search(query).total_hits - Redwood::log "num_results_for: have #{x} for query #{query}" - x + with(@index.search(build_query(opts)).total_hits) { Redwood::log "num_results_for: have #{x} for query #{query}" } end + ## yield all messages in the thread containing 'm' by repeatedly + ## querying the index. yields pairs of message ids and + ## message-building lambdas, so that building an unwanted message + ## can be skipped in the block if desired. SAME_SUBJECT_DATE_LIMIT = 7 def each_message_in_thread_for m, opts={} messages = {} @@ -198,35 +199,9 @@ class Index "references" => doc[:refs], } - m = - if source.broken? - nil - else - begin - Message.new :source => source, :source_info => doc[:source_info].to_i, - :labels => doc[:label].split(" ").map { |s| s.intern }, - :snippet => doc[:snippet], :header => fake_header - rescue MessageFormatError => e - raise IndexError.new(source, "error building message #{doc[:message_id]} at #{source}/#{doc[:source_info]}: #{e.message}") - rescue SourceError => e - nil - end - end - - unless m - m = Message.new :labels => doc[:label].split(" ").map { |s| s.intern }, - :snippet => doc[:snippet], :header => fake_header, - :body => < source, :source_info => doc[:source_info].to_i, + :labels => doc[:label].split(" ").map { |s| s.intern }, + :snippet => doc[:snippet], :header => fake_header end def fresh_thread_id; @next_thread_id += 1; end diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb index a459539..367f21d 100644 --- a/lib/sup/mbox/loader.rb +++ b/lib/sup/mbox/loader.rb @@ -4,8 +4,6 @@ module Redwood module MBox class Loader < Source - attr_reader :labels - def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil super @@ -27,6 +25,9 @@ class Loader < Source end end + attr_writer :f + protected :f= + def start_offset; 0; end def end_offset; File.size @f; end def total; end_offset; end @@ -46,6 +47,7 @@ class Loader < Source end def load_message offset + raise SourceError, self.broken_msg if broken? @mutex.synchronize do @f.seek offset begin @@ -59,6 +61,7 @@ class Loader < Source end def raw_header offset + raise SourceError, self.broken_msg if broken? ret = "" @mutex.synchronize do @f.seek offset @@ -70,6 +73,7 @@ class Loader < Source end def raw_full_message offset + raise SourceError, self.broken_msg if broken? ret = "" @mutex.synchronize do @f.seek offset @@ -82,6 +86,7 @@ class Loader < Source end def next + raise SourceError, self.broken_msg if broken? returned_offset = nil next_offset = cur_offset @@ -111,7 +116,7 @@ class Loader < Source end self.cur_offset = next_offset - [returned_offset, labels] + [returned_offset, @labels] end end diff --git a/lib/sup/mbox/ssh-file.rb b/lib/sup/mbox/ssh-file.rb index e124f54..90a62ee 100644 --- a/lib/sup/mbox/ssh-file.rb +++ b/lib/sup/mbox/ssh-file.rb @@ -7,35 +7,17 @@ class SSHFileError < StandardError; end ## this is a file-like interface to a file that actually lives on the ## other end of an ssh connection. it works by using wc, head and tail -## to simulate (buffered) random access. - -## it doesn't work very well, because while on a fast connection ssh -## can have a nice bandwidth, the latency is pretty terrible: about 1 -## second (!) per request. and since reading mbox files involves -## jumping around a lot all over the file, it is tragically slow to do -## anything with this. i've tried to compensate by caching massive -## amounts of data, but that doesn't really help. -## -## so, your best bet for remote file access remains IMAP. i'm going -## to include this in the codebase for the time begin, because maybe -## someone very motivated can put some energy into a better approach -## (probably one that doesn't involve the synchronous shell.) - -## there are two kinds of file access that are typical in sup: the -## first is an import, which starts at some point in the file and -## reads until the end. the other is during loading time, which does -## arbitrary reads into the file, but typically reads *backwards* in -## the file (because messages are loaded and displayed most recent -## first, and typically later message are later in the mbox file). so -## we have to be careful that whatever caching we do supports both. +## to simulate (buffered) random access. ## on a fast connection, +## this can have a good bandwidth, but the latency is pretty terrible: +## about 1 second (!) per request. luckily, we're either just reading +## straight through the mbox (an import) or we're reading a few +## messages at a time (viewing messages) # debugging -# $f = File.open("asdf.txt", "w") -def debuggg s - # $f.puts s - # $f.flush +def debug s + Redwood::log s end -module_function :debuggg +module_function :debug class Buffer def initialize @@ -43,7 +25,6 @@ class Buffer end def clear! - MBox::debuggg ">>> CLEARING <<<" @start = nil @buf = "" end @@ -53,7 +34,7 @@ class Buffer def endd; @start + @buf.length; end def add data, offset=endd - MBox::debuggg "+ adding #{data.length} bytes; size will be #{size + data.length}; limit #{SSHFile::MAX_BUF_SIZE}" + MBox::debug "+ adding #{data.length} bytes; size will be #{size + data.length}; limit #{SSHFile::MAX_BUF_SIZE}" if start.nil? @buf = data @@ -91,9 +72,9 @@ class Buffer end class SSHFile - MAX_BUF_SIZE = 1024 * 1024 * 3 - MAX_TRANSFER_SIZE = 1024 * 64 # bytes - REASONABLE_TRANSFER_SIZE = 1024 * 4 # bytes + MAX_BUF_SIZE = 1024 * 1024 # bytes + MAX_TRANSFER_SIZE = 1024 * 64 + REASONABLE_TRANSFER_SIZE = 1024 * 16 SIZE_CHECK_INTERVAL = 60 * 1 # seconds def initialize host, fn, ssh_opts={} @@ -106,23 +87,17 @@ class SSHFile def connect return if @session - # MBox::debuggg "starting session..." + MBox::debug "starting SSH session to #@host for #@fn..." @session = Net::SSH.start @host, @ssh_opts - # MBox::debuggg "starting shell..." - # @shell = @session.shell.sync - @input, @output, @error = @session.process.popen3("/bin/sh") - - # MBox::debuggg "ready for heck!" - raise Errno::ENOENT, @fn unless do_remote("if [ -e #@fn ]; then echo y; else echo n; fi").chomp == "y" + MBox::debug "starting SSH shell..." + @shell = @session.shell.sync + MBox::debug "SSH is ready" + raise Errno::ENOENT, @fn unless @shell.test("-e #@fn").status == 0 end def eof?; @offset >= size; end def eof; eof?; end # lame but IO does this and rmail depends on it - - def seek loc - # MBox::debuggg "seeking to #{loc} from #@offset" - @offset = loc - end + def seek loc; @offset = loc; end def tell; @offset; end def total; size; end @@ -140,15 +115,11 @@ class SSHFile make_buf_include @offset expand_buf_forward while @buf.index("\n", @offset).nil? && @buf.endd < size - line = @buf[@offset .. (@buf.index("\n", @offset) || -1)] - @offset += line.length - # MBox::debuggg "gets is of length #{line.length}, offset now #@offset" - line + with(@buf[@offset .. (@buf.index("\n", @offset) || -1)]) { |line| @offset += line.length } end def read n return nil if eof? - make_buf_include @offset, n @buf[@offset ... (@offset += n)] end @@ -158,45 +129,35 @@ private def do_remote cmd, expected_size=0 retries = 0 connect - MBox::debuggg "sending command: #{cmd.inspect}" + MBox::debug "sending command: #{cmd.inspect}" begin - @input.puts cmd - result = "" - begin - result += @output.read - end while result.length < expected_size - # result = @shell.send_command cmd - - #raise SSHFileError, "Unable to perform remote command #{cmd.inspect}: #{result.stderr[0 .. 100]}" unless result.status == 0 + result = @shell.send_command cmd + raise SSHFileError, "Failure during remote command #{cmd.inspect}: #{result.stderr[0 .. 100]}" unless result.status == 0 rescue Net::SSH::Exception retry if (retries += 1) < 3 raise end - result - #result.stdout + result.stdout end def get_bytes offset, size - MBox::debuggg "get_bytes(#{offset}, #{size})" - MBox::debuggg "! request for [#{offset}, #{offset + size}); buf is #@buf" + MBox::debug "! request for [#{offset}, #{offset + size}); buf is #@buf" raise "wtf: offset #{offset} size #{size}" if size == 0 || offset < 0 do_remote "tail -c +#{offset + 1} #@fn | head -c #{size}", size end def expand_buf_forward n=REASONABLE_TRANSFER_SIZE @buf.add get_bytes(@buf.endd, n) - # trim if necessary end ## try our best to transfer somewhere between ## REASONABLE_TRANSFER_SIZE and MAX_TRANSFER_SIZE bytes def make_buf_include offset, size=0 good_size = [size, REASONABLE_TRANSFER_SIZE].max - remainder = good_size - size trans_start, trans_size = if @buf.empty? - [[offset - (remainder / 2), 0].max, good_size] + [offset, good_size] elsif offset < @buf.start if @buf.start - offset <= good_size start = [@buf.start - good_size, 0].max @@ -204,9 +165,9 @@ private elsif @buf.start - offset < MAX_TRANSFER_SIZE [offset, @buf.start - offset] else - MBox::debuggg "clearing buffer because buf.start #{@buf.start} - offset #{offset} >= #{MAX_TRANSFER_SIZE}" + MBox::debug "clearing buffer because buf.start #{@buf.start} - offset #{offset} >= #{MAX_TRANSFER_SIZE}" @buf.clear! - [[offset - (remainder / 2), 0].max, good_size] + [offset, good_size] end else return if [offset + size, self.size].min <= @buf.endd # whoohoo! @@ -215,13 +176,12 @@ private elsif offset - @buf.endd < MAX_TRANSFER_SIZE [@buf.endd, offset - @buf.endd] else - MBox::debuggg "clearing buffer because offset #{offset} - buf.end #{@buf.endd} >= #{MAX_TRANSFER_SIZE}" + MBox::debug "clearing buffer because offset #{offset} - buf.end #{@buf.endd} >= #{MAX_TRANSFER_SIZE}" @buf.clear! - [[offset - (remainder / 2), 0].max, good_size] + [offset, good_size] end end - MBox::debuggg "make_buf_include(#{offset}, #{size})" @buf.clear! if @buf.size > MAX_BUF_SIZE @buf.add get_bytes(trans_start, trans_size), trans_start end diff --git a/lib/sup/mbox/ssh-loader.rb b/lib/sup/mbox/ssh-loader.rb index 6a41900..74b5823 100644 --- a/lib/sup/mbox/ssh-loader.rb +++ b/lib/sup/mbox/ssh-loader.rb @@ -7,18 +7,25 @@ class SSHLoader < Loader def initialize uri, username=nil, password=nil, start_offset=nil, usual=true, archived=false, id=nil raise ArgumentError, "not an mbox+ssh uri" unless uri =~ %r!^mbox\+ssh://! + super nil, start_offset, usual, archived, id + @parsed_uri = URI(uri) @username = username @password = password @f = nil + @uri = uri opts = {} opts[:username] = @username if @username opts[:password] = @password if @password - @f = SSHFile.new host, filename, opts - super @f, start_offset, usual, archived, id - @uri = uri + begin + @f = SSHFile.new host, filename, opts + self.f = @f + rescue SSHFileError => e + self.broken_msg = e.message + end + ## heuristic: use the filename as a label, unless the file ## has a path that probably represents an inbox. @labels << File.basename(filename).intern unless File.dirname(filename) =~ /\b(var|usr|spool)\b/ diff --git a/lib/sup/message.rb b/lib/sup/message.rb index 3106fa5..e75f48f 100644 --- a/lib/sup/message.rb +++ b/lib/sup/message.rb @@ -54,7 +54,7 @@ class Message attr_reader :lines def initialize lines ## do some wrapping - @lines = lines.map { |l| l.wrap 80 }.flatten + @lines = lines.map { |l| l.chomp.wrap 80 }.flatten end end @@ -88,26 +88,14 @@ class Message ## if index_entry is specified, will fill in values from that def initialize opts - if opts[:source] - @source = opts[:source] - @source_info = opts[:source_info] or raise ArgumentError, ":source but no :source_info" - @body = nil - else - @source = @source_info = nil - @body = opts[:body] or raise ArgumentError, "one of :body or :source must be specified" - end + @source = opts[:source] or raise ArgumentError, "source can't be nil" + @source_info = opts[:source_info] or raise ArgumentError, "source_info can't be nil" @snippet = opts[:snippet] || "" @labels = opts[:labels] || [] @dirty = false - header = - if opts[:header] - opts[:header] - else - header = @source.load_header @source_info - header.each { |k, v| header[k.downcase] = v } - header - end + header = opts[:header] || @source.load_header(@source_info) + header.each { |k, v| header[k.downcase] = v } %w(message-id date).each do |f| raise MessageFormatError, "no #{f} field in header #{header.inspect} (source #@source offset #@source_info)" unless header.include? f @@ -116,22 +104,19 @@ class Message begin date = header["date"] - @date = (Time === date ? date : Time.parse(header["date"])) + @date = Time === date ? date : Time.parse(header["date"]) rescue ArgumentError => e raise MessageFormatError, "unparsable date #{header['date']}: #{e.message}" end - if(@subj = header["subject"]) - @subj = @subj.gsub(/\s+/, " ").gsub(/\s+$/, "") - else - @subj = DEFAULT_SUBJECT - end + @subj = header.member?("subject") ? header["subject"].gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT @from = Person.for header["from"] @to = Person.for_several header["to"] @cc = Person.for_several header["cc"] @bcc = Person.for_several header["bcc"] @id = header["message-id"] - @refs = (header["references"] || "").scan(/<(.*?)>/).flatten + @refs = (header["references"] || "").gsub(/[<>]/, "").split(/\s+/).flatten + Redwood::log "got refs #{@refs.inspect} for #@id" unless @refs.empty? @replytos = (header["in-reply-to"] || "").scan(/<(.*?)>/).flatten @replyto = Person.for header["reply-to"] @list_address = @@ -145,7 +130,7 @@ class Message @status = header["status"] end - def broken?; @source.nil?; end + def broken?; @source.broken?; end def snippet; @snippet || to_chunks && @snippet; end def is_list_message?; !@list_address.nil?; end def is_draft?; DraftLoader === @source; end @@ -183,13 +168,29 @@ class Message def to_chunks @chunks ||= - if @body - [Text.new(@body.split("\n"))] + if @source.broken? + [Text.new(error_message(@source.broken_msg.split("\n")))] else - message_to_chunks @source.load_message(@source_info) + begin + message_to_chunks @source.load_message(@source_info) + rescue SourceError => e + [Text.new(error_message(e.message))] + end end end + def error_message msg + <