From 0964ab9efbca6ba040abe8f156f6a4235d581cc5 Mon Sep 17 00:00:00 2001 From: wmorgan Date: Sun, 31 Dec 2006 21:28:21 +0000 Subject: [PATCH] massive fixes for imap and mbox+ssh git-svn-id: svn://rubyforge.org/var/svn/sup/trunk@133 5c8cc53c-5e98-4d25-b20a-d8db53a31250 --- bin/sup-import | 6 +- lib/sup/imap.rb | 135 +++++++++++++++++++++++-------------- lib/sup/index.rb | 3 +- lib/sup/mbox/loader.rb | 3 - lib/sup/mbox/ssh-file.rb | 77 ++++++++++++++------- lib/sup/mbox/ssh-loader.rb | 28 +++++--- lib/sup/message.rb | 2 +- lib/sup/source.rb | 56 ++++++++++----- lib/sup/util.rb | 4 ++ 9 files changed, 204 insertions(+), 110 deletions(-) diff --git a/bin/sup-import b/bin/sup-import index bbf7e89..a31387d 100644 --- a/bin/sup-import +++ b/bin/sup-import @@ -4,6 +4,8 @@ require 'rubygems' require 'highline' require "sup" +Thread.abort_on_exception = true # make debugging possible + class Float def to_s; sprintf '%.2f', self; end end @@ -145,8 +147,9 @@ begin else found[m.id] = true end + m.remove_label :unread if m.status == "RO" unless force_read - puts "# message at #{offset}, labels: #{labels * ', '}" unless rebuild || force_rebuild + puts "# message at #{offset}, labels: #{labels * ', '}" if (rebuild || force_rebuild) && (docid, entry = index.load_entry_for_id(m.id)) && entry if force_rebuild || entry[:source_info].to_i != offset @@ -170,6 +173,7 @@ begin puts "loaded #{num} messages" unless num == 0 end ensure + $stderr.puts "saving index and sources..." index.save Redwood::finish end diff --git a/lib/sup/imap.rb b/lib/sup/imap.rb index e388577..289820a 100644 --- a/lib/sup/imap.rb +++ b/lib/sup/imap.rb @@ -2,22 +2,48 @@ require 'uri' require 'net/imap' require 'stringio' +## fucking imap fucking sucks. what the FUCK kind of committee of +## dunces designed this shit. + +## you see, imap touts 'unique ids' for messages, which are to be used +## for cross-session identification. great, just what sup needs! only, +## it turns out the uids can be invalidated every time some arbitrary +## 'uidvalidity' value changes on the server, and 'uidvalidity' has no +## restrictions. it can change any time you log in. it can change +## EVERY time you log in. of course the imap spec "strongly +## recommends" that it never change, but there's nothing to stop +## people from just setting it to the current time, and in fact that's +## exactly what the one imap server i have at my disposal does. thus +## the so-called uids are absolutely useless and imap provides no +## cross-session way of uniquely identifying a message. but thanks for +## the "strong recommendation", guys! + +## right now i'm using the 'internal date' and the size of each +## message to uniquely identify it, and i have to scan over the entire +## mailbox each time i open it to map those things to message ids, and +## we'll just hope that there are no collisions. ho ho! that's a +## perfectly reasonable solution! + +## fuck you imap committee. you managed to design something as shitty +## as mbox but goddamn THIRTY YEARS LATER. + module Redwood class IMAP < Source attr_reader_cloned :labels - def initialize uri, username, password, uid_validity=nil, last_uid=nil, usual=true, archived=false, id=nil + def initialize uri, username, password, last_idate=nil, usual=true, archived=false, id=nil raise ArgumentError, "username and password must be specified" unless username && password raise ArgumentError, "not an imap uri" unless uri =~ %r!imaps?://! - super uri, last_uid, usual, archived, id + super uri, last_idate, usual, archived, id @parsed_uri = URI(uri) @username = username @password = password - @uid_validity = uid_validity @imap = nil + @imap_ids = {} + @ids = [] @labels = [:unread] @labels << :inbox unless archived? @labels << mailbox.intern unless mailbox =~ /inbox/i || mailbox.nil? @@ -26,7 +52,6 @@ class IMAP < Source def connect return false if broken? return true if @imap - Redwood::log "connecting to #{@parsed_uri.host} port #{ssl? ? 993 : 143}, ssl=#{ssl?} ..." ## ok, this is FUCKING ANNOYING. ## @@ -41,23 +66,38 @@ class IMAP < Source ## ## FUCK!!!!!!!!! - BufferManager.say "Connecting to IMAP server #{host}..." do - ::Thread.new do - begin - #raise Net::IMAP::ByeResponseError, "simulated imap failure" - @imap = Net::IMAP.new host, ssl? ? 993 : 143, ssl? - @imap.authenticate 'LOGIN', @username, @password - @imap.examine mailbox - Redwood::log "successfully connected to #{@parsed_uri}, mailbox #{mailbox}" - @uid_validity ||= @imap.responses["UIDVALIDITY"][-1] - raise SourceError, "Your shitty IMAP server has taken advantage of the shitty IMAP spec and invalidated all supposedly 'unique' ids for the folder '#{mailbox}'. You will have to rescan this folder manually by running sup-import --rebuild #{self}" if @imap.responses["UIDVALIDITY"][-1] != @uid_validity - rescue Net::IMAP::Error, SourceError => e - self.broken_msg = e.message.chomp # fucking chomp! fuck!!! - @imap = nil - Redwood::log "error connecting to IMAP server: #{self.broken_msg}" + Redwood::log "connecting to #{@parsed_uri.host} port #{ssl? ? 993 : 143}, ssl=#{ssl?} ..." + sid = BufferManager.say "Connecting to IMAP server #{host}..." if BufferManager.instantiated? + + ::Thread.new do + begin + #raise Net::IMAP::ByeResponseError, "simulated imap failure" + @imap = Net::IMAP.new host, ssl? ? 993 : 143, ssl? + BufferManager.say "Logging in...", sid if BufferManager.instantiated? + @imap.authenticate 'LOGIN', @username, @password + BufferManager.say "Sizing mailbox...", sid if BufferManager.instantiated? + @imap.examine mailbox + last_id = @imap.responses["EXISTS"][-1] + + BufferManager.say "Reading headers (because IMAP sucks)...", sid if BufferManager.instantiated? + values = @imap.fetch(1 .. last_id, ['RFC822.SIZE', 'INTERNALDATE']) + + Redwood::log "successfully connected to #{@parsed_uri}" + + values.each do |v| + msize, mdate = v.attr['RFC822.SIZE'], Time.parse(v.attr["INTERNALDATE"]) + id = sprintf("%d.%08d", mdate.to_i, msize) + @ids << id + @imap_ids[id] = v.seqno end - end.join - end + rescue SocketError, Net::IMAP::Error, SourceError => e + self.broken_msg = e.message.chomp # fucking chomp! fuck!!! + @imap = nil + Redwood::log "error connecting to IMAP server: #{self.broken_msg}" + ensure + BufferManager.clear sid if BufferManager.instantiated? + end + end.join !!@imap end @@ -67,65 +107,60 @@ class IMAP < Source def mailbox; @parsed_uri.path[1..-1] end ##XXXX TODO handle nil def ssl?; @parsed_uri.scheme == 'imaps' end - def load_header uid=nil - MBox::read_header StringIO.new(raw_header(uid)) + def load_header id + MBox::read_header StringIO.new(raw_header(id)) end - def load_message uid - RMail::Parser.read raw_full_message(uid) + def load_message id + RMail::Parser.read raw_full_message(id) end ## load the full header text - def raw_header uid + def raw_header id connect or raise SourceError, broken_msg - get_imap_field(uid, 'RFC822.HEADER').gsub(/\r\n/, "\n") + get_imap_field(id, 'RFC822.HEADER').gsub(/\r\n/, "\n") end - def raw_full_message uid + def raw_full_message id connect or raise SourceError, broken_msg - get_imap_field(uid, 'RFC822').gsub(/\r\n/, "\n") + get_imap_field(id, 'RFC822').gsub(/\r\n/, "\n") end - def get_imap_field uid, field + def get_imap_field id, field + imap_id = @imap_ids[id] or raise SourceError, "Unknown message id #{id}. It is likely that messages have been deleted from this IMAP mailbox. Please run sup-import --rebuild #{to_s} in order to correct this problem." + f = begin - @imap.uid_fetch uid, field + @imap.fetch imap_id, field rescue Net::IMAP::Error => e raise SourceError, e.message end - raise SourceError, "null IMAP field '#{field}' for message with uid #{uid}" if f.nil? + raise SourceError, "null IMAP field '#{field}' for message with id #{id} imap id #{imap_id}" if f.nil? f[0].attr[field] end private :get_imap_field def each connect or raise SourceError, broken_msg - uids = - begin - @imap.uid_search ['UID', "#{cur_offset}:#{end_offset}"] - rescue Net::IMAP::Error => e - raise SourceError, e.message - end - uids.each do |uid| - @last_uid = uid - @dirty = true - self.cur_offset = uid - yield uid, labels + start = @ids.index(cur_offset || start_offset) + start.upto(@ids.length - 1) do |i| + id = @ids[i] + self.cur_offset = id + yield id, labels end end - def start_offset; 1; end + def start_offset + connect or raise SourceError, broken_msg + @ids.first + end def end_offset - connect or return start_offset - begin - @imap.uid_search(['ALL']).last - rescue Net::IMAP::Error => e - raise SourceError, e.message - end + connect or raise SourceError, broken_msg + @ids.last end end -Redwood::register_yaml(IMAP, %w(uri username password uid_validity cur_offset usual archived id)) +Redwood::register_yaml(IMAP, %w(uri username password cur_offset usual archived id)) end diff --git a/lib/sup/index.rb b/lib/sup/index.rb index 3de011e..9c0f056 100644 --- a/lib/sup/index.rb +++ b/lib/sup/index.rb @@ -218,7 +218,7 @@ class Index if m.source.is_a? Integer m.source else - m.source.id or raise "unregistered source #{m.source}" + m.source.id or raise "unregistered source #{m.source} (id #{m.source.id.inspect})" end to = (m.to + m.cc + m.bcc).map { |x| x.email }.join(" ") @@ -288,7 +288,6 @@ protected def parse_user_query_string str; @qparser.parse str; end def build_query opts - query = Ferret::Search::BooleanQuery.new query.add_query opts[:qobj], :must if opts[:qobj] labels = ([opts[:label]] + (opts[:labels] || [])).compact diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb index d46ce8f..22518ca 100644 --- a/lib/sup/mbox/loader.rb +++ b/lib/sup/mbox/loader.rb @@ -27,9 +27,6 @@ class Loader < Source end end - attr_writer :f - protected :f= - def start_offset; 0; end def end_offset; File.size @f; end def total; end_offset; end diff --git a/lib/sup/mbox/ssh-file.rb b/lib/sup/mbox/ssh-file.rb index 90a62ee..e8290a6 100644 --- a/lib/sup/mbox/ssh-file.rb +++ b/lib/sup/mbox/ssh-file.rb @@ -7,18 +7,25 @@ class SSHFileError < StandardError; end ## this is a file-like interface to a file that actually lives on the ## other end of an ssh connection. it works by using wc, head and tail -## to simulate (buffered) random access. ## on a fast connection, -## this can have a good bandwidth, but the latency is pretty terrible: +## to simulate (buffered) random access. on a fast connection, this +## can have a good bandwidth, but the latency is pretty terrible: ## about 1 second (!) per request. luckily, we're either just reading ## straight through the mbox (an import) or we're reading a few -## messages at a time (viewing messages) +## messages at a time (viewing messages) so the latency is not a problem. -# debugging +## all of the methods here catch SSHFileErrors, SocketErrors, and +## Net::SSH::Exceptions and reraise them as SourceErrors. due to this +## and to the logging, this class is somewhat tied to Sup, but it +## wouldn't be too difficult to remove those bits and make it more +## general-purpose. + +## debugging TODO: remove me def debug s Redwood::log s end module_function :debug +## a simple buffer of contiguous data class Buffer def initialize clear! @@ -34,7 +41,7 @@ class Buffer def endd; @start + @buf.length; end def add data, offset=endd - MBox::debug "+ adding #{data.length} bytes; size will be #{size + data.length}; limit #{SSHFile::MAX_BUF_SIZE}" + #MBox::debug "+ adding #{data.length} bytes; size will be #{size + data.length}; limit #{SSHFile::MAX_BUF_SIZE}" if start.nil? @buf = data @@ -71,10 +78,11 @@ class Buffer def to_s; empty? ? "" : "[#{start}, #{endd})"; end # for debugging end +## the file-like interface to a remote file class SSHFile MAX_BUF_SIZE = 1024 * 1024 # bytes MAX_TRANSFER_SIZE = 1024 * 64 - REASONABLE_TRANSFER_SIZE = 1024 * 16 + REASONABLE_TRANSFER_SIZE = 1024 * 32 SIZE_CHECK_INTERVAL = 60 * 1 # seconds def initialize host, fn, ssh_opts={} @@ -83,21 +91,31 @@ class SSHFile @fn = fn @ssh_opts = ssh_opts @file_size = nil + @offset = 0 end def connect return if @session - MBox::debug "starting SSH session to #@host for #@fn..." - @session = Net::SSH.start @host, @ssh_opts - MBox::debug "starting SSH shell..." - @shell = @session.shell.sync - MBox::debug "SSH is ready" - raise Errno::ENOENT, @fn unless @shell.test("-e #@fn").status == 0 + + Redwood::log "starting SSH session to #@host for #@fn..." + sid = BufferManager.say "Connecting to SSH host #{@host}..." if BufferManager.instantiated? + + begin + @session = Net::SSH.start @host, @ssh_opts + MBox::debug "starting SSH shell..." + BufferManager.say "Starting SSH shell...", sid if BufferManager.instantiated? + @shell = @session.shell.sync + MBox::debug "checking for file existence..." + raise Errno::ENOENT, @fn unless @shell.test("-e #@fn").status == 0 + MBox::debug "SSH is ready" + ensure + BufferManager.clear sid if BufferManager.instantiated? + end end - def eof?; @offset >= size; end + def eof?; raise "offset #@offset size #{size}" unless @offset && size; @offset >= size; end def eof; eof?; end # lame but IO does this and rmail depends on it - def seek loc; @offset = loc; end + def seek loc; raise "nil" unless loc; @offset = loc; end def tell; @offset; end def total; size; end @@ -127,21 +145,28 @@ class SSHFile private def do_remote cmd, expected_size=0 - retries = 0 - connect - MBox::debug "sending command: #{cmd.inspect}" begin - result = @shell.send_command cmd - raise SSHFileError, "Failure during remote command #{cmd.inspect}: #{result.stderr[0 .. 100]}" unless result.status == 0 - rescue Net::SSH::Exception - retry if (retries += 1) < 3 - raise + retries = 0 + connect + MBox::debug "sending command: #{cmd.inspect}" + begin + result = @shell.send_command cmd + raise SSHFileError, "Failure during remote command #{cmd.inspect}: #{result.stderr[0 .. 100]}" unless result.status == 0 + + rescue Net::SSH::Exception # these happen occasionally for no apparent reason. gotta love that nondeterminism! + retry if (retries += 1) < 3 + raise + end + result.stdout + rescue Net::SSH::Exception, SocketError, Errno::ENOENT => e + @session = nil + Redwood::log "error connecting to SSH server: #{e.message}" + raise SourceError, "error connecting to SSH server: #{e.message}" end - result.stdout end def get_bytes offset, size - MBox::debug "! request for [#{offset}, #{offset + size}); buf is #@buf" + #MBox::debug "! request for [#{offset}, #{offset + size}); buf is #@buf" raise "wtf: offset #{offset} size #{size}" if size == 0 || offset < 0 do_remote "tail -c +#{offset + 1} #@fn | head -c #{size}", size end @@ -165,7 +190,7 @@ private elsif @buf.start - offset < MAX_TRANSFER_SIZE [offset, @buf.start - offset] else - MBox::debug "clearing buffer because buf.start #{@buf.start} - offset #{offset} >= #{MAX_TRANSFER_SIZE}" + MBox::debug "clearing SSH buffer because buf.start #{@buf.start} - offset #{offset} >= #{MAX_TRANSFER_SIZE}" @buf.clear! [offset, good_size] end @@ -176,7 +201,7 @@ private elsif offset - @buf.endd < MAX_TRANSFER_SIZE [@buf.endd, offset - @buf.endd] else - MBox::debug "clearing buffer because offset #{offset} - buf.end #{@buf.endd} >= #{MAX_TRANSFER_SIZE}" + MBox::debug "clearing SSH buffer because offset #{offset} - buf.end #{@buf.endd} >= #{MAX_TRANSFER_SIZE}" @buf.clear! [offset, good_size] end diff --git a/lib/sup/mbox/ssh-loader.rb b/lib/sup/mbox/ssh-loader.rb index 74b5823..5b697a8 100644 --- a/lib/sup/mbox/ssh-loader.rb +++ b/lib/sup/mbox/ssh-loader.rb @@ -3,39 +3,45 @@ require 'net/ssh' module Redwood module MBox -class SSHLoader < Loader +class SSHLoader < Source + attr_reader_cloned :labels + def initialize uri, username=nil, password=nil, start_offset=nil, usual=true, archived=false, id=nil - raise ArgumentError, "not an mbox+ssh uri" unless uri =~ %r!^mbox\+ssh://! + raise ArgumentError, "not an mbox+ssh uri: #{uri.inspect}" unless uri =~ %r!^mbox\+ssh://! - super nil, start_offset, usual, archived, id + super uri, start_offset, usual, archived, id @parsed_uri = URI(uri) @username = username @password = password - @f = nil @uri = uri opts = {} opts[:username] = @username if @username opts[:password] = @password if @password - begin - @f = SSHFile.new host, filename, opts - self.f = @f - rescue SSHFileError => e - self.broken_msg = e.message - end - + @f = SSHFile.new host, filename, opts + @loader = Loader.new @f, start_offset, usual, archived, id + ## heuristic: use the filename as a label, unless the file ## has a path that probably represents an inbox. + @labels = [:unread] + @labels << :inbox unless archived? @labels << File.basename(filename).intern unless File.dirname(filename) =~ /\b(var|usr|spool)\b/ end def host; @parsed_uri.host; end def filename; @parsed_uri.path[1..-1] end ##XXXX TODO handle nil + def next; with(@loader.next) { @cur_offset = @loader.cur_offset }; end # only necessary because YAML is a PITA def end_offset; @f.size; end + def cur_offset= o; @cur_offset = @loader.cur_offset = o; @dirty = true; end + def id; @loader.id; end + def id= o; @id = @loader.id = o; end + def cur_offset; @loader.cur_offset; end def to_s; @parsed_uri.to_s; end + + defer_all_other_method_calls_to :loader end Redwood::register_yaml(SSHLoader, %w(uri username password cur_offset usual archived id)) diff --git a/lib/sup/message.rb b/lib/sup/message.rb index 6c3b96b..666da1e 100644 --- a/lib/sup/message.rb +++ b/lib/sup/message.rb @@ -179,7 +179,7 @@ class Message begin read_header @source.load_header(@source_info) message_to_chunks @source.load_message(@source_info) - rescue SourceError => e + rescue SourceError, SocketError => e [Text.new(error_message(e.message))] end end diff --git a/lib/sup/source.rb b/lib/sup/source.rb index d041768..87925db 100644 --- a/lib/sup/source.rb +++ b/lib/sup/source.rb @@ -8,11 +8,8 @@ class Source ## ## broken? means no message can be loaded, e.g. IMAP server is ## down, mbox file is corrupt and needs to be rescanned. - bool_reader :usual, :archived, :dirty - attr_reader :cur_offset, :broken_msg - attr_accessor :id - ## You should implement: + ## When writing a new source, you should implement: ## ## start_offset ## end_offset @@ -20,11 +17,19 @@ class Source ## load_message(offset) ## raw_header(offset) ## raw_full_message(offset) - ## next + ## next (or each, if you prefer) + + ## you can throw SourceErrors from any of those, but we don't catch + ## anything else, so make sure you catch all non-fatal errors and + ## reraise them as source errors. + + bool_reader :usual, :archived, :dirty + attr_reader :cur_offset, :broken_msg + attr_accessor :id def initialize uri, initial_offset=nil, usual=true, archived=false, id=nil @uri = uri - @cur_offset = initial_offset || start_offset + @cur_offset = initial_offset @usual = usual @archived = archived @id = id @@ -35,27 +40,46 @@ class Source def broken?; !@broken_msg.nil?; end def to_s; @uri; end def seek_to! o; self.cur_offset = o; end - def reset!; seek_to! start_offset; end + def reset! + return if broken? + begin + seek_to! start_offset + rescue SourceError + end + end def == o; o.to_s == to_s; end - def done?; cur_offset >= end_offset; end + def done?; + return true if broken? + begin + (cur_offset ||= start_offset) >= end_offset + rescue SourceError => e + true + end + end def is_source_for? s; to_s == s; end def each - until done? - n, labels = self.next - raise "no message" unless n - yield n, labels + begin + self.cur_offset ||= start_offset + until done? || broken? # just like life! + n, labels = self.next + raise "no message" unless n + yield n, labels + end + rescue SourceError + # just die end end -protected - def cur_offset= o @cur_offset = o @dirty = true end - - attr_writer :broken_msg + + def broken_msg= m + @broken_msg = m + Redwood::log "#{to_s}: #{m}" + end end Redwood::register_yaml(Source, %w(uri cur_offset usual archived id)) diff --git a/lib/sup/util.rb b/lib/sup/util.rb index 35c087a..0d94e14 100644 --- a/lib/sup/util.rb +++ b/lib/sup/util.rb @@ -11,6 +11,10 @@ class Module def attr_reader_cloned *args args.each { |sym| class_eval %{ def #{sym}; @#{sym}.clone; end } } end + + def defer_all_other_method_calls_to obj + class_eval %{ def method_missing meth, *a, &b; @#{obj}.send meth, *a, &b; end } + end end class Object -- 2.45.2