#!/usr/bin/env ruby

require 'uri'
require 'rubygems'
require 'highline/import'
require "sup"

Thread.abort_on_exception = true # make debugging possible

## Core-class patch: every Float in this process is rendered with
## exactly two decimal places (this is what formats the percentages
## and timings that the script interpolates into its messages).
class Float
  def to_s; sprintf '%.2f', self; end
end

class Numeric
  ## Formats a duration given in seconds as "H:MM:SS". The value is
  ## truncated with to_i first, so fractional seconds are dropped.
  def to_time_s
    i = to_i
    sprintf "%d:%02d:%02d", i / 3600, (i / 60) % 60, i % 60
  end
end

## Runs the given block and returns the wall-clock time it took, in
## seconds (the difference of two Time.now readings). The block's own
## return value is discarded.
def time
  startt = Time.now
  yield
  Time.now - startt
end

## Prints the usage message to stderr and terminates the process.
##
## NOTE(review): the file had lost its line breaks and the heredoc
## opener. The "Usage:" line below and the layout of the help text are
## a reconstruction -- confirm against the upstream sup-import script.
## The remaining wording is taken verbatim from the surviving text.
def educate_user
  $stderr.puts <<EOS
Usage: sup-import [options] <source>*

where <source>* is zero or more source descriptions (e.g., mbox
filenames on disk, or imap/imaps URIs).

If the sources listed are not already in the Sup source list,
they will be added to it, as parameterized by the following options:
  --archive: messages from these sources will not appear in the inbox
  --unusual: these sources will not be polled when the flag --the-usual
             is called

Regardless of whether the sources are new or not, they will be polled,
and any new messages will be added to the index, as parameterized by
the following options:
  --force-archive: regardless of the source "archive" flag, any new
                   messages found will not appear in the inbox.
  --force-read: any messages found will not be marked as new.

The following options can also be specified:
  --verbose: print message ids as they're processed
  --the-usual: import new messages from all usual sources
  --rebuild: rebuild the index for the specified sources rather than
             just adding new messages. Useful if the sources have
             changed in any way *other* than new messages being added.
             Only updates messages if the offsets have changed.
  --force-rebuild: force a rebuild of all messages in the inbox, not just
                   ones that have changed. You probably won't need this
                   unless William changes the index format.
  --overwrite-labels: if rebuilding, update message if the labels have
                      changed, not just the offset.
  --optimize: optimize the index after adding any new messages.
  --help: don't do anything, just show this message.
EOS
  exit
end
#' stupid ruby-mode

## for sources that require login information, prompt the user for
## that.
## also provide a list of previously-defined login info to
## choose from, if any.
##
## uri     - the URI (anything URI() accepts) of the source being added.
## sources - the already-known sources; only those responding to
##           #username are offered as reusable accounts.
##
## Returns a two-element array [username, password]. If no previous
## account is chosen, both values are read interactively (the password
## with echo disabled).
def get_login_info uri, sources
  uri = URI(uri)

  ## Collect distinct [host, username, password] triples, listing
  ## accounts on the same host as the new source first.
  accounts = sources.map do |s|
    next unless s.respond_to?(:username)
    suri = URI(s.uri)
    [suri.host, s.username, s.password]
  end.compact.uniq.sort_by { |h, u, p| h == uri.host ? 0 : 1 }

  username, password = nil, nil
  unless accounts.empty?
    say "Would you like to use the same account as for a previous source for #{uri}?"
    choose do |menu|
      accounts.each do |host, olduser, oldpw|
        menu.choice("Use the account info for #{olduser}@#{host}") { username, password = olduser, oldpw }
      end
      ## Choosing this leaves both values nil, which triggers the prompts below.
      menu.choice("Use a new account") { }
      menu.prompt = "Account selection? "
    end
  end

  unless username && password
    username = ask("Username for #{uri.host}: ")
    password = ask("Password for #{uri.host}: ") { |q| q.echo = false }
    puts # why?
  end

  [username, password]
end

educate_user if ARGV.member? '--help'

## Each flag is removed from ARGV as it is read; ARGV.delete returns
## the flag string when present and nil otherwise, so these locals act
## as booleans.
archive = ARGV.delete "--archive"
unusual = ARGV.delete "--unusual"
force_archive = ARGV.delete "--force-archive"
force_read = ARGV.delete "--force-read"
the_usual = ARGV.delete "--the-usual"
rebuild = ARGV.delete "--rebuild"
force_rebuild = ARGV.delete "--force-rebuild"
overwrite_labels = ARGV.delete "--overwrite-labels"
optimize = ARGV.delete "--optimize"
verbose = ARGV.delete "--verbose"

## ok really need to use optparse or something now
start_at = nil
if (i = ARGV.index("--start-at"))
  ## The value must consist of digits only; a looser check would let
  ## something like "abc1" through and to_i would silently turn it into 0.
  unless ARGV.length > (i + 1) && ARGV[i + 1] =~ /\A\d+\z/
    raise "start-at requires a numeric argument: #{ARGV[i + 1].inspect}"
  end
  ARGV.delete_at i                  # remove the flag itself...
  start_at = ARGV.delete_at(i).to_i # ...after which its value sits at the same index
end

## Anything still starting with "--" is an option we do not know.
if (o = ARGV.find { |x| x =~ /^--/ })
  $stderr.puts "error: unknown option #{o}"
  educate_user
end

$terminal.wrap_at = :auto
Redwood::start
index = Redwood::Index.new
index.load

## Whatever is left in ARGV is a source description. Bare paths are
## treated as mbox files. Sources the index does not know yet are
## created according to their URI scheme and registered.
sources = ARGV.map do |uri|
  uri = "mbox://#{uri}" unless uri =~ %r!://!
  source = index.source_for uri
  unless source
    source =
      case uri
      when %r!^mbox\+ssh://!
        say "For SSH connections, if you will use public key authentication, you may leave the username and password blank."
        say "\n"
        username, password = get_login_info uri, index.sources
        Redwood::MBox::SSHLoader.new(uri, username, password, nil, !unusual, !!archive)
      when %r!^imaps?://!
        username, password = get_login_info uri, index.sources
        Redwood::IMAP.new(uri, username, password, nil, !unusual, !!archive)
      else
        Redwood::MBox::Loader.new(uri, nil, !unusual, !!archive)
      end
    index.add_source source
  end
  source
end

sources = (sources + index.usual_sources).uniq if the_usual

## When rebuilding, rewind every source: to the requested offset if
## --start-at was given, otherwise all the way.
if rebuild || force_rebuild
  if start_at
    sources.each { |s| s.seek_to! start_at }
  else
    sources.each { |s| s.reset! }
  end
end

found = {} # message id => true for every message seen during this run
start = Time.now
begin
  sources.each do |source|
    if source.broken?
      $stderr.puts "error loading messages from #{source}: #{source.broken_msg}"
      next
    end
    next if source.done?

    puts "loading from #{source}... "
    num = 0
    source.each do |offset, labels|
      ## Register the labels before any of them are stripped below.
      labels.each { |l| Redwood::LabelManager << l }
      labels -= [:inbox] if force_archive || archive
      labels -= [:unread] if force_read

      begin
        m = Redwood::Message.new :source => source, :source_info => offset, :labels => labels
        if found[m.id]
          puts "skipping duplicate message #{m.id}"
          next
        else
          found[m.id] = true
        end

        if m.source_marked_read?
          m.remove_label :unread
          labels -= [:unread]
        end
        puts "# message at #{offset}, labels: #{labels * ' '}" if verbose

        ## possibly rebuild the message
        ## The index is only consulted when rebuilding; a message without
        ## an existing entry falls through to a plain add.
        entry = nil
        if rebuild || force_rebuild
          _docid, entry = index.load_entry_for_id(m.id)
        end

        if entry
          oldlabels = entry[:label].split(" ").sort
          newlabels = labels.map { |x| x.to_s }.sort

          if force_rebuild || entry[:source_info].to_i != offset || (overwrite_labels && (oldlabels != newlabels))
            puts "replacing message #{m.id}: offset #{entry[:source_info]} => #{offset}, labels #{oldlabels * ' '} => #{newlabels * ' '}"
            m.labels = newlabels.map { |l| l.intern } if overwrite_labels
            num += 1 if index.update_message m, source, offset
          end
        else
          num += 1 if index.add_message m
        end
      rescue Redwood::MessageFormatError, Redwood::SourceError => e
        $stderr.puts "ignoring erroneous message at #{source}##{offset}: #{e.message}"
      end

      if num % 1000 == 0 && num > 0
        elapsed = Time.now - start
        pctdone = source.pct_done
        ## Extrapolating from zero progress would divide by zero, and
        ## to_time_s cannot convert the resulting Infinity/NaN to an
        ## integer, so no estimate is given in that case.
        estimate =
          if pctdone > 0
            ((100.0 - pctdone) * (elapsed.to_f / pctdone)).to_time_s
          else
            "unknown"
          end
        puts "## #{num} (#{pctdone}% done) read; #{elapsed.to_time_s} elapsed; est. #{estimate} remaining"
      end
    end
    puts "loaded #{num} messages" unless num == 0
  end
ensure
  ## Runs even if the import is interrupted by an exception.
  $stderr.puts "saving index and sources..."
  index.save
  Redwood::finish
end

## After a rebuild, drop every indexed message of these sources that
## was not encountered during this run.
## NOTE(review): this runs after index.save / Redwood::finish above --
## confirm the deletions are persisted. Also, messages belonging to
## sources skipped as broken or done are never put into `found`, so
## they look "missing" here -- verify that this is intended.
if rebuild || force_rebuild
  puts "deleting missing messages from the index..."
  numdel = num = 0
  sources.each do |source|
    raise "no source id for #{source}" unless source.id
    q = "+source_id:#{source.id}"
    q += " +source_info: >= #{start_at}" if start_at
    #p q
    num += index.index.search_each(q, :limit => :all) do |docid, score|
      mid = index.index[docid][:message_id]
      next if found[mid]
      puts "deleting #{mid}"
      index.index.delete docid
      numdel += 1
    end
    #p num
  end
  puts "deleted #{numdel} / #{num} messages"
end

if optimize
  puts "optimizing index..."
  optt = time { index.index.optimize }
  puts "optimized index of size #{index.size} in #{optt}s."
end