#!/usr/bin/env ruby

# sup-import: command-line script that imports messages from one or more
# sources into the Sup index, optionally rescanning sources to pick up
# changed or deleted messages and optionally optimizing the index afterwards.

require 'uri'
require 'rubygems'
require 'trollop'
require "sup"

Thread.abort_on_exception = true # make debugging possible

## NOTE: this globally overrides Float#to_s, so every interpolated float in
## this process (e.g. the percentage and timing output below) is printed
## with exactly two decimal places.
class Float
  def to_s
    sprintf '%.2f', self
  end
end

class Numeric
  ## Formats a number of seconds as "H:MM:SS" (fractional part truncated).
  def to_time_s
    i = to_i
    sprintf "%d:%02d:%02d", i / 3600, (i / 60) % 60, i % 60
  end
end

## Runs the given block and returns the wall-clock time it took, in seconds.
def time
  startt = Time.now
  yield
  Time.now - startt
end

## NOTE(review): the two heredocs below and the declarations of --rebuild,
## --full-rebuild and --start-at were destroyed in the copy of this file that
## was reviewed (everything between a "<" and the following ">" was lost).
## They are reconstructed here from the surviving fragments and from the way
## the options are used further down; verify the exact wording against the
## upstream script.
opts = Trollop::options do
  version "sup-import (sup #{Redwood::VERSION})"
  banner <<EOS
Imports messages into the Sup index from one or more sources.

Usage:
  sup-import [options] <source>*

where <source>* is zero or more source URIs or mbox filenames, e.g.
"imaps://my.imapserver.com", or "/var/spool/mail/me". If no sources
are given, imports messages from all sources marked as "usual".

Options are:
EOS
  opt :archive, "Automatically archive any imported messages."
  opt :read, "Automatically mark as read any imported messages."
  opt :verbose, "Print message ids as they're processed."
  opt :optimize, "As the last stage of the import, optimize the index."
  text <<EOS

The following options allow sup-import to consider *all* messages in the
source, not just new ones:
EOS
  opt :rebuild, "Scan over the entire source and update the index to account for any messages that have been deleted, altered, or moved from another source."
  opt :full_rebuild, "Re-insert all messages in the source, not just ones that have changed or are new."
  opt :start_at, "For rescanning, start at a particular offset into the source.", :type => :int
  opt :overwrite_state, "For --full-rebuild, overwrite the message state to the default state for that source, obeying --archive and --read if given."
end

Trollop::die :start_at, "must be non-negative" if (opts[:start_at] || 0) < 0
Trollop::die :start_at, "requires either --rebuild or --full-rebuild" if opts[:start_at] && !(opts[:rebuild] || opts[:full_rebuild])
Trollop::die :overwrite_state, "requires --full-rebuild" if opts[:overwrite_state] && !opts[:full_rebuild]
## The option is called :full_rebuild; the original passed the undeclared
## name :force_rebuild here, so reporting this error would itself fail.
Trollop::die :full_rebuild, "cannot be specified with --rebuild" if opts[:full_rebuild] && opts[:rebuild]

Redwood::start
index = Redwood::Index.new
index.load

## Bare filenames on the command line are treated as mbox sources.
sources = ARGV.map do |uri|
  uri = "mbox://#{uri}" unless uri =~ %r!://!
  index.source_for(uri) || raise("Unknown source: #{uri}")
end

sources = index.usual_sources if sources.empty?

## When rescanning, rewind each source (or seek it to the requested offset)
## so that messages already seen are visited again.
if opts[:rebuild] || opts[:full_rebuild]
  if opts[:start_at]
    sources.each { |s| s.seek_to! opts[:start_at] }
  else
    sources.each { |s| s.reset! }
  end
end

start = Time.now
found = {} # message id => true for every message seen during this run

begin
  sources.each do |source|
    num = 0
    ## The remaining-time estimate is extrapolated from this source's own
    ## progress, so it must be based on the time spent on this source only.
    source_start = Time.now

    index.add_new_messages_from source do |m, offset, source_labels, entry|
      found[m.id] = true

      m.labels = source_labels if opts[:overwrite_state]
      m.labels -= [:inbox] if opts[:archive]
      m.labels -= [:unread] if opts[:read]
      num += 1

      if num % 1000 == 0
        now = Time.now
        elapsed = now - start
        pctdone = source.pct_done
        ## Guard against 0% progress: dividing by it gives Infinity, and
        ## to_time_s would then raise FloatDomainError.
        estimate =
          if pctdone > 0
            ((100.0 - pctdone) * ((now - source_start).to_f / pctdone)).to_time_s
          else
            "unknown"
          end
        puts "## #{num} (#{pctdone}% done) read; #{elapsed.to_time_s} elapsed; est. #{estimate} remaining"
      end

      ## The block's value tells the index what to do: the message to
      ## add/update it, or nil to leave the existing entry alone.
      ## update if...
      if entry.nil? # it's a new message; or
        puts "# adding message at #{offset}, labels: #{m.labels * ' '}" if opts[:verbose]
        m
      elsif opts[:full_rebuild] || # we're updating everyone; or
            (opts[:rebuild] && (entry[:source_id].to_i != source.id || entry[:source_info].to_i != offset)) # we're updating just the changed ones
        puts "# updating message at #{offset}, source #{entry[:source_id]} => #{source.id}, offset #{entry[:source_info]} => #{offset}, labels: #{m.labels * ' '}" if opts[:verbose]
        m
      else
        nil
      end
    end
    puts "loaded #{num} messages from #{source}" unless num == 0
  end
ensure
  ## Always persist whatever was imported, even if a source raised.
  $stderr.puts "saving index and sources..."
  index.save
  Redwood::finish
end

## delete any messages in the index that claim they're from one of
## these sources, but that we didn't see.
##
## kinda crappy code here, because we delve directly into the Ferret
## API.
##
## TODO: move this to Index, i suppose.
##
## NOTE(review): this runs after index.save / Redwood::finish above; confirm
## that deletions made here are actually persisted.
if opts[:rebuild] || opts[:full_rebuild]
  puts "deleting missing messages from the index..."
  numdel = num = 0
  sources.each do |source|
    raise "no source id for #{source}" unless source.id
    q = "+source_id:#{source.id}"
    ## Only consider entries at or after the rescan offset; earlier ones
    ## were not visited and so are legitimately absent from `found`.
    q += " +source_info: >= #{opts[:start_at]}" if opts[:start_at]
    ## search_each's return value is added to the total reported below.
    num += index.index.search_each(q, :limit => :all) do |docid, score|
      mid = index.index[docid][:message_id]
      # puts "got #{mid}"
      next if found[mid]
      puts "deleting #{mid}"
      index.index.delete docid
      numdel += 1
    end
  end
  puts "deleted #{numdel} / #{num} messages"
end

if opts[:optimize]
  puts "optimizing index..."
  optt = time { index.index.optimize }
  puts "optimized index of size #{index.size} in #{optt}s."
end