8 Thread.abort_on_exception = true # make debugging possible
11 def to_s; sprintf '%.2f', self; end
17 sprintf "%d:%02d:%02d", i / 3600, (i / 60) % 60, i % 60
27 opts = Trollop::options do
28 version "sup-import (sup #{Redwood::VERSION})"
30 Imports messages into the Sup index from one or more sources.
33 sup-import [options] <source>*
35 where <source>* is zero or more source URIs or mbox filenames. If no
36 sources are given, imports messages from all sources marked as
41 opt :archive, "Automatically archive any imported messages."
42 opt :read, "Automatically mark as read any imported messages."
43 opt :verbose, "Print message ids as they're processed."
44 opt :optimize, "As the last stage of the import, optimize the index."
47 The following options allow sup-import to consider *all* messages in the
48 source, not just new ones:
## Options that make sup-import walk the whole source instead of only
## looking at new messages. The help text for --start-at names the two
## options it actually applies to (--rebuild and --full-rebuild), matching
## the validation performed after option parsing.
opt :rebuild, "Scan over the entire source and update the index to account for any messages that have been deleted, altered, or moved from another source."
opt :full_rebuild, "Re-insert all messages in the source, not just ones that have changed or are new."
opt :start_at, "For --rebuild and --full-rebuild, start at the given offset.", :type => :int
opt :overwrite_state, "For --full-rebuild, overwrite the message state to the default state for that source, obeying --archive and --read if given."
## Reject invalid option combinations before any work is done. Each check
## reports the problem against a declared option via Trollop::die.
## --start-at must not be negative (an absent value is treated as 0 here).
Trollop::die :start_at, "must be non-negative" if (opts[:start_at] || 0) < 0
## --start-at only makes sense together with one of the rebuild modes.
Trollop::die :start_at, "requires either --rebuild or --full-rebuild" if opts[:start_at] && !(opts[:rebuild] || opts[:full_rebuild])
## --overwrite-state is only honoured by a full rebuild.
Trollop::die :overwrite_state, "requires --full-rebuild" if opts[:overwrite_state] && !opts[:full_rebuild]
## The two rebuild modes are mutually exclusive. The option is declared as
## :full_rebuild; there is no :force_rebuild option to report against.
Trollop::die :full_rebuild, "cannot be specified with --rebuild" if opts[:full_rebuild] && opts[:rebuild]
61 index = Redwood::Index.new
64 sources = ARGV.map do |uri|
65 uri = "mbox://#{uri}" unless uri =~ %r!://!
66 index.source_for uri or raise "Unknown source: #{uri}"
69 sources = index.usual_sources if sources.empty?
71 if opts[:rebuild] || opts[:full_rebuild]
73 sources.each { |s| s.seek_to! opts[:start_at] }
75 sources.each { |s| s.reset! }
79 last_update = start = Time.now
82 sources.each do |source|
85 puts "Scanning #{source}..."
86 Redwood::PollManager.add_new_messages_from source do |m, offset, entry|
87 ## if the entry exists on disk
88 if entry && !opts[:overwrite_state]
89 m.labels = entry[:label].split(/\s+/).map { |x| x.intern }
91 ## m.labels defaults to labels from the source
92 m.labels -= [:inbox] if opts[:archive]
93 m.labels -= [:unread] if opts[:read]
96 if Time.now - last_update > 60
97 last_update = Time.now
98 elapsed = last_update - start
## Percentage of the current source processed so far. Sources that respond
## to pct_done report it themselves; otherwise it is derived from the
## source's current, start and end offsets.
pctdone =
  if source.respond_to?(:pct_done)
    source.pct_done
  else
    100.0 * (source.cur_offset.to_f - source.start_offset) / (source.end_offset - source.start_offset).to_f
  end
## Extrapolate the time left from the time spent so far. If pctdone is zero,
## negative or not finite (e.g. end offset equal to start offset), the
## extrapolation would be Infinity or NaN, and those raise FloatDomainError
## when converted to an integer, so report 0.0 instead of aborting the
## import over a progress message.
remaining =
  if pctdone.to_f.finite? && pctdone > 0
    (100.0 - pctdone) * (elapsed.to_f / pctdone)
  else
    0.0
  end
101 puts "## #{num} (#{pctdone}% done) read; #{elapsed.to_time_s} elapsed; est. #{remaining.to_time_s} remaining"
105 if entry.nil? # it's a new message; or
106 puts "Adding message at #{offset}, labels: #{m.labels * ' '}" if opts[:verbose]
110 elsif opts[:full_rebuild] || # we're updating everyone; or
111 (opts[:rebuild] && (entry[:source_id].to_i != source.id || entry[:source_info].to_i != offset)) # we're updating just the changed ones
112 puts "Updating message at #{offset} (from #{m.from.longname}, subject '#{m.subj}'), source #{entry[:source_id]} => #{source.id}, offset #{entry[:source_info]} => #{offset}, labels: {#{m.labels * ', '}}" if opts[:verbose]
113 num_updated += 1 unless found[m.id]
121 puts "Added #{num_added}, updated #{num_updated} messages from #{source}."
124 puts "Saving index and sources..."
129 ## delete any messages in the index that claim they're from one of
130 ## these sources, but that we didn't see.
132 ## kinda crappy code here, because we delve directly into the Ferret
135 ## TODO: move this to Index, i suppose.
136 if opts[:rebuild] || opts[:full_rebuild]
137 puts "Deleting missing messages from the index..."
139 sources.each do |source|
140 raise "no source id for #{source}" unless source.id
141 q = "+source_id:#{source.id}"
142 q += " +source_info: >= #{opts[:start_at]}" if opts[:start_at]
143 num += index.index.search_each(q, :limit => :all) do |docid, score|
144 mid = index.index[docid][:message_id]
147 puts "Deleting #{mid}" if opts[:verbose]
148 index.index.delete docid
152 puts "Deleted #{numdel} / #{num} messages"
156 puts "Optimizing index..."
157 optt = time { index.index.optimize }
158 puts "Optimized index of size #{index.size} in #{optt}s."