From: wmorgan Date: Sun, 1 Apr 2007 04:00:47 +0000 (+0000) Subject: dump and restore functionality X-Git-Url: https://git.notmuchmail.org/git?a=commitdiff_plain;h=9bcaba24e29748e1124f56f5994a26db676f2dc1;p=sup dump and restore functionality git-svn-id: svn://rubyforge.org/var/svn/sup/trunk@354 5c8cc53c-5e98-4d25-b20a-d8db53a31250 --- diff --git a/Manifest.txt b/Manifest.txt index a1252e1..722f119 100644 --- a/Manifest.txt +++ b/Manifest.txt @@ -6,8 +6,9 @@ README.txt Rakefile bin/sup bin/sup-add -bin/sup-sync +bin/sup-dump bin/sup-recover-sources +bin/sup-sync doc/FAQ.txt doc/Philosophy.txt doc/TODO diff --git a/bin/sup-dump b/bin/sup-dump new file mode 100644 index 0000000..29f6d6e --- /dev/null +++ b/bin/sup-dump @@ -0,0 +1,31 @@ +#!/usr/bin/env ruby + +require 'rubygems' +require 'trollop' +require "sup" + +$opts = Trollop::options do + version "sup-dump (sup #{Redwood::VERSION})" + banner < to recover the index. + +This tool is primarily useful in the event that a Ferret upgrade breaks +the index format. This happened, for example, at Ferret version 0.11. + +Usage: + sup-dump > + sup-dump | bzip2 > # even better + +No options. +EOS +end + +index = Redwood::Index.new +index.load + +(1 ... index.index.reader.max_doc).each do |i| + next if index.index.deleted? i + d = index.index[i] + puts [d[:message_id], "(" + d[:label] + ")"] * " " +end diff --git a/bin/sup-sync b/bin/sup-sync index 68f462d..bf7538d 100644 --- a/bin/sup-sync +++ b/bin/sup-sync @@ -56,7 +56,7 @@ Options controlling WHICH messages sup-sync operates on: EOS opt :new, "Operate on new messages only. Don't scan over the entire source. (Default.)", :short => :none opt :changed, "Scan over the entire source for messages that have been deleted, altered, or moved from another source. (In the case of mbox sources, this includes all messages AFTER an altered message.)" - opt :restored, "Operate only on those messages included in a dump file as specified by --restore." 
+ opt :restored, "Operate only on those messages included in a dump file as specified by --restore which have changed state." opt :all, "Operate on all messages in the source, regardless of newness or changedness." opt :start_at, "For --changed and --all, start at a particular offset.", :type => :int @@ -77,13 +77,14 @@ Other options: EOS opt :verbose, "Print message ids as they're processed." opt :optimize, "As the final operation, optimize the index." + opt :all_sources, "Scan over all sources.", :short => :none opt :dry_run, "Don't actually modify the index. Probably only useful with --verbose.", :short => "-n" opt :version, "Show version information", :short => :none conflicts :changed, :all, :new, :restored conflicts :asis, :restore, :discard end -Trollop::die :restored, "requires --restore" if opts[:restore] unless opts[:restored] +Trollop::die :restored, "requires --restore" if opts[:restored] unless opts[:restore] if opts[:start_at] Trollop::die :start_at, "must be non-negative" if opts[:start_at] < 0 Trollop::die :start_at, "requires either --changed or --all" unless opts[:changed] || opts[:all] @@ -101,8 +102,8 @@ restored_state = dump = {} $stderr.puts "Loading state dump from #{opts[:restore]}..." IO.foreach opts[:restore] do |l| - l =~ /^(\S+) (\d+) (\d+) \((.*?)\)$/ or raise "Can't read dump line: #{l.inspect}" - mid, source_id, source_info, labels = $1, $2.to_i, $3.to_i, $4 + l =~ /^(\S+) \((.*?)\)$/ or raise "Can't read dump line: #{l.inspect}" + mid, labels = $1, $2 dump[mid] = labels.split(" ").map { |x| x.intern } end $stderr.puts "Read #{dump.size} entries from dump file." @@ -117,6 +118,7 @@ sources = ARGV.map do |uri| end sources = index.usual_sources if sources.empty? 
+sources = index.sources if opts[:all_sources] unless target == :new if opts[:start_at] @@ -126,12 +128,12 @@ unless target == :new end end -last_info_time = start_time = Time.now seen = {} begin sources.each do |source| - num_added, num_updated, num_scanned = 0, 0, 0 $stderr.puts "Scanning #{source}..." + num_added = num_updated = num_scanned = num_restored = 0 + last_info_time = start_time = Time.now Redwood::PollManager.add_messages_from source do |m, offset, entry| num_scanned += 1 @@ -142,9 +144,17 @@ begin ## reporting. next if target == :changed && entry && entry[:source_id].to_i == source.id && entry[:source_info].to_i == offset + ## get the state currently in the index + index_state = + if entry + entry[:label].split(/\s+/).map { |x| x.intern } + else + nil + end + ## skip if we're operating on restored messages, and this one ## ain't. - next if target == :restored && !restored_state[m.id] + next if target == :restored && (!restored_state[m.id] || restored_state[m.id].sort_by { |s| s.to_s } == index_state.sort_by { |s| s.to_s }) ## m.labels is the default source labels. tweak these according ## to default source state modification flags. @@ -152,14 +162,6 @@ begin m.labels -= [:unread] if opts[:read] m.labels += opts[:extra_labels].split(/\s*,\s*/).map { |x| x.intern } if opts[:extra_labels] - ## get the state currently in the index - index_state = - if entry - entry[:label].split(/\s+/).map { |x| x.intern } - else - nil - end - ## assign message labels based on the operation we're performing case op when :asis @@ -168,6 +170,7 @@ begin ## if the entry exists on disk if restored_state[m.id] m.labels = restored_state[m.id] + num_restored += 1 elsif index_state m.labels = index_state end @@ -178,9 +181,9 @@ begin if Time.now - last_info_time > 60 last_info_time = Time.now elapsed = last_info_time - start_time - pctdone = source.respond_to?(:pct_done) ? 
source.pct_done : 100.0 * (source.cur_offset.to_f - source.start_time_offset).to_f / (source.end_offset - source.start_time_offset).to_f + pctdone = source.respond_to?(:pct_done) ? source.pct_done : 100.0 * (source.cur_offset.to_f - source.start_offset).to_f / (source.end_offset - source.start_offset).to_f remaining = (100.0 - pctdone) * (elapsed.to_f / pctdone) - puts "## #{num_added + num_updated} (#{pctdone}% done) read; #{elapsed.to_time_s} elapsed; est. #{remaining.to_time_s} remaining" + $stderr.puts "## #{num_added + num_updated} (#{pctdone}% done) read; #{elapsed.to_time_s} elapsed; est. #{remaining.to_time_s} remaining (for this source)" end if index_state.nil? @@ -193,7 +196,8 @@ begin opts[:dry_run] ? nil : m end - $stderr.puts "Added #{num_added}, updated #{num_updated} messages from #{source}." + $stderr.puts "Scanned #{num_scanned}, added #{num_added}, updated #{num_updated} messages from #{source}." + $stderr.puts "Restored state on #{num_restored} (#{100.0 * num_restored / num_scanned}%) messages." if num_restored > 0 end ensure $stderr.puts "Saving index and sources..." diff --git a/doc/FAQ.txt b/doc/FAQ.txt index 62add38..159f9b7 100644 --- a/doc/FAQ.txt +++ b/doc/FAQ.txt @@ -8,11 +8,9 @@ Q: If you love GMail so much, why not just use it? A: I hate ads, I hate using a mouse, and I hate non-programmability and non-extensibility. - Also, GMail encourages top-posting in a variety of ways. THIS - CANNOT BE TOLERATED! + Also, GMail encourages top-posting. THIS CANNOT BE TOLERATED! Q: Why the console? - A: Because a keystroke is with a hundred mouse clicks (as any Unix user knows). Because you don't need web browser. Because you get instantaneous response and a simple interface. @@ -23,33 +21,71 @@ A: You can manually mark messages as spam, which prevents them from filtering should be done by a dedicated tool like SpamAssassin. Q: How do I delete a message? -A: Press the 'd' key. +A: Why delete? Unless it's spam, you might as well just archive it. 
+ +Q: C'mon, really now! +A: Ok, press the 'd' key. Q: But I want to delete it for real, not just add a 'deleted' flag in the index. I want it gone from disk! -A: Deleting a message is an old-fashioned concept. In the modern - world, disk space is cheap enough that you should never have to - delete a message. If it's spam, save it for future analysis. - -Q: C'mon, really now! A: Ok, at some point I plan to have a batch deletion tool that will run through a source and delete all messages that have a 'spam' or - 'deleted' tags (and, for mbox sources, will update the offsets of - all later messages). But that doesn't exist yet. + 'deleted' tags. But that doesn't exist yet. -Q: I got some error message about needing to run sup-import --rescan +Q: I got some error message about needing to run sup-sync --changed when I tried to read a message. What's that about? A: If messages have been moved, deleted, or altered in a source, Sup may have to rebuild its index for that source. For example, for - mbox files, even reading a message changes the offsets of every - file on disk. Rather than rescanning every time, Sup assumes + mbox files, reading a single unread message changes the offsets of + every file on disk. Rather than rescanning every time, Sup assumes sources don't change except by having new messages added. If that - assumption is violated, you'll have to run sup-import --rescan. + assumption is violated, you'll have to sync the index. The alternative is to rescan every source when Sup starts up. Because Sup is designed to work with arbitrarily large mbox files, this would not be a good idea. +Q: How do I back up my index? +Q: How do I make a state dump? +A: Since the contents of the messages are recoverable from their + sources using sup-sync, all you need to back up is the message + state. To do this, simply run: + sup-dump > <filename> + This will save all message state in a big text file, which you + should probably compress.
+ +Q: How do I restore the message state I saved in my state dump? +A: Run: + sup-sync [<source>+] --restored --restore <filename> + where <filename> was created as above. + +Q: I see this message from Ferret: + Error occured in index.c:825 - sis_find_segments_file +A: Yikes! You've upgraded Ferret and the index format changed beneath + you. Follow the index rebuild instructions below. + +Q: I upgraded Ferret and the index format changed. I need to + completely rebuild my index. How do I do this? +A: First, you'll need a complete state dump. If you haven't made + one, you'll need to downgrade Ferret and make a state dump as + above. Then run these commands: + rm -rf ~/.sup/ferret # omg wtf + sup-sync --all-sources --all --restore <filename> + Voila! A brand new index. + +Q: I want to move messages from one source to another. (E.g., my + primary inbox is an IMAP server with a quota, and I want to move + some of those messages to local mbox files.) How do I do that while + preserving message state? +A: Move the messages from the source to the target using whatever tool + you'd like. Then (and this is the important part), run: + sup-sync --changed <source1> <source2> + + If you sup-sync only one source at a time, depending on the order, + the messages may be treated as missing and then deleted from the + index, which means that their state will be lost when you sync the + other source. + Q: What are all these "Redwood" references I see in the code? A: That was Sup's original name. (Think pine, elm. Although I am a Mutt user, I couldn't think of a good progression there.) But it was @@ -59,18 +95,9 @@ A: That was Sup's original name. (Think pine, elm. Although I am a Maybe one day I'll do a huge search-and-replace on the code, but it doesn't seem that important at this point. -Q: I want to move messages from one source to another. (E.g., my - primary inbox is an IMAP server with a quota, and I want to move - some of those messages to local mbox files.) How do I do that while - preserving message state?
-A: Move the messages from the source to the target using whatever tool - you'd like. Then (and this is the important part), sup-import - --rebuild both sources at once. If you do it one at a time, you may - lose message state. (Depending, actually, on which order you do it - in. But just do them both at once.) - Q: How is Sup possible? A: Sup is only possible through the hard work of Dave Balmain, the author of ferret, which is the search engine behind Sup. Ferret is really a first-class piece of software, and it's due to the tremendous amount of time and effort he's put in to it. +