From: Rich Lane Date: Wed, 17 Jun 2009 00:24:58 +0000 (-0700) Subject: index: cleanup interface X-Git-Url: https://git.notmuchmail.org/git?a=commitdiff_plain;h=5f9e826bb9eb3721ee6d1f7f1ce4893f57a543ce;p=sup index: cleanup interface Added the public methods 'each_docid', 'each_message', and 'optimize' to the index. Removed the 'index' and 'ferret' accessors and modified their callers to use the new methods. Bonus fixes: sup-dump no longer skips the first message and sup-sync --start_at can now delete unseen messages. --- diff --git a/bin/sup-dump b/bin/sup-dump index 29f6d6e..9b0892e 100755 --- a/bin/sup-dump +++ b/bin/sup-dump @@ -24,8 +24,6 @@ end index = Redwood::Index.new index.load -(1 ... index.index.reader.max_doc).each do |i| - next if index.index.deleted? i - d = index.index[i] - puts [d[:message_id], "(" + d[:label] + ")"] * " " +index.each_message do |m| + puts "#{m.id} (#{m.labels * ' '})" end diff --git a/bin/sup-sync b/bin/sup-sync index 9c342d2..a6e3478 100755 --- a/bin/sup-sync +++ b/bin/sup-sync @@ -208,24 +208,17 @@ begin ## delete any messages in the index that claim they're from one of ## these sources, but that we didn't see. - ## - ## kinda crappy code here, because we delve directly into the Ferret - ## API. - ## - ## TODO: move this to Index, i suppose. - if (target == :all || target == :changed) && !opts[:start_at] + if (target == :all || target == :changed) $stderr.puts "Deleting missing messages from the index..." 
num_del, num_scanned = 0, 0 sources.each do |source| raise "no source id for #{source}" unless source.id - q = "+source_id:#{source.id}" - q += " +source_info: >= #{opts[:start_at]}" if opts[:start_at] - index.index.search_each(q, :limit => :all) do |docid, score| + index.each_message :source_id => source.id do |m| num_scanned += 1 - mid = index.index[docid][:message_id] - unless seen[mid] - puts "Deleting #{mid}" if opts[:verbose] - index.index.delete docid unless opts[:dry_run] + unless seen[m.id] + next unless m.source_info >= opts[:start_at] if opts[:start_at] + puts "Deleting #{m.id}" if opts[:verbose] + index.drop_entry m.id unless opts[:dry_run] num_del += 1 end end @@ -237,7 +230,7 @@ begin if opts[:optimize] $stderr.puts "Optimizing index..." - optt = time { index.index.optimize unless opts[:dry_run] } + optt = time { index.optimize unless opts[:dry_run] } $stderr.puts "Optimized index of size #{index.size} in #{optt}s." end rescue Redwood::FatalSourceError => e diff --git a/bin/sup-tweak-labels b/bin/sup-tweak-labels index 538db8b..f526a95 100755 --- a/bin/sup-tweak-labels +++ b/bin/sup-tweak-labels @@ -118,7 +118,7 @@ begin unless num_changed == 0 $stderr.puts "Optimizing index..." - index.ferret.optimize unless opts[:dry_run] + index.optimize unless opts[:dry_run] end rescue Exception => e diff --git a/lib/sup/index.rb b/lib/sup/index.rb index ca01ee7..c0910b6 100644 --- a/lib/sup/index.rb +++ b/lib/sup/index.rb @@ -24,11 +24,6 @@ class Index include Singleton - ## these two accessors should ONLY be used by single-threaded programs. - ## otherwise you will have a naughty ferret on your hands. - attr_reader :index - alias ferret index - def initialize dir=BASE_DIR @index_mutex = Monitor.new @@ -151,7 +146,7 @@ EOS if File.exists? dir Redwood::log "loading index..." 
@index_mutex.synchronize do - @index = Ferret::Index::Index.new(:path => dir, :analyzer => @analyzer) + @index = Ferret::Index::Index.new(:path => dir, :analyzer => @analyzer, :id_field => 'message_id') Redwood::log "loaded index of #{@index.size} messages" end else @@ -171,7 +166,7 @@ EOS field_infos.add_field :refs field_infos.add_field :snippet, :index => :no, :term_vector => :no field_infos.create_index dir - @index = Ferret::Index::Index.new(:path => dir, :analyzer => @analyzer) + @index = Ferret::Index::Index.new(:path => dir, :analyzer => @analyzer, :id_field => 'message_id') end end end @@ -496,6 +491,22 @@ EOS results.hits.map { |hit| hit.doc } end + def each_docid opts={} + query = build_query opts + results = @index_mutex.synchronize { @index.search query, :limit => (opts[:limit] || :all) } + results.hits.map { |hit| yield hit.doc } + end + + def each_message opts={} + each_docid opts do |docid| + yield build_message(docid) + end + end + + def optimize + @index_mutex.synchronize { @index.optimize } + end + protected class ParseError < StandardError; end @@ -621,6 +632,8 @@ protected query.add_query Ferret::Search::TermQuery.new("label", "spam"), :must_not unless opts[:load_spam] || labels.include?(:spam) query.add_query Ferret::Search::TermQuery.new("label", "deleted"), :must_not unless opts[:load_deleted] || labels.include?(:deleted) query.add_query Ferret::Search::TermQuery.new("label", "killed"), :must_not if opts[:skip_killed] + + query.add_query Ferret::Search::TermQuery.new("source_id", opts[:source_id]), :must if opts[:source_id] query end