1 ## Herein lies all the code responsible for threading messages. It's
2 ## basically an online version of the JWZ threading algorithm:
3 ## http://www.jwz.org/doc/threading.html
5 ## I didn't implement it for efficiency, but thanks to our search
6 ## engine backend, it's typically not applied to very many messages at
9 ## At the top level, we have a ThreadSet, which represents a set of
10 ## threads, e.g. a message folder or an inbox. Each ThreadSet contains
11 ## zero or more Threads. A Thread represents all the message related
12 ## to a particular subject. Each Thread has one or more Containers. A
13 ## Container is a recursive structure that holds the message tree as
14 ## determined by the references: and in-reply-to: headers. Each
15 ## Container holds zero or one messages. In the case of zero messages,
16 ## it means we've seen a reference to the message but haven't (yet)
17 ## seen the message itself.
19 ## A Thread can have multiple top-level Containers if we decide to
20 ## group them together independent of tree structure, typically if
21 ## (e.g. due to someone using a primitive MUA) the messages have the
22 ## same subject but we don't have evidence from in-reply-to: or
23 ## references: headers. In this case Thread#each can optionally yield
24 ## a faked root object tying them all together into one tree
32 attr_reader :containers
34 ## ah, the joys of a multithreaded application with a class called
35 ## "Thread". i keep instantiating the wrong one...
36 raise "wrong Thread class, buddy!" if block_given?
44 def empty?; @containers.empty?; end
45 def empty!; @containers.clear; end
46 def drop c; @containers.delete(c) or raise "bad drop"; end
50 f.puts "=== start thread with #{@containers.length} trees ==="
51 @containers.each { |c| c.dump_recursive f; f.puts }
52 f.puts "=== end thread ==="
55 ## yields each message, its depth, and its parent. the message yield
56 ## parameter can be a Message object, or :fake_root, or nil (no
57 ## message found but the presence of one deduced from other
59 def each fake_root=false
61 root = @containers.find_all { |c| c.message && !Message.subj_is_reply?(c.message.subj) }.argmin { |c| c.date }
65 root.first_useful_descendant.each_with_stuff do |c, d, par|
66 yield c.message, d, (par ? par.message : nil)
68 elsif @containers.length > 1 && fake_root
70 yield :fake_root, 0, nil
73 @containers.each do |cont|
75 fud = cont.first_useful_descendant
76 fud.each_with_stuff do |c, d, par|
77 ## special case here: if we're an empty root that's already
78 ## been joined by a fake root, don't emit
79 yield c.message, d + adj, (par ? par.message : nil) unless
80 fake_root && c.message.nil? && root.nil? && c == fud
85 def first; each { |m, *o| return m if m }; nil; end
86 def dirty?; any? { |m, *o| m && m.dirty? }; end
87 def date; map { |m, *o| m.date if m }.compact.max; end
89 with_snippets = select { |m, *o| m && m.snippet && !m.snippet.empty? }
90 first_unread, * = with_snippets.select { |m, *o| m.has_label?(:unread) }.sort_by { |m, *o| m.date }.first
91 return first_unread.snippet if first_unread
92 last_read, * = with_snippets.sort_by { |m, *o| m.date }.last
93 return last_read.snippet if last_read
96 def authors; map { |m, *o| m.from if m }.compact.uniq; end
98 def apply_label t; each { |m, *o| m && m.add_label(t) }; end
99 def remove_label t; each { |m, *o| m && m.remove_label(t) }; end
101 def toggle_label label
111 def set_labels l; each { |m, *o| m && m.labels = l }; end
113 def has_label? t; any? { |m, *o| m && m.has_label?(t) }; end
114 def save index; each { |m, *o| m && m.save(index) }; end
116 def direct_participants
117 map { |m, *o| [m.from] + m.to if m }.flatten.compact.uniq
121 map { |m, *o| [m.from] + m.to + m.cc + m.bcc if m }.flatten.compact.uniq
124 def size; map { |m, *o| m ? 1 : 0 }.sum; end
125 def subj; argfind { |m, *o| m && m.subj }; end
127 map { |m, *o| m && m.labels }.flatten.compact.uniq.sort_by { |t| t.to_s }
130 each { |m, *o| m && m.labels = l.clone }
134 inject(nil) do |a, b|
141 b.date > a.date ? b : a
147 "<thread containing: #{@containers.join ', '}>"
151 ## recursive structure used internally to represent message trees as
152 ## described by reply-to: and references: headers.
154 ## the 'id' field is the same as the message id. but the message might
155 ## be empty, in the case that we represent a message that was referenced
156 ## by another message (as an ancestor) but never received.
158 attr_accessor :message, :parent, :children, :id, :thread
161 raise "non-String #{id.inspect}" unless id.is_a? String
163 @message, @parent, @thread = nil, nil, nil
167 def each_with_stuff parent=nil
168 yield self, 0, parent
169 @children.each do |c|
170 c.each_with_stuff(self) { |cc, d, par| yield cc, d + 1, par }
178 @parent && @parent.descendant_of?(o)
182 def == o; Container === o && id == o.id; end
184 def empty?; @message.nil?; end
185 def root?; @parent.nil?; end
186 def root; root? ? self : @parent.root; end
188 ## skip over any containers which are empty and have only one child. we use
189 ## this make the threaded display a little nicer, and only stick in the
190 ## "missing message" line when it's graphically necessary, i.e. when the
191 ## missing message has more than one descendent.
192 def first_useful_descendant
193 if empty? && @children.size == 1
194 @children.first.first_useful_descendant
202 @children.argfind { |c| c.find_attr attr }
207 def subj; find_attr :subj; end
208 def date; find_attr :date; end
210 def is_reply?; subj && Message.subj_is_reply?(subj); end
214 (@parent.nil? ? nil : "parent=#{@parent.id}"),
215 (@children.empty? ? nil : "children=#{@children.map { |c| c.id }.inspect}"),
216 ].compact.join(" ") + ">"
219 def dump_recursive f=$stdout, indent=0, root=true, parent=nil
220 raise "inconsistency" unless parent.nil? || parent.children.include?(self)
225 line = "[#{thread.nil? ? ' ' : '*'}] " + #"[#{useful? ? 'U' : ' '}] " +
227 message.subj ##{@message.refs.inspect} / #{@message.replytos.inspect}"
232 f.puts "#{id} #{line}"#[0 .. (105 - indent)]
234 @children.each { |c| c.dump_recursive f, indent, false, self }
238 ## A set of threads, so a forest. Is integrated with the index and
239 ## builds thread structures by reading messages from it.
241 ## If 'thread_by_subj' is true, puts messages with the same subject in
242 ## one thread, even if they don't reference each other. This is
243 ## helpful for crappy MUAs that don't set In-reply-to: or References:
244 ## headers, but means that messages may be threaded unnecessarily.
246 ## The following invariants are maintained: every Thread has at least one
247 ## Container tree, and every Container tree has at least one Message.
249 attr_reader :num_messages
250 bool_reader :thread_by_subj
252 def initialize index, thread_by_subj=true
255 ## map from message ids to container objects
256 @messages = SavingHash.new { |id| Container.new id }
257 ## map from subject strings or (or root message ids) to thread objects
258 @threads = SavingHash.new { Thread.new }
259 @thread_by_subj = thread_by_subj
262 def thread_for_id mid; (c = @messages[mid]) && c.root.thread end
263 def contains_id? id; @messages.member?(id) && !@messages[id].empty? end
264 def thread_for m; thread_for_id m.id end
265 def contains? m; contains_id? m.id end
267 def threads; @threads.values end
268 def size; @threads.size end
271 @threads.each do |s, t|
272 f.puts "**********************"
273 f.puts "** for subject #{s} **"
274 f.puts "**********************"
279 ## link two containers
280 def link p, c, overwrite=false
281 if p == c || p.descendant_of?(c) || c.descendant_of?(p) # would create a loop
282 #puts "*** linking parent #{p.id} and child #{c.id} would create a loop"
286 #puts "in link for #{p.id} to #{c.id}, perform? #{c.parent.nil?} || #{overwrite}"
288 return unless c.parent.nil? || overwrite
293 ## if the child was previously a top-level container, it now ain't,
294 ## so ditch our thread and kill it if necessary
299 def remove_container c
300 c.parent.children.delete c if c.parent # remove from tree
302 private :remove_container
304 def prune_thread_of c
305 return unless c.thread
307 @threads.delete_if { |k, v| v == c.thread } if c.thread.empty?
310 private :prune_thread_of
313 return unless(c = @messages[mid])
318 def remove_thread_containing_id mid
319 c = @messages[mid] or return
321 @threads.delete_if { |key, thread| t == thread }
324 ## load in (at most) num number of threads from the index
325 def load_n_threads num, opts={}
326 @index.each_id_by_date opts do |mid, builder|
328 next if contains_id? mid
331 load_thread_for_message m, :skip_killed => opts[:skip_killed], :load_deleted => opts[:load_deleted], :load_spam => opts[:load_spam]
332 yield size if block_given?
336 ## loads in all messages needed to thread m
337 ## may do nothing if m's thread is killed
338 def load_thread_for_message m, opts={}
339 good = @index.each_message_in_thread_for m, opts do |mid, builder|
340 next if contains_id? mid
341 add_message builder.call
343 add_message m if good
346 ## merges in a pre-loaded thread
348 raise "duplicate" if @threads.values.member? t
349 t.each { |m, *o| add_message m }
352 ## merges two threads together. both must be members of this threadset.
353 ## does its best, heuristically, to determine which is the parent.
354 def join_threads threads
355 return if threads.size < 2
357 containers = threads.map do |t|
358 c = @messages[t.first.id]
359 raise "not in threadset: #{t.first.id}" unless c && c.message
363 ## use subject headers heuristically
364 parent = containers.find { |c| !c.is_reply? }
366 ## no thread was rooted by a non-reply, so make a fake parent
367 parent ||= @messages["joining-ref-" + containers.map { |c| c.id }.join("-")]
369 containers.each do |c|
371 c.message.add_ref parent.id
379 m.refs.any? { |ref_id| @messages.member? ref_id }
382 ## the heart of the threading code
383 def add_message message
384 el = @messages[message.id]
385 return if el.message # we've seen it before
387 #puts "adding: #{message.id}, refs #{message.refs.inspect}"
392 ## link via references:
393 (message.refs + [el.id]).inject(nil) do |prev, ref_id|
394 ref = @messages[ref_id]
395 link prev, ref if prev
399 ## link via in-reply-to:
400 message.replytos.each do |ref_id|
401 ref = @messages[ref_id]
403 break # only do the first one
409 Message.normalize_subj root.subj
414 ## check to see if the subject is still the same (in the case
415 ## that we first added a child message with a different
418 unless @threads[key] == root.thread
421 @threads[key] << root
422 root.thread = @threads[key]
424 @threads[key] = root.thread
428 thread = @threads[key]