1 ## Herein is all the code responsible for threading messages. I use an
2 ## online version of the JWZ threading algorithm:
3 ## http://www.jwz.org/doc/threading.html
5 ## I certainly didn't implement it for efficiency, but thanks to our
6 ## search engine backend, it's typically not applied to very many
9 ## At the top level, we have a ThreadSet. A ThreadSet represents a set
10 ## of threads, e.g. a message folder or an inbox. Each ThreadSet
11 ## contains zero or more Threads. A Thread represents all the message
12 ## related to a particular subject. Each Thread has one or more
13 ## Containers. A Container is a recursive structure that holds the
14 ## tree structure as determined by the references: and in-reply-to:
15 ## headers. A Thread with multiple Containers occurs if they have the
16 ## same subject, but (most likely due to someone using a primitive
17 ## MUA) we don't have evidence from in-reply-to: or references:
18 ## headers, only subject: (and thus our tree is probably broken). A
19 ## Container holds zero or one message. In the case of no message, it
20 ## means we've seen a reference to the message but haven't seen the
21 ## message itself (yet).
28 attr_reader :containers
30 ## ah, the joys of a multithreaded application with a class called
31 ## "Thread". i keep instantiating the wrong one...
32 raise "wrong Thread class, buddy!" if block_given?
40 def empty?; @containers.empty?; end
43 raise "bad drop" unless @containers.member? c
48 puts "=== start thread #{self} with #{@containers.length} trees ==="
49 @containers.each { |c| c.dump_recursive }
50 puts "=== end thread ==="
53 ## yields each message, its depth, and its parent. note that the
54 ## message can be a Message object, or :fake_root, or nil.
55 def each fake_root=false
57 root = @containers.find_all { |c| !Message.subj_is_reply?(c) }.argmin { |c| c.date }
61 root.first_useful_descendant.each_with_stuff do |c, d, par|
62 yield c.message, d, (par ? par.message : nil)
64 elsif @containers.length > 1 && fake_root
66 yield :fake_root, 0, nil
69 @containers.each do |cont|
71 fud = cont.first_useful_descendant
72 fud.each_with_stuff do |c, d, par|
73 ## special case here: if we're an empty root that's already
74 ## been joined by a fake root, don't emit
75 yield c.message, d + adj, (par ? par.message : nil) unless
76 fake_root && c.message.nil? && root.nil? && c == fud
81 def first; each { |m, *o| return m if m }; nil; end
82 def dirty?; any? { |m, *o| m && m.dirty? }; end
83 def date; map { |m, *o| m.date if m }.compact.max; end
84 def snippet; argfind { |m, *o| m && m.snippet }; end
85 def authors; map { |m, *o| m.from if m }.compact.uniq; end
87 def apply_label t; each { |m, *o| m && m.add_label(t) }; end
88 def remove_label t; each { |m, *o| m && m.remove_label(t) }; end
90 def toggle_label label
100 def set_labels l; each { |m, *o| m && m.labels = l }; end
102 def has_label? t; any? { |m, *o| m && m.has_label?(t) }; end
103 def save index; each { |m, *o| m && m.save(index) }; end
105 def direct_participants
106 map { |m, *o| [m.from] + m.to if m }.flatten.compact.uniq
110 map { |m, *o| [m.from] + m.to + m.cc + m.bcc if m }.flatten.compact.uniq
113 def size; map { |m, *o| m ? 1 : 0 }.sum; end
114 def subj; argfind { |m, *o| m && m.subj }; end
116 map { |m, *o| m && m.labels }.flatten.compact.uniq.sort_by { |t| t.to_s }
119 each { |m, *o| m && m.labels = l.clone }
123 inject(nil) do |a, b|
130 b.date > a.date ? b : a
136 "<thread containing: #{@containers.join ', '}>"
140 ## recursive structure used internally to represent message trees as
141 ## described by reply-to: and references: headers.
143 ## the 'id' field is the same as the message id. but the message might
144 ## be empty, in the case that we represent a message that was referenced
145 ## by another message (as an ancestor) but never received.
147 attr_accessor :message, :parent, :children, :id, :thread
150 raise "non-String #{id.inspect}" unless id.is_a? String
152 @message, @parent, @thread = nil, nil, nil
156 def each_with_stuff parent=nil
157 yield self, 0, parent
158 @children.each do |c|
159 c.each_with_stuff(self) { |cc, d, par| yield cc, d + 1, par }
167 @parent && @parent.descendant_of?(o)
171 def == o; Container === o && id == o.id; end
173 def empty?; @message.nil?; end
174 def root?; @parent.nil?; end
175 def root; root? ? self : @parent.root; end
177 def first_useful_descendant
178 if empty? && @children.size == 1
179 @children.first.first_useful_descendant
187 @children.argfind { |c| c.find_attr attr }
192 def subj; find_attr :subj; end
193 def date; find_attr :date; end
195 def is_reply?; subj && Message.subject_is_reply?(subj); end
199 (@parent.nil? ? nil : "parent=#{@parent.id}"),
200 (@children.empty? ? nil : "children=#{@children.map { |c| c.id }.inspect}"),
201 ].compact.join(" ") + ">"
204 def dump_recursive indent=0, root=true, parent=nil
205 raise "inconsistency" unless parent.nil? || parent.children.include?(self)
210 line = #"[#{useful? ? 'U' : ' '}] " +
212 "[#{thread}] #{@message.subj} " ##{@message.refs.inspect} / #{@message.replytos.inspect}"
217 puts "#{id} #{line}"#[0 .. (105 - indent)]
219 @children.each { |c| c.dump_recursive indent, false, self }
223 ## a set of threads (so a forest). builds the thread structures by
224 ## reading messages from an index.
226 attr_reader :num_messages
231 @messages = {} ## map from message ids to container objects
232 @subj_thread = {} ## map from subject strings to thread objects
235 def contains_id? id; @messages.member?(id) && !@messages[id].empty?; end
237 (c = @messages[m.id]) && c.root.thread
241 @subj_thread.each { |k, v| @subj_thread.delete(k) if v.empty? || v.subj != k }
243 private :delete_cruft
245 def threads; delete_cruft; @subj_thread.values; end
246 def size; delete_cruft; @subj_thread.size; end
249 @subj_thread.each do |s, t|
250 puts "**********************"
251 puts "** for subject #{s} **"
252 puts "**********************"
257 def link p, c, overwrite=false
258 if p == c || p.descendant_of?(c) || c.descendant_of?(p) # would create a loop
259 # puts "*** linking parent #{p} and child #{c} would create a loop"
263 if c.parent.nil? || overwrite
264 c.parent.children.delete c if overwrite && c.parent
276 return unless(c = @messages[mid])
278 c.parent.children.delete c if c.parent
285 ## load in (at most) num number of threads from the index
286 def load_n_threads num, opts={}
287 @index.each_id_by_date opts do |mid, builder|
289 next if contains_id? mid
293 load_thread_for_message m, :load_killed => opts[:load_killed]
294 yield @subj_thread.size if block_given?
298 ## loads in all messages needed to thread m
299 def load_thread_for_message m, opts={}
300 @index.each_message_in_thread_for m, opts.merge({:limit => 100}) do |mid, builder|
301 next if contains_id? mid
302 add_message builder.call
307 m.refs.any? { |ref_id| @messages[ref_id] }
310 ## an "online" version of the jwz threading algorithm.
311 def add_message message
313 el = (@messages[id] ||= Container.new id)
314 return if @messages[id].message # we've seen it before
319 ## link via references:
321 message.refs.each do |ref_id|
322 raise "non-String ref id #{ref_id.inspect} (full: #{message.refs.inspect})" unless ref_id.is_a?(String)
323 ref = (@messages[ref_id] ||= Container.new ref_id)
324 link prev, ref if prev
327 link prev, el, true if prev
329 ## link via in-reply-to:
330 message.replytos.each do |ref_id|
331 ref = (@messages[ref_id] ||= Container.new ref_id)
333 break # only do the first one
336 ## update subject grouping
338 # puts "> have #{el}, root #{root}, oldroot #{oldroot}"
343 ## check to see if the subject is still the same (in the case
344 ## that we first added a child message with a different
347 ## this code is duplicated below. sorry! TODO: refactor
348 s = Message.normalize_subj(root.subj)
349 unless @subj_thread[s] == root.thread
350 ## Redwood::log "[1] moving thread to new subject #{root.subj}"
352 @subj_thread[s] << root
353 root.thread = @subj_thread[s]
355 @subj_thread[s] = root.thread
360 ## to disable subject grouping, use the next line instead
361 ## (and the same for below)
362 #Redwood::log "[1] for #{root}, subject #{Message.normalize_subj(root.subj)} has #{@subj_thread[Message.normalize_subj(root.subj)] ? 'a' : 'no'} thread"
363 thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new)
364 #thread = (@subj_thread[root.id] ||= Thread.new)
368 # Redwood::log "[1] added #{root} to #{thread}"
372 ## new root. need to drop old one and put this one in its place
373 oldroot.thread.drop oldroot
378 ## check to see if the subject is still the same (in the case
379 ## that we first added a child message with a different
381 s = Message.normalize_subj(root.subj)
382 unless @subj_thread[s] == root.thread
383 # Redwood::log "[2] moving thread to new subject #{root.subj}"
385 @subj_thread[s] << root
386 root.thread = @subj_thread[s]
388 @subj_thread[s] = root.thread
393 ## to disable subject grouping, use the next line instead
394 ## (and the same above)
396 ## this code is duplicated above. sorry! TODO: refactor
397 # Redwood::log "[2] for #{root}, subject '#{Message.normalize_subj(root.subj)}' has #{@subj_thread[Message.normalize_subj(root.subj)] ? 'a' : 'no'} thread"
399 thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new)
400 #thread = (@subj_thread[root.id] ||= Thread.new)
404 # Redwood::log "[2] added #{root} to #{thread}"