]> git.notmuchmail.org Git - notmuch/blob - bindings/go/cmds/notmuch-addrlookup.go
Merge remote branch 'amdragon/search-perf-3'
[notmuch] / bindings / go / cmds / notmuch-addrlookup.go
1 package main
2
3 // stdlib imports
4 import "os"
5 import "path"
6 import "log"
7 import "fmt"
8 import "regexp"
9 import "strings"
10 import "sort"
11
12 // 3rd-party imports
13 import "notmuch"
14 import "github.com/kless/goconfig/config"
15
// mail_addr_freq records how often one address string was seen in each
// of the three search passes.
type mail_addr_freq struct {
	// addr is the address text; it is also the key under which the
	// record is stored while results are accumulated.
	addr string
	// count holds one occurrence counter per pass, indexed by the
	// 0-based pass number.
	count [3]uint
}
20
// frequencies maps a string (a matched address or a name) to the number
// of times it was seen.
type frequencies map[string]uint
22
23 /* Used to sort the email addresses from most to least used */
24 func sort_by_freq(m1, m2 *mail_addr_freq) int {
25         if (m1.count[0] == m2.count[0] &&
26                 m1.count[1] == m2.count[1] &&
27                 m1.count[2] == m2.count[2]) {
28                 return 0
29         }
30
31         if (m1.count[0] >  m2.count[0] ||
32                 m1.count[0] == m2.count[0] &&
33                 m1.count[1] >  m2.count[1] ||
34                 m1.count[0] == m2.count[0] &&
35                 m1.count[1] == m2.count[1] &&
36                 m1.count[2] >  m2.count[2]) {
37                 return -1
38         }
39
40         return 1
41 }
42
43 type maddresses []*mail_addr_freq
44
45 func (self *maddresses) Len() int {
46         return len(*self)
47 }
48
49 func (self *maddresses) Less(i,j int) bool {
50         m1 := (*self)[i]
51         m2 := (*self)[j]
52         v  := sort_by_freq(m1, m2)
53         if v<=0 {
54                 return true
55         }
56         return false
57 }
58
59 func (self *maddresses) Swap(i,j int) {
60         (*self)[i], (*self)[j] = (*self)[j], (*self)[i]
61 }
62
63 // find most frequent real name for each mail address
64 func frequent_fullname(freqs frequencies) string {
65         var maxfreq uint = 0
66         fullname := ""
67         freqs_sz := len(freqs)
68
69         for mail,freq := range freqs {
70                 if (freq > maxfreq && mail != "") || freqs_sz == 1 {
71                         // only use the entry if it has a real name
72                         // or if this is the only entry
73                         maxfreq = freq
74                         fullname = mail
75                 }
76         }
77         return fullname
78 }
79
80 func addresses_by_frequency(msgs *notmuch.Messages, name string, pass uint, addr_to_realname *map[string]*frequencies) *frequencies {
81
82         freqs := make(frequencies)
83
84         pattern := `\s*(("(\.|[^"])*"|[^,])*<?(?mail\b\w+([-+.]\w+)*\@\w+[-\.\w]*\.([-\.\w]+)*\w\b)>?)`
85         // pattern := "\\s*((\\\"(\\\\.|[^\\\\\"])*\\\"|[^,])*" +
86         //      "<?(?P<mail>\\b\\w+([-+.]\\w+)*\\@\\w+[-\\.\\w]*\\.([-\\.\\w]+)*\\w\\b)>?)"
87         pattern = `.*` + strings.ToLower(name) + `.*`
88         var re *regexp.Regexp = nil
89         var err os.Error = nil
90         if re,err = regexp.Compile(pattern); err != nil {
91                 log.Printf("error: %v\n", err)
92                 return &freqs
93         }
94         
95         headers := []string{"from"}
96         if pass == 1 {
97                 headers = append(headers, "to", "cc", "bcc")
98         }
99
100         for ;msgs.Valid();msgs.MoveToNext() {
101                 msg := msgs.Get()
102                 //println("==> msg [", msg.GetMessageId(), "]")
103                 for _,header := range headers {
104                         froms := strings.ToLower(msg.GetHeader(header))
105                         //println("  froms: ["+froms+"]")
106                         for _,from := range strings.Split(froms, ",", -1) {
107                                 from = strings.Trim(from, " ")
108                                 match := re.FindString(from)
109                                 //println("  -> match: ["+match+"]")
110                                 occ,ok := freqs[match]
111                                 if !ok {
112                                         freqs[match] = 0
113                                         occ = 0
114                                 }
115                                 freqs[match] = occ+1
116                         }
117                 }
118         }
119         return &freqs
120 }
121
122 func search_address_passes(queries [3]*notmuch.Query, name string) []string {
123         var val []string
124         addr_freq := make(map[string]*mail_addr_freq)
125         addr_to_realname := make(map[string]*frequencies)
126
127         var pass uint = 0 // 0-based
128         for _,query := range queries {
129                 if query == nil {
130                         //println("**warning: idx [",idx,"] contains a nil query")
131                         continue
132                 }
133                 msgs := query.SearchMessages()
134                 ht := addresses_by_frequency(msgs, name, pass, &addr_to_realname)
135                 for addr, count := range *ht {
136                         freq,ok := addr_freq[addr]
137                         if !ok {
138                                 freq = &mail_addr_freq{addr:addr, count:[3]uint{0,0,0}}
139                         }
140                         freq.count[pass] = count
141                         addr_freq[addr] = freq
142                 }
143                 msgs.Destroy()
144                 pass += 1
145         }
146
147         addrs := make(maddresses, len(addr_freq))
148         {
149                 iaddr := 0
150                 for _, freq := range addr_freq {
151                         addrs[iaddr] = freq
152                         iaddr += 1
153                 }
154         }
155         sort.Sort(&addrs)
156
157         for _,addr := range addrs {
158                 freqs,ok := addr_to_realname[addr.addr]
159                 if ok {
160                         val = append(val, frequent_fullname(*freqs))
161                 } else {
162                         val = append(val, addr.addr)
163                 }
164         }
165         //println("val:",val)
166         return val
167 }
168
169 type address_matcher struct {
170         // the notmuch database
171         db *notmuch.Database
172         // full path of the notmuch database
173         user_db_path string
174         // user primary email
175         user_primary_email string
176         // user tag to mark from addresses as in the address book
177         user_addrbook_tag string
178 }
179
180 func new_address_matcher() *address_matcher {
181         var cfg *config.Config
182         var err os.Error
183
184         // honor NOTMUCH_CONFIG
185         home := os.Getenv("NOTMUCH_CONFIG")
186         if home == "" {
187                 home = os.Getenv("HOME")
188         }
189
190         if cfg,err = config.ReadDefault(path.Join(home, ".notmuch-config")); err != nil {
191                 log.Fatalf("error loading config file:",err)
192         }
193
194         db_path,_ := cfg.String("database", "path")
195         primary_email,_ := cfg.String("user", "primary_email")
196         addrbook_tag,err := cfg.String("user", "addrbook_tag")
197         if err != nil {
198                 addrbook_tag = "addressbook"
199         }
200
201         self := &address_matcher{db:nil, 
202                                  user_db_path:db_path,
203                                  user_primary_email:primary_email,
204                                  user_addrbook_tag:addrbook_tag}
205         return self
206 }
207
208 func (self *address_matcher) run(name string) {
209         queries := [3]*notmuch.Query{}
210         
211         // open the database
212         self.db = notmuch.OpenDatabase(self.user_db_path, 
213                 notmuch.DATABASE_MODE_READ_ONLY)
214
215         // pass 1: look at all from: addresses with the address book tag
216         query := "tag:" + self.user_addrbook_tag
217         if name != "" {
218                 query = query + " and from:" + name + "*"
219         }
220         queries[0] = self.db.CreateQuery(query)
221
222         // pass 2: look at all to: addresses sent from our primary mail
223         query = ""
224         if name != "" {
225                 query = "to:"+name+"*"
226         }
227         if self.user_primary_email != "" {
228                 query = query + " from:" + self.user_primary_email
229         }
230         queries[1] = self.db.CreateQuery(query)
231
232         // if that leads only to a few hits, we check every from too
233         if queries[0].CountMessages() + queries[1].CountMessages() < 10 {
234                 query = ""
235                 if name != "" {
236                         query = "from:"+name+"*"
237                 }
238                 queries[2] = self.db.CreateQuery(query)
239         }
240         
241         // actually retrieve and sort addresses
242         results := search_address_passes(queries, name)
243         for _,v := range results {
244                 if v != "" && v != "\n" {
245                         fmt.Println(v)
246                 }
247         }
248         return
249 }
250
251 func main() {
252         //fmt.Println("args:",os.Args)
253         app := new_address_matcher()
254         name := ""
255         if len(os.Args) > 1 {
256                 name = os.Args[1]
257         }
258         app.run(name)
259 }