git.notmuchmail.org Git - notmuch/blob - devel/notmuch-web/nmweb.py

   1 #!/usr/bin/env python
   2
   3 from __future__ import absolute_import
   4
   5 try:
   6   from urllib.parse import quote_plus
   7   from urllib.parse import unquote_plus
   8 except ImportError:
   9   from urllib import quote_plus
  10   from urllib import unquote_plus
  11
  12 from datetime import datetime
  13 from mailbox import MaildirMessage
  14 import mimetypes
  15 import email
  16 import re
  17 import html
  18 import os
  19
  20 import bleach
  21 import web
  22 from notmuch2 import Database
  23 from jinja2 import Environment, FileSystemLoader # FIXME to PackageLoader
  24 from jinja2 import Markup
  25 try:
  26   import bjoern # from https://github.com/jonashaag/bjoern/
  27   use_bjoern = True
  28 except:
  29   use_bjoern = False
  30
  31 # Configuration options
  32 safe_tags = bleach.sanitizer.ALLOWED_TAGS + \
  33             [u'div', u'span', u'p', u'br', u'table', u'tr', u'td', u'th']
  34 linkify_plaintext = True # delays page load by about 0.02s of 0.20s budget
  35 show_thread_nav = True   # delays page load by about 0.04s of 0.20s budget
  36
  37 prefix = os.environ.get('NMWEB_PREFIX', "http://localhost:8080")
  38 webprefix = os.environ.get('NMWEB_STATIC', prefix + "/static")
  39 cachedir = os.environ.get('NMWEB_CACHE', "static/cache") # special for webpy server; changeable if using your own
  40 cachepath = os.environ.get('NMWEB_CACHE_PATH', cachedir) # location of static cache in the local filesystem
  41
  42 if 'NMWEB_DEBUG' in os.environ:
  43   web.config.debug = True
  44 else:
  45   web.config.debug = False
  46
  47 # End of config options
  48
  49 env = Environment(autoescape=True,
  50                   loader=FileSystemLoader('templates'))
  51
  52 urls = (
  53   '/', 'index',
  54   '/search/(.*)', 'search',
  55   '/show/(.*)', 'show',
  56 )
  57
  58 def urlencode_filter(s):
  59   if type(s) == 'Markup':
  60     s = s.unescape()
  61   s = s.encode('utf8')
  62   s = quote_plus(s)
  63   return Markup(s)
  64 env.filters['url'] = urlencode_filter
  65
  66 class index:
  67   def GET(self):
  68     web.header('Content-type', 'text/html')
  69     base = env.get_template('base.html')
  70     template = env.get_template('index.html')
  71     db = Database()
  72     tags = db.tags
  73     return template.render(tags=tags,
  74                            title="Notmuch webmail",
  75                            prefix=prefix,
  76                            sprefix=webprefix)
  77
  78 class search:
  79   def GET(self, terms):
  80     redir = False
  81     if web.input(terms=None).terms:
  82       redir = True
  83       terms = web.input().terms
  84     terms = unquote_plus (terms)
  85     if web.input(afters=None).afters:
  86       afters = web.input(afters=None).afters[:-3]
  87     else:
  88       afters = '0'
  89     if web.input(befores=None).befores:
  90       befores = web.input(befores=None).befores
  91     else:
  92       befores = '4294967296' # 2^32
  93     try:
  94       if int(afters) > 0 or int(befores) < 4294967296:
  95         redir = True
  96         terms += ' date:@%s..@%s' % (int(afters), int(befores))
  97     except ValueError:
  98       pass
  99     if redir:
 100       raise web.seeother('/search/%s' % quote_plus(terms.encode('utf8')))
 101     web.header('Content-type', 'text/html')
 102     db = Database()
 103     ts = db.threads(query=terms, sort=Database.SORT.NEWEST_FIRST)
 104     template = env.get_template('search.html')
 105     return template.generate(terms=terms,
 106                              ts=ts,
 107                              title=terms,
 108                              prefix=prefix,
 109                              sprefix=webprefix)
 110
 111 def format_time_range(start, end):
 112   if end-start < (60*60*24):
 113     time = datetime.fromtimestamp(start).strftime('%Y %b %d %H:%M')
 114   else:
 115     start = datetime.fromtimestamp(start).strftime("%Y %b %d")
 116     end = datetime.fromtimestamp(end).strftime("%Y %b %d")
 117     time = "%s through %s" % (start, end)
 118   return time
# Make format_time_range callable from the Jinja2 templates.
env.globals['format_time_range'] = format_time_range
 120
 121 def mailto_addrs(msg,header_name):
 122   try:
 123     hdr = msg.header(header_name)
 124   except LookupError:
 125     return ''
 126
 127   frm = email.utils.getaddresses([hdr])
 128   return ', '.join(['<a href="mailto:%s">%s</a>' % ((l, p) if p else (l, l)) for (p, l) in frm])
# Make mailto_addrs callable from the Jinja2 templates.
env.globals['mailto_addrs'] = mailto_addrs
 130
 131 def link_msg(msg):
 132   lnk = quote_plus(msg.messageid.encode('utf8'))
 133   try:
 134     subj = html.escape(msg.header('Subject'))
 135   except LookupError:
 136     subj = ""
 137   out = '<a href="%s/show/%s">%s</a>' % (prefix, lnk, subj)
 138   return out
 139 env.globals['link_msg'] = link_msg
 140
 141 def show_msgs(msgs):
 142   r = '<ul>'
 143   for msg in msgs:
 144     red = 'color:black; font-style:normal'
 145     if msg.matched:
 146       red = 'color:red; font-style:italic'
 147     frm = mailto_addrs(msg,'From')
 148     lnk = link_msg(msg)
 149     tags = ", ".join(msg.tags)
 150     rs = show_msgs(msg.replies())
 151     r += '<li><span style="%s">%s&mdash;%s</span> [%s] %s</li>' % (red, frm, lnk, tags, rs)
 152   r += '</ul>'
 153   return r
 154 env.globals['show_msgs'] = show_msgs
 155
 156 # As email.message.walk, but showing close tags as well
 157 def mywalk(self):
 158   yield self
 159   if self.is_multipart():
 160     for subpart in self.get_payload():
 161       for subsubpart in mywalk(subpart):
 162         yield subsubpart
 163     yield 'close-div'
 164
 165 class show:
 166   def GET(self, mid):
 167     web.header('Content-type', 'text/html')
 168     db = Database()
 169     try:
 170       m = db.find(mid)
 171     except:
 172       raise web.notfound("No such message id.")
 173     template = env.get_template('show.html')
 174     # FIXME add reply-all link with email.urils.getaddresses
 175     # FIXME add forward link using mailto with body parameter?
 176     return template.render(m=m,
 177                            mid=mid,
 178                            title=m.header('Subject'),
 179                            prefix=prefix,
 180                            sprefix=webprefix)
 181
 182 def thread_nav(m):
 183   if not show_thread_nav: return
 184   db = Database()
 185   thread = next(db.threads('thread:'+m.threadid))
 186   prv = None
 187   found = False
 188   nxt = None
 189   for msg in thread:
 190     if m == msg:
 191       found = True
 192     elif not found:
 193       prv = msg
 194     else: # found message, but not on this loop
 195       nxt = msg
 196       break
 197   yield "<hr><ul>"
 198   if prv: yield "<li>Previous message (by thread): %s</li>" % link_msg(prv)
 199   if nxt: yield "<li>Next message (by thread): %s</li>" % link_msg(nxt)
 200   yield "</ul><h3>Thread:</h3>"
 201   # FIXME show now takes three queries instead of 1;
 202   # can we yield the message body while computing the thread shape?
 203   thread = next(db.threads('thread:'+m.threadid))
 204   yield show_msgs(thread.toplevel())
 205   return
 206 env.globals['thread_nav'] = thread_nav
 207
 208 def format_message(nm_msg, mid):
 209   fn = list(nm_msg.filenames())[0]
 210   msg = MaildirMessage(open(fn, 'rb'))
 211   return format_message_walk(msg, mid)
 212
 213 def decodeAnyway(txt, charset='ascii'):
 214   try:
 215     out = txt.decode(charset)
 216   except:
 217     try:
 218       out = txt.decode('utf-8')
 219     except UnicodeDecodeError:
 220       out = txt.decode('latin1')
 221   return out
 222
 223 def require_protocol_prefix(attrs, new=False):
 224   if not new:
 225     return attrs
 226   link_text = attrs[u'_text']
 227   if link_text.startswith(('http:', 'https:', 'mailto:', 'git:', 'id:')):
 228     return attrs
 229   return None
 230
 231 # Bleach doesn't even try to linkify id:... text, so no point invoking this yet
 232 def modify_id_links(attrs, new=False):
 233   if attrs[(None, u'href')].startswith(u'id:'):
 234     attrs[(None, u'href')] = prefix + "/show/" + attrs[(None, u'href')][3:]
 235   return attrs
 236
 237 def css_part_id(content_type, parts=[]):
 238   c = content_type.replace('/', '-')
 239   out = "-".join(parts + [c])
 240   return out
 241
 242 def format_message_walk(msg, mid):
 243   counter = 0
 244   cid_refd = []
 245   parts = ['main']
 246   for part in mywalk(msg):
 247     if part == 'close-div':
 248       parts.pop()
 249       yield '</div>'
 250     elif part.get_content_maintype() == 'multipart':
 251       yield '<div class="multipart-%s" id="%s">' % \
 252           (part.get_content_subtype(), css_part_id(part.get_content_type(), parts))
 253       parts.append(part.get_content_subtype())
 254       if part.get_content_subtype() == 'alternative':
 255         yield '<ul>'
 256         for subpart in part.get_payload():
 257           yield ('<li><a href="#%s">%s</a></li>' %
 258                  (css_part_id(subpart.get_content_type(), parts),
 259                   subpart.get_content_type()))
 260         yield '</ul>'
 261     elif part.get_content_type() == 'message/rfc822':
 262       # FIXME extract subject, date, to/cc/from into a separate template and use it here
 263       yield '<div class="message-rfc822">'
 264     elif part.get_content_maintype() == 'text':
 265       if part.get_content_subtype() == 'plain':
 266         yield '<div id="%s">' % css_part_id(part.get_content_type(), parts)
 267         yield '<pre>'
 268         out = part.get_payload(decode=True)
 269         out = decodeAnyway(out, part.get_content_charset('ascii'))
 270         out = html.escape(out)
 271         out = out.encode('ascii', 'xmlcharrefreplace').decode('ascii')
 272         if linkify_plaintext: out = bleach.linkify(out, callbacks=[require_protocol_prefix])
 273         yield out
 274         yield '</pre></div>'
 275       elif part.get_content_subtype() == 'html':
 276         yield '<div id="%s">' % css_part_id(part.get_content_type(), parts)
 277         unb64 = part.get_payload(decode=True)
 278         decoded = decodeAnyway(unb64, part.get_content_charset('ascii'))
 279         cid_refd += find_cids(decoded)
 280         part.set_payload(bleach.clean(replace_cids(decoded, mid), tags=safe_tags).
 281                          encode(part.get_content_charset('ascii'), 'xmlcharrefreplace'))
 282         (filename, cid) = link_to_cached_file(part, mid, counter)
 283         counter += 1
 284         yield '<iframe class="embedded-html" src="%s"></iframe>' % \
 285             os.path.join(prefix, cachedir, mid, filename)
 286         yield '</div>'
 287       else:
 288         yield '<div id="%s">' % css_part_id(part.get_content_type(), parts)
 289         (filename, cid) = link_to_cached_file(part, mid, counter)
 290         counter += 1
 291         yield '<a href="%s">%s (%s)</a>' % (os.path.join(prefix,
 292                                                          cachedir,
 293                                                          mid,
 294                                                          filename),
 295                                             filename,
 296                                             part.get_content_type())
 297         yield '</div>'
 298     elif part.get_content_maintype() == 'image':
 299       (filename, cid) = link_to_cached_file(part, mid, counter)
 300       if cid not in cid_refd:
 301         counter += 1
 302         yield '<img src="%s" alt="%s">' % (os.path.join(prefix,
 303                                                         cachedir,
 304                                                         mid,
 305                                                         filename),
 306                                            filename)
 307     else:
 308       (filename, cid) = link_to_cached_file(part, mid, counter)
 309       counter += 1
 310       yield '<a href="%s">%s (%s)</a>' % (os.path.join(prefix,
 311                                                        cachedir,
 312                                                        mid,
 313                                                        filename),
 314                                           filename,
 315                                           part.get_content_type())
 316 env.globals['format_message'] = format_message
 317
 318 def replace_cids(body, mid):
 319   return body.replace('cid:', os.path.join(prefix, cachedir, mid)+'/')
 320
 321 def find_cids(body):
 322   return re.findall(r'cid:([^ "\'>]*)', body)
 323
 324 def link_to_cached_file(part, mid, counter):
 325   filename = part.get_filename()
 326   if not filename:
 327     ext = mimetypes.guess_extension(part.get_content_type())
 328     if not ext:
 329       ext = '.bin'
 330     filename = 'part-%03d%s' % (counter, ext)
 331   try:
 332     os.makedirs(os.path.join(cachepath, mid))
 333   except OSError:
 334     pass
 335   fn = os.path.join(cachepath, mid, filename) # FIXME escape mid, filename
 336   fp = open(fn, 'wb')
 337   if part.get_content_maintype() == 'text':
 338     data = part.get_payload(decode=True)
 339     data = decodeAnyway(data, part.get_content_charset('ascii')).encode('utf-8')
 340   else:
 341     try:
 342       data = part.get_payload(decode=True)
 343     except:
 344       data = part.get_payload(decode=False)
 345   if data:
 346     fp.write(data)
 347   fp.close()
 348   if 'Content-ID' in part:
 349     cid = part['Content-ID']
 350     if cid[0] == '<' and cid[-1] == '>': cid = cid[1:-1]
 351     cid_fn = os.path.join(cachepath, mid, cid) # FIXME escape mid, cid
 352     try:
 353       os.unlink(cid_fn)
 354     except OSError:
 355       pass
 356     os.link(fn, cid_fn)
 357     return (filename, cid)
 358   else:
 359     return (filename, None)
 360
 361 if __name__ == '__main__':
 362   app = web.application(urls, globals())
 363   if use_bjoern:
 364     bjoern.run(app.wsgifunc(), "127.0.0.1", 8080)
 365   else:
 366     app.run()