git.notmuchmail.org Git - notmuch/blobdiff - notmuch-git.py
emacs: add notmuch-search-edit-search and notmuch-tree-edit-search
[notmuch] / notmuch-git.py
index b4253c0dfc4dc739d0bb5b2cd2d06396f9f92e71..ceb86fbc14dfa97db10dfec87732452a648edc0b 100644 (file)
@@ -46,10 +46,12 @@ _LOG.addHandler(_logging.StreamHandler())
 
 NOTMUCH_GIT_DIR = None
 TAG_PREFIX = None
+FORMAT_VERSION = 1
 
 _HEX_ESCAPE_REGEX = _re.compile('%[0-9A-F]{2}')
 _TAG_DIRECTORY = 'tags/'
-_TAG_FILE_REGEX = _re.compile(_TAG_DIRECTORY + '(?P<id>[^/]*)/(?P<tag>[^/]*)')
+_TAG_FILE_REGEX = ( _re.compile(_TAG_DIRECTORY + '(?P<id>[^/]*)/(?P<tag>[^/]*)'),
+                    _re.compile(_TAG_DIRECTORY + '([0-9a-f]{2}/){2}(?P<id>[^/]*)/(?P<tag>[^/]*)'))
 
 # magic hash for Git (git hash-object -t blob /dev/null)
 _EMPTYBLOB = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
@@ -265,7 +267,7 @@ def archive(treeish='HEAD', args=()):
     Each tag $tag for message with Message-Id $id is written to
     an empty file
 
-      tags/encode($id)/encode($tag)
+      tags/hash1($id)/hash2($id)/encode($id)/encode($tag)
 
     The encoding preserves alphanumerics, and the characters
     "+-_@=.:," (not the quotes).  All other octets are replaced with
@@ -450,7 +452,7 @@ def fetch(remote=None):
     _git(args=args, wait=True)
 
 
-def init(remote=None):
+def init(remote=None,format_version=None):
     """
     Create an empty notmuch-git repository.
 
@@ -464,14 +466,34 @@ def init(remote=None):
     except FileExistsError:
         pass
 
+    if not format_version:
+        format_version = 1
+
+    format_version=int(format_version)
+
+    if format_version > 1 or format_version < 0:
+        _LOG.error("Illegal format version {:d}".format(format_version))
+        _sys.exit(1)
+
     _spawn(args=['git', '--git-dir', NOTMUCH_GIT_DIR, 'init',
                  '--initial-branch=master', '--quiet', '--bare'], wait=True)
     _git(args=['config', 'core.logallrefupdates', 'true'], wait=True)
     # create an empty blob (e69de29bb2d1d6434b8b29ae775ad8c2e48c5391)
     _git(args=['hash-object', '-w', '--stdin'], input='', wait=True)
+    allow_empty=('--allow-empty',)
+    if format_version >= 1:
+        allow_empty=()
+        # create a blob for the FORMAT file
+        (status, stdout, _) = _git(args=['hash-object', '-w', '--stdin'], stdout=_subprocess.PIPE,
+                                   input='{:d}\n'.format(format_version), wait=True)
+        verhash=stdout.rstrip()
+        _LOG.debug('hash of FORMAT blob = {:s}'.format(verhash))
+        # Add FORMAT to the index
+        _git(args=['update-index', '--add', '--cacheinfo', '100644,{:s},FORMAT'.format(verhash)], wait=True)
+
     _git(
         args=[
-            'commit', '--allow-empty', '-m', 'Start a new nmbug repository'
+            'commit', *allow_empty, '-m', 'Start a new notmuch-git repository'
         ],
         additional_env={'GIT_WORK_TREE': NOTMUCH_GIT_DIR},
         wait=True)
@@ -676,6 +698,32 @@ def _is_unmerged(ref='@{upstream}'):
         stdout=_subprocess.PIPE, wait=True)
     return base != fetch_head
 
+class DatabaseCache:
+    "Cache whether message ids are known to the notmuch database."
+    def __init__(self):
+        try:
+            from notmuch2 import Database
+            self._notmuch = Database()
+        except ImportError:
+            self._notmuch = None  # known() falls back to the notmuch CLI
+        self._known = {}
+
+    def known(self,id):
+        "Return True if notmuch knows message 'id'; answers are cached per id."
+        if id in self._known:
+            return self._known[id]
+        if self._notmuch:
+            try:
+                self._notmuch.find(id)
+                self._known[id] = True
+            except LookupError:
+                self._known[id] = False
+        else:
+            (_, stdout, stderr) = _spawn(
+                args=['notmuch', 'search', '--output=files', 'id:{0}'.format(id)],
+                stdout=_subprocess.PIPE, wait=True)
+            self._known[id] = bool(stdout)  # empty output: no files, so unknown
+        return self._known[id]
 
 @timed
 def get_status():
@@ -683,14 +731,11 @@ def get_status():
         'deleted': {},
         'missing': {},
         }
+    db = DatabaseCache()
     with PrivateIndex(repo=NOTMUCH_GIT_DIR, prefix=TAG_PREFIX) as index:
         maybe_deleted = index.diff(filter='D')
         for id, tags in maybe_deleted.items():
-            (_, stdout, stderr) = _spawn(
-                args=['notmuch', 'search', '--output=files', 'id:{0}'.format(id)],
-                stdout=_subprocess.PIPE,
-                wait=True)
-            if stdout:
+            if db.known(id):
                 status['deleted'][id] = tags
             else:
                 status['missing'][id] = tags
@@ -716,6 +761,7 @@ class PrivateIndex:
         self.lastmod = None
         self.checksum = None
         self._load_cache_file()
+        self.file_tree = None
         self._index_tags()
 
     def __enter__(self):
@@ -741,6 +787,43 @@ class PrivateIndex:
             _LOG.error("Error decoding cache")
             _sys.exit(1)
 
+    @timed
+    def _read_file_tree(self):
+        "Cache 'git ls-files tags' output as a map: directory -> file names."
+        tree = {}
+        self.file_tree = tree
+
+        listing = _git(
+            args=['ls-files', 'tags'],
+            additional_env={'GIT_INDEX_FILE': self.index_path},
+            stdout=_subprocess.PIPE)
+        with listing as git:
+            for path in git.stdout:
+                # split at the last '/'; rstrip() trims the leaf's trailing whitespace
+                (parent, leaf) = _os.path.split(path)
+                tree.setdefault(parent, []).append(leaf.rstrip())
+
+
+    def _clear_tags_for_message(self, id):
+        """
+        Clear any existing index entries for message 'id'
+
+        Yields one line (mode 0, all-zero hash) for each file the cached
+        file tree lists under the directory of 'id'.  The tree is read
+        on first use.  'id' should not be encoded/escaped.
+        """
+        if self.file_tree is None:
+            self._read_file_tree()
+
+        msg_dir = _id_path(id)
+
+        # an id with no directory in the tree yields nothing
+        for name in self.file_tree.get(msg_dir, ()):
+            line = '0 0000000000000000000000000000000000000000\t{:s}/{:s}\n'.format(
+                msg_dir, name)
+            yield line
+
+
     @timed
     def _index_tags(self):
         "Write notmuch tags to private git index."
@@ -776,7 +859,7 @@ class PrivateIndex:
                         if tag.startswith(prefix)]
                     id = _xapian_unquote(string=id)
                     if clear_tags:
-                        for line in _clear_tags_for_message(index=self.index_path, id=id):
+                        for line in self._clear_tags_for_message(id=id):
                             git.stdin.write(line)
                     for line in _index_tags_for_message(
                             id=id, status='A', tags=tags):
@@ -813,24 +896,6 @@ def _read_index_checksum (index_path):
     except FileNotFoundError:
         return None
 
-
-def _clear_tags_for_message(index, id):
-    """
-    Clear any existing index entries for message 'id'
-
-    Neither 'id' nor the tags in 'tags' should be encoded/escaped.
-    """
-
-    dir = 'tags/{id}'.format(id=_hex_quote(string=id))
-
-    with _git(
-            args=['ls-files', dir],
-            additional_env={'GIT_INDEX_FILE': index},
-            stdout=_subprocess.PIPE) as git:
-        for file in git.stdout:
-            line = '0 0000000000000000000000000000000000000000\t{:s}\n'.format(file.strip())
-            yield line
-
 def _read_database_lastmod():
     with _spawn(
             args=['notmuch', 'count', '--lastmod', '*'],
@@ -838,6 +903,21 @@ def _read_database_lastmod():
         (count,uuid,lastmod_str) = notmuch.stdout.readline().split()
         return (count,uuid,int(lastmod_str))
 
+def _id_path(id):
+    "Return the index directory for message 'id' under FORMAT_VERSION."
+    from hashlib import blake2b
+    hid=_hex_quote(string=id)
+    if FORMAT_VERSION==0:
+        return 'tags/{hid}'.format(hid=hid)
+    elif FORMAT_VERSION==1:
+        # two directory levels, each named by one byte (2 hex digits) of the hash
+        idhash = blake2b(hid.encode('utf8'), digest_size=2).hexdigest()
+        return 'tags/{dir1}/{dir2}/{hid}'.format(
+            hid=hid,dir1=idhash[0:2],dir2=idhash[2:])
+    else:
+        _LOG.error("Unknown format version {}".format(FORMAT_VERSION))
+        _sys.exit(1)
+
 def _index_tags_for_message(id, status, tags):
     """
     Update the Git index to either create or delete an empty file.
@@ -852,8 +932,7 @@ def _index_tags_for_message(id, status, tags):
         hash = '0000000000000000000000000000000000000000'
 
     for tag in tags:
-        path = 'tags/{id}/{tag}'.format(
-            id=_hex_quote(string=id), tag=_hex_quote(string=tag))
+        path = '{ipath}/{tag}'.format(ipath=_id_path(id),tag=_hex_quote(string=tag))
         yield '{mode} {hash}\t{path}\n'.format(mode=mode, hash=hash, path=path)
 
 
@@ -869,7 +948,7 @@ def _diff_refs(filter, a='HEAD', b='@{upstream}'):
 def _unpack_diff_lines(stream):
     "Iterate through (id, tag) tuples in a diff stream."
     for line in stream:
-        match = _TAG_FILE_REGEX.match(line.strip())
+        match = _TAG_FILE_REGEX[FORMAT_VERSION].match(line.strip())
         if not match:
             message = 'non-tag line in diff: {!r}'.format(line.strip())
             if line.startswith(_TAG_DIRECTORY):
@@ -907,6 +986,17 @@ def _notmuch_config_get(key):
         _sys.exit(1)
     return stdout.rstrip()
 
+def read_format_version():
+    "Return the integer stored in master:FORMAT, or 0 if git cannot read it."
+    fmt_args = ['cat-file', 'blob', 'master:FORMAT']
+    try:
+        (status, stdout, stderr) = _git(
+            args=fmt_args, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, wait=True)
+    except SubprocessError:
+        _LOG.debug("failed to read FORMAT file from git, assuming format version 0")
+        return 0
+    return int(stdout)
+
 # based on BaseDirectory.save_data_path from pyxdg (LGPL2+)
 def xdg_data_path(profile):
     resource = _os.path.join('notmuch',profile,'git')
@@ -1008,6 +1098,11 @@ if __name__ == '__main__':
             subparser.add_argument(
                 'command', metavar='COMMAND', nargs='?',
                 help='The command to show help for.')
+        elif command == 'init':
+            subparser.add_argument(
+                '--format-version', metavar='VERSION',
+                default = None,
+                help='create format VERSION repository.')
         elif command == 'log':
             subparser.add_argument(
                 'args', metavar='ARG', nargs='*',
@@ -1104,6 +1199,11 @@ if __name__ == '__main__':
     _LOG.debug('prefix = {:s}'.format(TAG_PREFIX))
     _LOG.debug('repository = {:s}'.format(NOTMUCH_GIT_DIR))
 
+    if args.func != init:
+        FORMAT_VERSION = read_format_version()
+
+    _LOG.debug('FORMAT_VERSION={:d}'.format(FORMAT_VERSION))
+
     if args.func == help:
         arg_names = ['command']
     else: