X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=test%2Fcorpus%2Fbar%2Fbaz%2Fcur%2F26%3A2%2C;fp=test%2Fcorpus%2Fbar%2Fbaz%2Fcur%2F26%3A2%2C;h=f3c5f53dea7ec5b435e92cf5e0b725d6abed3309;hp=0000000000000000000000000000000000000000;hb=ded713c39d8b0221a3b1b2b52a74966c20c3aba8;hpb=7630f300ba52f4aab22ee696fe1507d0ef9790bc diff --git a/test/corpus/bar/baz/cur/26:2, b/test/corpus/bar/baz/cur/26:2, new file mode 100644 index 00000000..f3c5f53d --- /dev/null +++ b/test/corpus/bar/baz/cur/26:2, @@ -0,0 +1,121 @@ +From: "Stewart Smith" +To: notmuch@notmuchmail.org +Date: Wed, 18 Nov 2009 12:56:40 +1100 +Subject: [notmuch] [PATCH 2/2] Read mail directory in inode number order +Message-ID: <1258509400-32511-1-git-send-email-stewart@flamingspork.com> + +This gives a rather decent reduction in number of seeks required when +reading a Maildir that isn't in pagecache. + +Most filesystems give some locality on disk based on inode numbers. +In ext[234] this is the inode tables, in XFS groups of sequential inode +numbers are together on disk and the most significant bits indicate +allocation group (i.e inode 1,000,000 is always after inode 1,000). + +With this patch, we read in the whole directory, sort by inode number +before stat()ing the contents. + +Ideally, directory is sequential and then we make one scan through the +file system stat()ing. + +Since the universe is not ideal, we'll probably seek during reading the +directory and a fair bit while reading the inodes themselves. + +However... with readahead, and stat()ing in inode order, we should be +in the best place possible to hit the cache. + +In a (not very good) benchmark of "how long does it take to find the first +15,000 messages in my Maildir after 'echo 3 > /proc/sys/vm/drop_caches'", +this patch consistently cut at least 8 seconds off the scan time. + +Without patch: 50 seconds +With patch: 38-42 seconds. + +(I did this in a previous maildir reading project and saw large improvements too) +--- + notmuch-new.c | 32 +++++++++++++++----------------- + 1 files changed, 15 insertions(+), 17 deletions(-) + +diff --git a/notmuch-new.c b/notmuch-new.c +index 83a05ba..11fad8c 100644 +--- a/notmuch-new.c ++++ b/notmuch-new.c +@@ -73,6 +73,11 @@ add_files_print_progress (add_files_state_t *state) + fflush (stdout); + } + ++static int ino_cmp(const struct dirent **a, const struct dirent **b) ++{ ++ return ((*a)->d_ino < (*b)->d_ino)? -1: 1; ++} ++ + /* Examine 'path' recursively as follows: + * + * o Ask the filesystem for the mtime of 'path' (path_mtime) +@@ -100,13 +105,12 @@ add_files_recursive (notmuch_database_t *notmuch, + add_files_state_t *state) + { + DIR *dir = NULL; +- struct dirent *e, *entry = NULL; +- int entry_length; +- int err; ++ struct dirent *entry = NULL; + char *next = NULL; + time_t path_mtime, path_dbtime; + notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS; + notmuch_message_t *message = NULL; ++ struct dirent **namelist = NULL; + + /* If we're told to, we bail out on encountering a read-only + * directory, (with this being a clear clue from the user to +@@ -122,31 +126,23 @@ add_files_recursive (notmuch_database_t *notmuch, + path_mtime = st->st_mtime; + + path_dbtime = notmuch_database_get_timestamp (notmuch, path); ++ int n_entries= scandir(path, &namelist, 0, ino_cmp); + +- dir = opendir (path); +- if (dir == NULL) { ++ if (n_entries == -1) { + fprintf (stderr, "Error opening directory %s: %s\n", + path, strerror (errno)); + ret = NOTMUCH_STATUS_FILE_ERROR; + goto DONE; + } + +- entry_length = offsetof (struct dirent, d_name) + +- pathconf (path, _PC_NAME_MAX) + 1; +- entry = malloc (entry_length); ++ int i=0; + + while (!interrupted) { +- err = readdir_r (dir, entry, &e); +- if (err) { +- fprintf (stderr, "Error reading directory: %s\n", +- strerror (errno)); +- ret = NOTMUCH_STATUS_FILE_ERROR; +- goto DONE; +- } +- +- if (e == NULL) ++ if (i == n_entries) + break; + ++ entry= namelist[i++]; ++ + /* If this directory hasn't been modified since the last + * add_files, then we only need to look further for + * sub-directories. */ +@@ -243,6 +239,8 @@ add_files_recursive (notmuch_database_t *notmuch, + free (entry); + if (dir) + closedir (dir); ++ if (namelist) ++ free (namelist); + + return ret; + } +-- +1.6.3.3 + +