X-Git-Url: https://git.notmuchmail.org/git?p=notmuch;a=blobdiff_plain;f=notmuch-new.c;h=b2b6043a5de051598051c418bfd7d3a46745e597;hp=ca68a68473af918dc7004762a8f42f3c85007a67;hb=03d5175001185a7eb0b3219665ea0974147da8cc;hpb=999f4c895c2442f50f943f6a8b391e1adc9cbba4 diff --git a/notmuch-new.c b/notmuch-new.c index ca68a684..b2b6043a 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -87,11 +87,18 @@ add_files_print_progress (add_files_state_t *state) fflush (stdout); } -static int ino_cmp(const struct dirent **a, const struct dirent **b) +static int +dirent_sort_inode (const struct dirent **a, const struct dirent **b) { return ((*a)->d_ino < (*b)->d_ino) ? -1 : 1; } +static int +dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b) +{ + return strcmp ((*a)->d_name, (*b)->d_name); +} + /* Test if the directory looks like a Maildir directory. * * Search through the array of directory entries to see if we can find all @@ -100,12 +107,14 @@ static int ino_cmp(const struct dirent **a, const struct dirent **b) * Return 1 if the directory looks like a Maildir and 0 otherwise. 
*/ static int -is_maildir (struct dirent **entries, int count) +_entries_resemble_maildir (struct dirent **entries, int count) { int i, found = 0; for (i = 0; i < count; i++) { - if (entries[i]->d_type != DT_DIR) continue; + if (entries[i]->d_type != DT_DIR) + continue; + if (strcmp(entries[i]->d_name, "new") == 0 || strcmp(entries[i]->d_name, "cur") == 0 || strcmp(entries[i]->d_name, "tmp") == 0) @@ -121,178 +130,241 @@ is_maildir (struct dirent **entries, int count) /* Examine 'path' recursively as follows: * - * o Ask the filesystem for the mtime of 'path' (path_mtime) + * o Ask the filesystem for the mtime of 'path' (fs_mtime) + * o Ask the database for its timestamp of 'path' (db_mtime) + * + * o Ask the filesystem for files and directories within 'path' + * (via scandir and stored in fs_entries) + * o Ask the database for files and directories within 'path' + * (db_files and db_subdirs) * - * o Ask the database for its timestamp of 'path' (path_dbtime) + * o Pass 1: For each directory in fs_entries, recursively call into + * this same function. * - * o If 'path_mtime' > 'path_dbtime' + * o Pass 2: If 'fs_mtime' > 'db_mtime', then walk fs_entries + * simultaneously with db_files and db_subdirs. Look for one of + * three interesting cases: * - * o For each regular file in 'path' with mtime newer than the - * 'path_dbtime' call add_message to add the file to the - * database. + * 1. Regular file in fs_entries and not in db_files + * This is a new file to add_message into the database. * - * o For each sub-directory of path, recursively call into this - * same function. + * 2. Filename in db_files not in fs_entries. + * This is a file that has been removed from the mail store. * - * o Tell the database to update its time of 'path' to 'path_mtime' + * 3. Directory in db_subdirs not in fs_entries + * This is a directory that has been removed from the mail store. 
* - * The 'struct stat *st' must point to a structure that has already - * been initialized for 'path' by calling stat(). + * Note that the addition of a directory is not interesting here, + * since that will have been taken care of in pass 1. Also, we + * don't immediately act on file/directory removal since we must + * ensure that in the case of a rename that the new filename is + * added before the old filename is removed, (so that no + * information is lost from the database). + * + * o Tell the database to update its time of 'path' to 'fs_mtime' */ static notmuch_status_t add_files_recursive (notmuch_database_t *notmuch, const char *path, - struct stat *st, add_files_state_t *state) { DIR *dir = NULL; struct dirent *entry = NULL; char *next = NULL; - time_t path_mtime, path_dbtime; + time_t fs_mtime, db_mtime; notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS; notmuch_message_t *message = NULL; - struct dirent **namelist = NULL; - int num_entries; + struct dirent **fs_entries = NULL; + int i, num_fs_entries; notmuch_directory_t *directory; + notmuch_filenames_t *db_files = NULL; + notmuch_filenames_t *db_subdirs = NULL; + struct stat st; + notmuch_bool_t is_maildir; - path_mtime = st->st_mtime; + if (stat (path, &st)) { + fprintf (stderr, "Error reading directory %s: %s\n", + path, strerror (errno)); + return NOTMUCH_STATUS_FILE_ERROR; + } + + if (! S_ISDIR (st.st_mode)) { + fprintf (stderr, "Error: %s is not a directory.\n", path); + return NOTMUCH_STATUS_FILE_ERROR; + } + + fs_mtime = st.st_mtime; directory = notmuch_database_get_directory (notmuch, path); - path_dbtime = notmuch_directory_get_mtime (directory); + db_mtime = notmuch_directory_get_mtime (directory); - num_entries = scandir (path, &namelist, 0, ino_cmp); + /* If the database knows about this directory, then we sort based + * on strcmp to match the database sorting. Otherwise, we can do + * inode-based sorting for faster filesystem operation. 
*/ + num_fs_entries = scandir (path, &fs_entries, 0, + db_mtime ? + dirent_sort_strcmp_name : dirent_sort_inode); - if (num_entries == -1) { + if (num_fs_entries == -1) { fprintf (stderr, "Error opening directory %s: %s\n", path, strerror (errno)); ret = NOTMUCH_STATUS_FILE_ERROR; goto DONE; } - int i=0; + /* Pass 1: Recurse into all sub-directories. */ + is_maildir = _entries_resemble_maildir (fs_entries, num_fs_entries); - while (!interrupted) { - if (i == num_entries) + for (i = 0; i < num_fs_entries; i++) { + if (interrupted) break; - entry= namelist[i++]; + entry = fs_entries[i]; - /* If this directory hasn't been modified since the last - * add_files, then we only need to look further for - * sub-directories. */ - if (path_mtime <= path_dbtime && entry->d_type == DT_REG) + if (entry->d_type != DT_DIR) continue; /* Ignore special directories to avoid infinite recursion. - * Also ignore the .notmuch directory. + * Also ignore the .notmuch directory and any "tmp" directory + * that appears within a maildir. */ /* XXX: Eventually we'll want more sophistication to let the * user specify files to be ignored. */ if (strcmp (entry->d_name, ".") == 0 || strcmp (entry->d_name, "..") == 0 || - (entry->d_type == DT_DIR && - (strcmp (entry->d_name, "tmp") == 0) && - is_maildir (namelist, num_entries)) || + (is_maildir && strcmp (entry->d_name, "tmp") == 0) || strcmp (entry->d_name, ".notmuch") ==0) { continue; } next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); + status = add_files_recursive (notmuch, next, state); + if (status && ret == NOTMUCH_STATUS_SUCCESS) + ret = status; + talloc_free (next); + next = NULL; + } + + /* If this directory hasn't been modified since the last + * "notmuch new", then we can skip the second pass entirely. */ + if (fs_mtime <= db_mtime) + goto DONE; - if (stat (next, st)) { - int err = errno; + /* Pass 2: Scan for new files, removed files, and removed directories. 
*/ + db_files = notmuch_directory_get_child_files (directory); + db_subdirs = notmuch_directory_get_child_directories (directory); - switch (err) { - case ENOENT: - /* The file was removed between scandir and now... */ - case EPERM: - case EACCES: - /* We can't read this file so don't add it to the cache. */ - continue; - } + for (i = 0; i < num_fs_entries; i++) + { + if (interrupted) + break; + + entry = fs_entries[i]; + + /* Check if we've walked past any names in db_files or + * db_subdirs. If so, these have been deleted. */ + while (notmuch_filenames_has_more (db_files) && + strcmp (notmuch_filenames_get (db_files), entry->d_name) < 0) + { + printf ("Detected deleted file %s/%s\n", path, + notmuch_filenames_get (db_files)); + + notmuch_filenames_advance (db_files); + } - fprintf (stderr, "Error reading %s: %s\n", - next, strerror (errno)); - ret = NOTMUCH_STATUS_FILE_ERROR; + while (notmuch_filenames_has_more (db_subdirs) && + strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) <= 0) + { + if (strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) < 0) + printf ("Detected deleted directory %s/%s", path, + notmuch_filenames_get (db_subdirs)); + + notmuch_filenames_advance (db_subdirs); + } + + if (entry->d_type != DT_REG) + continue; + + /* Don't add a file that we've added before. */ + if (notmuch_filenames_has_more (db_files) && + strcmp (notmuch_filenames_get (db_files), entry->d_name) == 0) + { + notmuch_filenames_advance (db_files); + continue; + } + + /* We're now looking at a regular file that doesn't yet exist + * in the database, so add it. */ + next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); + + state->processed_files++; + + if (state->verbose) { + if (state->output_is_a_tty) + printf("\r\033[K"); + + printf ("%i/%i: %s", + state->processed_files, + state->total_files, + next); + + putchar((state->output_is_a_tty) ? 
'\r' : '\n'); + fflush (stdout); + } + + status = notmuch_database_add_message (notmuch, next, &message); + switch (status) { + /* success */ + case NOTMUCH_STATUS_SUCCESS: + state->added_messages++; + tag_inbox_and_unread (message); + break; + /* Non-fatal issues (go on to next file) */ + case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: + /* Stay silent on this one. */ + break; + case NOTMUCH_STATUS_FILE_NOT_EMAIL: + fprintf (stderr, "Note: Ignoring non-mail file: %s\n", + next); + break; + /* Fatal issues. Don't process anymore. */ + case NOTMUCH_STATUS_READONLY_DATABASE: + case NOTMUCH_STATUS_XAPIAN_EXCEPTION: + case NOTMUCH_STATUS_OUT_OF_MEMORY: + fprintf (stderr, "Error: %s. Halting processing.\n", + notmuch_status_to_string (status)); + ret = status; + goto DONE; + default: + case NOTMUCH_STATUS_FILE_ERROR: + case NOTMUCH_STATUS_NULL_POINTER: + case NOTMUCH_STATUS_TAG_TOO_LONG: + case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW: + case NOTMUCH_STATUS_LAST_STATUS: + INTERNAL_ERROR ("add_message returned unexpected value: %d", status); goto DONE; } - if (S_ISREG (st->st_mode)) { - /* If the file hasn't been modified since the last - * add_files, then we need not look at it. */ - if (path_dbtime == 0 || st->st_mtime > path_dbtime) { - state->processed_files++; - - if (state->verbose) { - if (state->output_is_a_tty) - printf("\r\033[K"); - - printf ("%i/%i: %s", - state->processed_files, - state->total_files, - next); - - putchar((state->output_is_a_tty) ? '\r' : '\n'); - fflush (stdout); - } - - status = notmuch_database_add_message (notmuch, next, &message); - switch (status) { - /* success */ - case NOTMUCH_STATUS_SUCCESS: - state->added_messages++; - tag_inbox_and_unread (message); - break; - /* Non-fatal issues (go on to next file) */ - case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: - /* Stay silent on this one. */ - break; - case NOTMUCH_STATUS_FILE_NOT_EMAIL: - fprintf (stderr, "Note: Ignoring non-mail file: %s\n", - next); - break; - /* Fatal issues. Don't process anymore. 
*/ - case NOTMUCH_STATUS_READONLY_DATABASE: - case NOTMUCH_STATUS_XAPIAN_EXCEPTION: - case NOTMUCH_STATUS_OUT_OF_MEMORY: - fprintf (stderr, "Error: %s. Halting processing.\n", - notmuch_status_to_string (status)); - ret = status; - goto DONE; - default: - case NOTMUCH_STATUS_FILE_ERROR: - case NOTMUCH_STATUS_NULL_POINTER: - case NOTMUCH_STATUS_TAG_TOO_LONG: - case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW: - case NOTMUCH_STATUS_LAST_STATUS: - INTERNAL_ERROR ("add_message returned unexpected value: %d", status); - goto DONE; - } - - if (message) { - notmuch_message_destroy (message); - message = NULL; - } - - if (do_add_files_print_progress) { - do_add_files_print_progress = 0; - add_files_print_progress (state); - } - } - } else if (S_ISDIR (st->st_mode)) { - status = add_files_recursive (notmuch, next, st, state); - if (status && ret == NOTMUCH_STATUS_SUCCESS) - ret = status; + if (message) { + notmuch_message_destroy (message); + message = NULL; + } + + if (do_add_files_print_progress) { + do_add_files_print_progress = 0; + add_files_print_progress (state); } talloc_free (next); next = NULL; } - status = notmuch_directory_set_mtime (directory, path_mtime); - if (status && ret == NOTMUCH_STATUS_SUCCESS) - ret = status; + if (! 
interrupted) { + status = notmuch_directory_set_mtime (directory, fs_mtime); + if (status && ret == NOTMUCH_STATUS_SUCCESS) + ret = status; + } DONE: if (next) @@ -301,8 +373,14 @@ add_files_recursive (notmuch_database_t *notmuch, free (entry); if (dir) closedir (dir); - if (namelist) - free (namelist); + if (fs_entries) + free (fs_entries); + if (db_subdirs) + notmuch_filenames_destroy (db_subdirs); + if (db_files) + notmuch_filenames_destroy (db_files); + if (directory) + notmuch_directory_destroy (directory); return ret; } @@ -315,23 +393,11 @@ add_files (notmuch_database_t *notmuch, const char *path, add_files_state_t *state) { - struct stat st; notmuch_status_t status; struct sigaction action; struct itimerval timerval; notmuch_bool_t timer_is_active = FALSE; - if (stat (path, &st)) { - fprintf (stderr, "Error reading directory %s: %s\n", - path, strerror (errno)); - return NOTMUCH_STATUS_FILE_ERROR; - } - - if (! S_ISDIR (st.st_mode)) { - fprintf (stderr, "Error: %s is not a directory.\n", path); - return NOTMUCH_STATUS_FILE_ERROR; - } - if (state->output_is_a_tty && ! debugger_is_active () && ! state->verbose) { /* Setup our handler for SIGALRM */ memset (&action, 0, sizeof (struct sigaction)); @@ -350,7 +416,7 @@ add_files (notmuch_database_t *notmuch, timer_is_active = TRUE; } - status = add_files_recursive (notmuch, path, &st, state); + status = add_files_recursive (notmuch, path, state); if (timer_is_active) { /* Now stop the timer. 
*/ @@ -380,21 +446,21 @@ count_files (const char *path, int *count) struct dirent *entry = NULL; char *next; struct stat st; - struct dirent **namelist = NULL; - int n_entries = scandir (path, &namelist, 0, ino_cmp); + struct dirent **fs_entries = NULL; + int num_fs_entries = scandir (path, &fs_entries, 0, dirent_sort_inode); int i = 0; - if (n_entries == -1) { + if (num_fs_entries == -1) { fprintf (stderr, "Warning: failed to open directory %s: %s\n", path, strerror (errno)); goto DONE; } while (!interrupted) { - if (i == n_entries) + if (i == num_fs_entries) break; - entry= namelist[i++]; + entry = fs_entries[i++]; /* Ignore special directories to avoid infinite recursion. * Also ignore the .notmuch directory. @@ -433,8 +499,8 @@ count_files (const char *path, int *count) DONE: if (entry) free (entry); - if (namelist) - free (namelist); + if (fs_entries) + free (fs_entries); } int