+ size_t i;
+
+ for (i = 0; i < state->new_ignore_length; i++)
+ if (strcmp (entry, state->new_ignore[i]) == 0)
+ return TRUE;
+
+ return FALSE;
+}
+
+/* Examine 'path' recursively as follows:
+ *
+ * o Ask the filesystem for the mtime of 'path' (fs_mtime)
+ * o Ask the database for its timestamp of 'path' (db_mtime)
+ *
+ * o Ask the filesystem for files and directories within 'path'
+ * (via scandir and stored in fs_entries)
+ *
+ * o Pass 1: For each directory in fs_entries, recursively call into
+ * this same function.
+ *
+ * o Compare fs_mtime to db_mtime. If they are equivalent, terminate
+ * the algorithm at this point, (this directory has not been
+ * updated in the filesystem since the last database scan of PASS
+ * 2).
+ *
+ * o Ask the database for files and directories within 'path'
+ * (db_files and db_subdirs)
+ *
+ * o Pass 2: Walk fs_entries simultaneously with db_files and
+ * db_subdirs. Look for one of three interesting cases:
+ *
+ * 1. Regular file in fs_entries and not in db_files
+ * This is a new file to add_message into the database.
+ *
+ * 2. Filename in db_files not in fs_entries.
+ * This is a file that has been removed from the mail store.
+ *
+ * 3. Directory in db_subdirs not in fs_entries
+ * This is a directory that has been removed from the mail store.
+ *
+ * Note that the addition of a directory is not interesting here,
+ * since that will have been taken care of in pass 1. Also, we
+ * don't immediately act on file/directory removal since we must
+ * ensure that in the case of a rename that the new filename is
+ * added before the old filename is removed, (so that no
+ * information is lost from the database).
+ *
+ * o Tell the database to update its time of 'path' to 'fs_mtime'
+ * if fs_mtime isn't the current wall-clock time.
+ */
+static notmuch_status_t
+add_files (notmuch_database_t *notmuch,
+           const char *path,
+           add_files_state_t *state)
+{
+    /* Returns NOTMUCH_STATUS_SUCCESS when the scan of 'path' completed
+     * (or was interrupted), NOTMUCH_STATUS_FILE_ERROR when 'path' or one
+     * of its entries could not be examined, and otherwise the first
+     * failing status reported by the database or by a recursive call.
+     *
+     * Once any resource has been acquired, every exit goes through the
+     * DONE label so that the scandir results and the database handles
+     * are always released. */
+    struct dirent *entry = NULL;
+    char *next = NULL;
+    time_t fs_mtime, db_mtime;
+    notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS;
+    notmuch_message_t *message = NULL;
+    struct dirent **fs_entries = NULL;
+    int i, num_fs_entries = 0, entry_type;
+    /* Initialized so that the cleanup at DONE is safe on every path. */
+    notmuch_directory_t *directory = NULL;
+    notmuch_filenames_t *db_files = NULL;
+    notmuch_filenames_t *db_subdirs = NULL;
+    time_t stat_time;
+    struct stat st;
+    notmuch_bool_t is_maildir;
+    const char **tag;
+
+    /* Nothing has been acquired yet, so these early exits may return
+     * directly. */
+    if (stat (path, &st)) {
+        fprintf (stderr, "Error reading directory %s: %s\n",
+                 path, strerror (errno));
+        return NOTMUCH_STATUS_FILE_ERROR;
+    }
+    stat_time = time (NULL);
+
+    if (! S_ISDIR (st.st_mode)) {
+        fprintf (stderr, "Error: %s is not a directory.\n", path);
+        return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    fs_mtime = st.st_mtime;
+
+    status = notmuch_database_get_directory (notmuch, path, &directory);
+    if (status) {
+        ret = status;
+        goto DONE;
+    }
+    db_mtime = directory ? notmuch_directory_get_mtime (directory) : 0;
+
+    /* If the database knows about this directory, then we sort based
+     * on strcmp to match the database sorting. Otherwise, we can do
+     * inode-based sorting for faster filesystem operation. */
+    num_fs_entries = scandir (path, &fs_entries, 0,
+                              directory ?
+                              dirent_sort_strcmp_name : dirent_sort_inode);
+
+    if (num_fs_entries == -1) {
+        fprintf (stderr, "Error opening directory %s: %s\n",
+                 path, strerror (errno));
+        /* We consider this a fatal error because, if a user moved a
+         * message from another directory that we were able to scan
+         * into this directory, skipping this directory will cause
+         * that message to be lost. */
+        ret = NOTMUCH_STATUS_FILE_ERROR;
+        goto DONE;
+    }
+
+    /* Pass 1: Recurse into all sub-directories. */
+    is_maildir = _entries_resemble_maildir (path, fs_entries, num_fs_entries);
+
+    for (i = 0; i < num_fs_entries; i++) {
+        if (interrupted)
+            break;
+
+        entry = fs_entries[i];
+
+        /* Ignore any files/directories the user has configured to
+         * ignore. We do this before dirent_type both for performance
+         * and because we don't care if dirent_type fails on entries
+         * that are explicitly ignored.
+         */
+        if (_entry_in_ignore_list (entry->d_name, state)) {
+            if (state->debug)
+                printf ("(D) add_files_recursive, pass 1: explicitly ignoring %s/%s\n",
+                        path, entry->d_name);
+            continue;
+        }
+
+        /* We only want to descend into directories (and symlinks to
+         * directories). */
+        entry_type = dirent_type (path, entry);
+        if (entry_type == -1) {
+            /* Be pessimistic, e.g. so we don't lose lots of mail just
+             * because a user broke a symlink. */
+            fprintf (stderr, "Error reading file %s/%s: %s\n",
+                     path, entry->d_name, strerror (errno));
+            /* Leave through DONE so fs_entries and directory are freed. */
+            ret = NOTMUCH_STATUS_FILE_ERROR;
+            goto DONE;
+        } else if (entry_type != S_IFDIR) {
+            continue;
+        }
+
+        /* Ignore special directories to avoid infinite recursion.
+         * Also ignore the .notmuch directory and any "tmp" directory
+         * that appears within a maildir.
+         */
+        if (strcmp (entry->d_name, ".") == 0 ||
+            strcmp (entry->d_name, "..") == 0 ||
+            (is_maildir && strcmp (entry->d_name, "tmp") == 0) ||
+            strcmp (entry->d_name, ".notmuch") == 0)
+            continue;
+
+        next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+        status = add_files (notmuch, next, state);
+        if (status) {
+            ret = status;
+            goto DONE;
+        }
+        talloc_free (next);
+        next = NULL;
+    }
+
+    /* If the directory's modification time in the filesystem is the
+     * same as what we recorded in the database the last time we
+     * scanned it, then we can skip the second pass entirely.
+     *
+     * We test for strict equality here to avoid a bug that can happen
+     * if the system clock jumps backward, (preventing new mail from
+     * being discovered until the clock catches up and the directory
+     * is modified again).
+     */
+    if (directory && fs_mtime == db_mtime)
+        goto DONE;
+
+    /* If the database has never seen this directory before, we can
+     * simply leave db_files and db_subdirs NULL. */
+    if (directory) {
+        db_files = notmuch_directory_get_child_files (directory);
+        db_subdirs = notmuch_directory_get_child_directories (directory);
+    }
+
+    /* Pass 2: Scan for new files, removed files, and removed directories. */
+    for (i = 0; i < num_fs_entries; i++)
+    {
+        if (interrupted)
+            break;
+
+        entry = fs_entries[i];
+
+        /* Ignore files & directories user has configured to be ignored */
+        if (_entry_in_ignore_list (entry->d_name, state)) {
+            if (state->debug)
+                printf ("(D) add_files_recursive, pass 2: explicitly ignoring %s/%s\n",
+                        path,
+                        entry->d_name);
+            continue;
+        }
+
+        /* Check if we've walked past any names in db_files or
+         * db_subdirs. If so, these have been deleted. */
+        while (notmuch_filenames_valid (db_files) &&
+               strcmp (notmuch_filenames_get (db_files), entry->d_name) < 0)
+        {
+            char *absolute = talloc_asprintf (state->removed_files,
+                                              "%s/%s", path,
+                                              notmuch_filenames_get (db_files));
+
+            _filename_list_add (state->removed_files, absolute);
+
+            notmuch_filenames_move_to_next (db_files);
+        }
+
+        while (notmuch_filenames_valid (db_subdirs) &&
+               strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) <= 0)
+        {
+            const char *filename = notmuch_filenames_get (db_subdirs);
+
+            if (strcmp (filename, entry->d_name) < 0)
+            {
+                char *absolute = talloc_asprintf (state->removed_directories,
+                                                  "%s/%s", path, filename);
+
+                _filename_list_add (state->removed_directories, absolute);
+            }
+
+            notmuch_filenames_move_to_next (db_subdirs);
+        }
+
+        /* Only add regular files (and symlinks to regular files). */
+        entry_type = dirent_type (path, entry);
+        if (entry_type == -1) {
+            fprintf (stderr, "Error reading file %s/%s: %s\n",
+                     path, entry->d_name, strerror (errno));
+            /* Leave through DONE so that fs_entries, directory and the
+             * database filename iterators are released. */
+            ret = NOTMUCH_STATUS_FILE_ERROR;
+            goto DONE;
+        } else if (entry_type != S_IFREG) {
+            continue;
+        }
+
+        /* Don't add a file that we've added before. */
+        if (notmuch_filenames_valid (db_files) &&
+            strcmp (notmuch_filenames_get (db_files), entry->d_name) == 0)
+        {
+            notmuch_filenames_move_to_next (db_files);
+            continue;
+        }
+
+        /* We're now looking at a regular file that doesn't yet exist
+         * in the database, so add it. */
+        next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+
+        state->processed_files++;
+
+        if (state->verbose) {
+            if (state->output_is_a_tty)
+                printf("\r\033[K");
+
+            printf ("%i/%i: %s",
+                    state->processed_files,
+                    state->total_files,
+                    next);
+
+            putchar((state->output_is_a_tty) ? '\r' : '\n');
+            fflush (stdout);
+        }
+
+        status = notmuch_database_begin_atomic (notmuch);
+        if (status) {
+            ret = status;
+            goto DONE;
+        }
+
+        status = notmuch_database_add_message (notmuch, next, &message);
+        switch (status) {
+        /* success */
+        case NOTMUCH_STATUS_SUCCESS:
+            state->added_messages++;
+            notmuch_message_freeze (message);
+            for (tag=state->new_tags; *tag != NULL; tag++)
+                notmuch_message_add_tag (message, *tag);
+            if (state->synchronize_flags == TRUE)
+                notmuch_message_maildir_flags_to_tags (message);
+            notmuch_message_thaw (message);
+            break;
+        /* Non-fatal issues (go on to next file) */
+        case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
+            if (state->synchronize_flags == TRUE)
+                notmuch_message_maildir_flags_to_tags (message);
+            break;
+        case NOTMUCH_STATUS_FILE_NOT_EMAIL:
+            fprintf (stderr, "Note: Ignoring non-mail file: %s\n",
+                     next);
+            break;
+        /* Fatal issues. Don't process anymore. */
+        case NOTMUCH_STATUS_READ_ONLY_DATABASE:
+        case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
+        case NOTMUCH_STATUS_OUT_OF_MEMORY:
+            fprintf (stderr, "Error: %s. Halting processing.\n",
+                     notmuch_status_to_string (status));
+            ret = status;
+            goto DONE;
+        default:
+        case NOTMUCH_STATUS_FILE_ERROR:
+        case NOTMUCH_STATUS_NULL_POINTER:
+        case NOTMUCH_STATUS_TAG_TOO_LONG:
+        case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW:
+        case NOTMUCH_STATUS_UNBALANCED_ATOMIC:
+        case NOTMUCH_STATUS_LAST_STATUS:
+            INTERNAL_ERROR ("add_message returned unexpected value: %d", status);
+            goto DONE;
+        }
+
+        status = notmuch_database_end_atomic (notmuch);
+        if (status) {
+            ret = status;
+            goto DONE;
+        }
+
+        if (message) {
+            notmuch_message_destroy (message);
+            message = NULL;
+        }
+
+        if (do_print_progress) {
+            do_print_progress = 0;
+            generic_print_progress ("Processed", "files", state->tv_start,
+                                    state->processed_files, state->total_files);
+        }
+
+        talloc_free (next);
+        next = NULL;
+    }
+
+    if (interrupted)
+        goto DONE;
+
+    /* Now that we've walked the whole filesystem list, anything left
+     * over in the database lists has been deleted. */
+    while (notmuch_filenames_valid (db_files))
+    {
+        char *absolute = talloc_asprintf (state->removed_files,
+                                          "%s/%s", path,
+                                          notmuch_filenames_get (db_files));
+
+        _filename_list_add (state->removed_files, absolute);
+
+        notmuch_filenames_move_to_next (db_files);
+    }
+
+    while (notmuch_filenames_valid (db_subdirs))
+    {
+        char *absolute = talloc_asprintf (state->removed_directories,
+                                          "%s/%s", path,
+                                          notmuch_filenames_get (db_subdirs));
+
+        _filename_list_add (state->removed_directories, absolute);
+
+        notmuch_filenames_move_to_next (db_subdirs);
+    }
+
+    /* If the directory's mtime is the same as the wall-clock time
+     * when we stat'ed the directory, we skip updating the mtime in
+     * the database because a message could be delivered later in this
+     * same second. This may lead to unnecessary re-scans, but it
+     * avoids overlooking messages. */
+    if (fs_mtime != stat_time)
+        _filename_list_add (state->directory_mtimes, path)->mtime = fs_mtime;
+
+  DONE:
+    if (next)
+        talloc_free (next);
+    /* A message can still be pending here when ending the atomic
+     * section failed after a successful add. */
+    if (message)
+        notmuch_message_destroy (message);
+    if (fs_entries) {
+        for (i = 0; i < num_fs_entries; i++)
+            free (fs_entries[i]);
+
+        free (fs_entries);
+    }
+    if (db_subdirs)
+        notmuch_filenames_destroy (db_subdirs);
+    if (db_files)
+        notmuch_filenames_destroy (db_files);
+    if (directory)
+        notmuch_directory_destroy (directory);
+
+    return ret;
+}
+
+static void
+setup_progress_printing_timer (void)
+{
+    struct itimerval every_second;
+    struct sigaction alarm_action;
+
+    /* Route SIGALRM to handle_sigalrm; SA_RESTART is requested so that
+     * system calls interrupted by the tick are restarted. */
+    memset (&alarm_action, 0, sizeof (alarm_action));
+    sigemptyset (&alarm_action.sa_mask);
+    alarm_action.sa_flags = SA_RESTART;
+    alarm_action.sa_handler = handle_sigalrm;
+    sigaction (SIGALRM, &alarm_action, NULL);
+
+    /* Arm a real-time interval timer: first expiry after one second,
+     * then repeating once per second. */
+    every_second.it_value.tv_sec = 1;
+    every_second.it_value.tv_usec = 0;
+    every_second.it_interval.tv_sec = 1;
+    every_second.it_interval.tv_usec = 0;
+    setitimer (ITIMER_REAL, &every_second, NULL);
+}
+
+/* Disarm the ITIMER_REAL timer and set SIGALRM to be ignored. */
+static void
+stop_progress_printing_timer (void)
+{
+    struct sigaction action;
+    struct itimerval timerval;
+
+    /* Now stop the timer. */
+    timerval.it_interval.tv_sec = 0;
+    timerval.it_interval.tv_usec = 0;
+    timerval.it_value.tv_sec = 0;
+    timerval.it_value.tv_usec = 0;
+    setitimer (ITIMER_REAL, &timerval, NULL);
+
+    /* And disable the signal handler. The whole structure is cleared
+     * first so that sigaction() is not handed indeterminate sa_mask
+     * and sa_flags values. */
+    memset (&action, 0, sizeof (action));
+    action.sa_handler = SIG_IGN;
+    sigemptyset (&action.sa_mask);
+    action.sa_flags = 0;
+    sigaction (SIGALRM, &action, NULL);
+}
+
+
+/* XXX: This should be merged with the add_files function since it
+ * shares a lot of logic with it. */
+/* Recursively count all regular files in path and all sub-directories
+ * of path. The result is added to *count (which should be
+ * initialized to zero by the top-level caller before calling
+ * count_files).
+ *
+ * Entries named ".", ".." or ".notmuch", and entries found in the
+ * ignore list of 'state', are skipped. Entries that cannot be
+ * stat'ed are skipped as well. A directory that cannot be scanned
+ * produces a warning on stderr and contributes nothing. Counting
+ * stops early once 'interrupted' is set. */
+static void
+count_files (const char *path, int *count, add_files_state_t *state)
+{
+    struct dirent *entry = NULL;
+    char *next;
+    struct stat st;
+    struct dirent **fs_entries = NULL;
+    int num_fs_entries = scandir (path, &fs_entries, 0, dirent_sort_inode);
+    int i;
+
+    if (num_fs_entries == -1) {
+        fprintf (stderr, "Warning: failed to open directory %s: %s\n",
+                 path, strerror (errno));
+        goto DONE;
+    }
+
+    for (i = 0; i < num_fs_entries && ! interrupted; i++) {
+        notmuch_bool_t user_ignored;
+
+        entry = fs_entries[i];
+
+        /* Look the name up once; the result both decides whether to
+         * skip the entry and whether to emit the debug message. */
+        user_ignored = _entry_in_ignore_list (entry->d_name, state);
+
+        /* Ignore special directories to avoid infinite recursion.
+         * Also ignore the .notmuch directory and files/directories
+         * the user has configured to be ignored.
+         */
+        if (strcmp (entry->d_name, ".") == 0 ||
+            strcmp (entry->d_name, "..") == 0 ||
+            strcmp (entry->d_name, ".notmuch") == 0 ||
+            user_ignored)
+        {
+            if (user_ignored && state->debug)
+                printf ("(D) count_files: explicitly ignoring %s/%s\n",
+                        path,
+                        entry->d_name);
+            continue;
+        }
+
+        if (asprintf (&next, "%s/%s", path, entry->d_name) == -1) {
+            next = NULL;
+            fprintf (stderr, "Error descending from %s to %s: Out of memory\n",
+                     path, entry->d_name);
+            continue;
+        }
+
+        /* When stat fails, 'st' holds no valid data for this entry
+         * (it is either uninitialized or left over from a previous
+         * entry), so the entry must not be classified from it. */
+        if (stat (next, &st) != 0) {
+            free (next);
+            continue;
+        }
+
+        if (S_ISREG (st.st_mode)) {
+            *count = *count + 1;
+            if (*count % 1000 == 0) {
+                printf ("Found %d files so far.\r", *count);
+                fflush (stdout);
+            }
+        } else if (S_ISDIR (st.st_mode)) {
+            count_files (next, count, state);
+        }
+
+        free (next);
+    }
+
+  DONE:
+    if (fs_entries) {
+        for (i = 0; i < num_fs_entries; i++)
+            free (fs_entries[i]);
+
+        free (fs_entries);
+    }
+}
+
+/* Progress callback for a database upgrade: print the percentage
+ * completed on a single, carriage-return terminated line. Once some
+ * progress has been made, an estimate of the remaining time is
+ * appended, extrapolated from the time elapsed since the tv_start
+ * stored in 'closure' (an add_files_state_t). 'progress' is scaled
+ * by 100 for display. */
+static void
+upgrade_print_progress (void *closure,
+                        double progress)
+{
+    add_files_state_t *state = closure;
+    struct timeval now;
+    double spent, remaining;
+
+    printf ("Upgrading database: %.2f%% complete", progress * 100.0);
+
+    if (progress > 0) {
+        gettimeofday (&now, NULL);
+        spent = notmuch_time_elapsed (state->tv_start, now);
+
+        /* Total estimated time is spent / progress; the part still to
+         * come is the (1 - progress) share of it. */
+        remaining = (spent / progress) * (1.0 - progress);
+
+        printf (" (");
+        notmuch_time_print_formatted_seconds (remaining);
+        printf (" remaining)");
+    }
+
+    printf (". \r");
+    fflush (stdout);
+}
+
+/* Remove the single message filename 'path' from the database.
+ *
+ * The lookup and removal run inside one atomic section. When the
+ * removal reports NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID the filename is
+ * counted as a rename (and, if flag synchronization is enabled, the
+ * message's tags are refreshed from its maildir flags) and success is
+ * returned; a plain success is counted as a removed message. A
+ * filename the database does not know is not an error. Returns the
+ * first failing status otherwise. */
+static notmuch_status_t
+remove_filename (notmuch_database_t *notmuch,
+                 const char *path,
+                 add_files_state_t *add_files_state)
+{
+    notmuch_message_t *message;
+    notmuch_status_t status = notmuch_database_begin_atomic (notmuch);
+
+    if (status)
+        return status;
+
+    status = notmuch_database_find_message_by_filename (notmuch, path, &message);
+
+    /* Only attempt the removal when the lookup worked and found a
+     * message for this filename. */
+    if (! status && message != NULL) {
+        status = notmuch_database_remove_message (notmuch, path);
+
+        switch (status) {
+        case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
+            add_files_state->renamed_messages++;
+            if (add_files_state->synchronize_flags == TRUE)
+                notmuch_message_maildir_flags_to_tags (message);
+            status = NOTMUCH_STATUS_SUCCESS;
+            break;
+        case NOTMUCH_STATUS_SUCCESS:
+            add_files_state->removed_messages++;
+            break;
+        default:
+            break;
+        }
+
+        notmuch_message_destroy (message);
+    }
+
+    notmuch_database_end_atomic (notmuch);
+    return status;
+}
+
+/* Recursively remove all filenames from the database referring to
+ * 'path' (or to any of its children). */
+static notmuch_status_t
+_remove_directory (void *ctx,
+ notmuch_database_t *notmuch,
+ const char *path,
+ add_files_state_t *add_files_state)
+{
+ notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
+ notmuch_directory_t *directory;
+ notmuch_filenames_t *files, *subdirs;
+ char *absolute;
+
+ status = notmuch_database_get_directory (notmuch, path, &directory);
+ if (status || !directory)
+ return status;
+
+ for (files = notmuch_directory_get_child_files (directory);
+ notmuch_filenames_valid (files);
+ notmuch_filenames_move_to_next (files))
+ {
+ absolute = talloc_asprintf (ctx, "%s/%s", path,
+ notmuch_filenames_get (files));
+ status = remove_filename (notmuch, absolute, add_files_state);
+ talloc_free (absolute);
+ if (status)
+ goto DONE;
+ }
+
+ for (subdirs = notmuch_directory_get_child_directories (directory);
+ notmuch_filenames_valid (subdirs);
+ notmuch_filenames_move_to_next (subdirs))
+ {
+ absolute = talloc_asprintf (ctx, "%s/%s", path,
+ notmuch_filenames_get (subdirs));
+ status = _remove_directory (ctx, notmuch, absolute, add_files_state);
+ talloc_free (absolute);
+ if (status)
+ goto DONE;
+ }
+
+ DONE:
+ notmuch_directory_destroy (directory);
+ return status;