# Detect and delete duplicate files in "$maildir/cur" without external tools.
#
# Files are first grouped by size; only files that share a size with at least
# one other file are hashed with SHA-256.  Within each group of identical
# digests the first file (in name order) is kept and the others are unlinked.
#
# Parameters:
#   $maildir - path of the maildir whose cur/ subdirectory is scanned
#
# Empty files, dot files, non-regular files and files that cannot be read are
# left untouched.  Deletion is best-effort: unlink failures are not reported.
# Returns nothing.
sub builtin_remove_dups($) {
    my ($maildir) = @_;
    my $cur_dir = "$maildir/cur";

    # Read the directory directly instead of calling glob(): glob() splits
    # its pattern on whitespace and expands metacharacters, so a maildir path
    # containing a space (or *, [, {, ~) would be scanned in the wrong place.
    opendir(my $dh, $cur_dir) or return;
    # Skip dot files, as the "*" glob pattern did, and sort so that the file
    # kept from each duplicate group is deterministic.
    my @files = map { "$cur_dir/$_" } sort grep { !/^\./ } readdir($dh);
    closedir($dh);

    # Group files by matching sizes
    my %size_to_files;
    foreach my $file (@files) {
        # Only regular files are candidates; hashing a directory would fail.
        next unless -f $file;
        my $size = -s _;    # reuse the stat() result from the -f test
        push(@{$size_to_files{$size}}, $file) if $size;
    }

    foreach my $same_size_files (values %size_to_files) {
        # Don't run sha unless there is another file of the same size
        next if scalar(@$same_size_files) < 2;

        # Group files with matching sizes by SHA-256
        my %sha_to_files;
        foreach my $file (@$same_size_files) {
            open(my $fh, '<', $file) or next;
            binmode($fh);
            # addfile() dies when reading fails; treat that like a failed
            # open and skip the file instead of aborting the whole scan.
            my $sha256hash =
                eval { Digest::SHA->new(256)->addfile($fh)->hexdigest };
            close($fh);
            next unless defined $sha256hash;

            push(@{$sha_to_files{$sha256hash}}, $file);
        }

        # Remove duplicates, keeping the first file of every group
        foreach my $same_sha_files (values %sha_to_files) {
            my ($keep, @duplicates) = @$same_sha_files;
            unlink(@duplicates) if @duplicates;
        }
    }

    return;
}
-
# Detect and remove duplicate search results in "$maildir/cur".
#
# Uses the external fdupes program when which() finds it; otherwise, or when
# fdupes cannot be started, falls back to builtin_remove_dups().
#
# Parameters:
#   $maildir - path of the maildir whose cur/ subdirectory is cleaned
#
# Returns nothing.
sub remove_duplicates($) {
    my ($maildir) = @_;

    my $fdupes = which("fdupes");
    if ($fdupes) {
        my @command = (
            $fdupes,
            qw(--hardlinks --symlinks --delete --noprompt --quiet),
            "$maildir/cur/",
        );

        # The list form of a pipe open runs fdupes without a shell, so
        # spaces or shell metacharacters in $maildir reach it as one intact
        # argument instead of being re-parsed (or executed) by /bin/sh.
        if (open(my $fdupes_out, '-|', @command)) {
            # Drain and discard standard output, in place of "> /dev/null".
            1 while defined(readline($fdupes_out));
            close($fdupes_out);
            return;
        }
    }

    builtin_remove_dups($maildir);
    return;
}
-