aboutsummaryrefslogtreecommitdiff
path: root/dups
diff options
context:
space:
mode:
Diffstat (limited to 'dups')
-rwxr-xr-xdups59
1 files changed, 39 insertions, 20 deletions
diff --git a/dups b/dups
index 4f38482..58aae1c 100755
--- a/dups
+++ b/dups
@@ -25,9 +25,7 @@ my @preferences;
# Map from size to list of filenames
my %sizes;
-# Map from md5 to list of filenames
-my %md5s;
-
+# 'wanted' subroutine of File::Find::find. Add the file name to its size key in the %sizes hash
sub register_file_size {
return if -d "$_"; # Skip directories
@@ -37,11 +35,13 @@ sub register_file_size {
push @{$sizes{$size}}, $filename;
}
+# Return true iff '$file' is anywhere inside '$dir'
sub indir($file, $dir) {
    # Purpose: tell whether $file lives somewhere below $dir.
    #   $file - path to test; it is canonicalised with Cwd::abs_path first.
    #   $dir  - directory prefix compared literally against the canonical path
    #           (the visible caller passes an absolute path ending in "/").
    # Returns a boolean.

    # An unresolvable path cannot be inside anything; bail out instead of
    # comparing against an undefined value.
    my $abs = Cwd::abs_path($file);
    return !!0 unless defined $abs;

    # Literal prefix test instead of interpolating $dir into a regex: regex
    # metacharacters in directory names must not be interpreted, and a
    # remainder of "0" (a file literally named 0) must still count as inside.
    # The length check keeps "$abs eq $dir" reported as not inside.
    return index($abs, $dir) == 0 && length($abs) > length($dir);
}
+# Take @files and filter it with the best results, discarding the ones from the disliked directories
sub guess_best_choices(@files) {
my @best_guesses = ();
@@ -69,7 +69,7 @@ sub keep($choice, @rest) {
unlink foreach @delete;
}
-print STDERR "Collecting file sizes...\n";
+# Parse CLI args
while (my $arg = shift) {
if ($arg eq "-y" || $arg eq "--yes") {
$yes = 1;
@@ -78,29 +78,48 @@ while (my $arg = shift) {
} elsif ($arg eq "-d" || $arg eq "--dislike") {
push @preferences, {"type" => "dislike", "dir" => Cwd::abs_path(shift) . "/"};
} else {
+ print STDERR "Collecting file sizes...\n";
find(\&register_file_size, $arg);
}
}
-print STDERR "Computing md5s of files with same size...\n";
-my @progress = (0, 0, scalar(keys(%sizes)));
-foreach my $size (keys(%sizes)) {
- $progress[0]++;
- $progress[1] = 0;
- my @same_size_files = @{$sizes{$size}};
- next unless @same_size_files gt 1; # Discard unique sizes
-
- foreach my $file (@same_size_files) {
- $progress[1]++;
- print STDERR "$progress[0].$progress[1] / $progress[2]\r";
- STDERR->flush();
-
- my ($md5) = capture("md5sum", "-z", $file) =~ m/^([^ ]+)/;
- $md5s{$md5} ||= [];
- push @{$md5s{$md5}}, $file;
+# Spinner state: index of the next animation frame and the frames themselves
+my $progressidx = 0;
+my $progressbar = '/-\|';
+# Build one progress line: carriage return, spinner frame, then "curr/total".
+# Every call advances the spinner by one frame. Returns the string; prints nothing.
+sub progressbar($curr, $total) {
+    my $frame_count = length($progressbar);
+    my $frame       = substr($progressbar, $progressidx % $frame_count, 1);
+    $progressidx++;
+    return sprintf "\r[%s] %s/%s", $frame, $curr, $total;
+}
+
+# Convert a hash of sizes to a hash of md5s.
+#
+# $sizes is a reference to a hash mapping a file size to an array reference of
+# file names. Returns a reference to a hash mapping an md5 digest to an array
+# reference of the file names that produced it. Only files sharing their size
+# with at least one other file are hashed. A progress line is written to
+# STDERR while working.
+sub sizes_to_md5s($sizes) {
+    my %md5s;
+    my $total_sizes = scalar(keys(%$sizes));
+    my $curr_size   = 1;
+    foreach my $size (keys(%$sizes)) {
+        my @same_size_files = @{$sizes->{$size}};
+
+        # print rather than printf: the progress string is data, not a format
+        print STDERR progressbar($curr_size, $total_sizes);
+        # Compute only md5 of non-unique sizes (numeric comparison of the count)
+        if (@same_size_files > 1) {
+            foreach my $file (@same_size_files) {
+                # Redraw per file so the spinner keeps moving on large groups
+                print STDERR progressbar($curr_size, $total_sizes);
+                STDERR->flush();
+
+                # The digest is everything before the first space of the output
+                my ($md5) = capture("md5sum", "-z", $file) =~ m/^([^ ]+)/;
+                if (!defined $md5) {
+                    # Never group files under an undefined digest: they would
+                    # be reported as duplicates of each other.
+                    warn "\nCould not parse md5sum output for '$file', skipping\n";
+                    next;
+                }
+                push @{$md5s{$md5}}, $file;
+            }
+        }
+
+        $curr_size++;
+    }
+
+    return \%md5s;
+}
+print STDERR "Computing md5s of files with same size...\n";
+# Map from md5 to list of filenames
+my %md5s = %{sizes_to_md5s(\%sizes)};
+
print "\n";
foreach my $md5 (keys(%md5s)) {