diff options
author | Guillermo Ramos | 2023-09-26 13:28:05 +0200 |
---|---|---|
committer | Guillermo Ramos | 2023-09-26 13:53:11 +0200 |
commit | d5d95d418f5959a70f7fe79210562f98c3fad5e9 (patch) | |
tree | 606e7e23d47cb67b59a21fe0ddf771a78189e730 /dups | |
parent | 58862a4455b9679b97a59b458d63e8e1cf523989 (diff) | |
download | cli-d5d95d418f5959a70f7fe79210562f98c3fad5e9.tar.gz |
dups: refactor
Diffstat (limited to 'dups')
-rwxr-xr-x | dups | 59 |
1 files changed, 39 insertions, 20 deletions
@@ -25,9 +25,7 @@ my @preferences; # Map from size to list of filenames my %sizes; -# Map from md5 to list of filenames -my %md5s; - +# 'wanted' subroutine of File::Find::find. Add the file name to its size key in the %sizes hash sub register_file_size { return if -d "$_"; # Skip directories @@ -37,11 +35,13 @@ sub register_file_size { push @{$sizes{$size}}, $filename; } +# Return true iff '$file' is anywhere inside '$dir' sub indir($file, $dir) { my ($diffpath) = Cwd::abs_path($file) =~ /^$dir(.*)$/; return !!$diffpath; } +# Take @files and filter it with the best results, discarding the ones from the disliked directories sub guess_best_choices(@files) { my @best_guesses = (); @@ -69,7 +69,7 @@ sub keep($choice, @rest) { unlink foreach @delete; } -print STDERR "Collecting file sizes...\n"; +# Parse CLI args while (my $arg = shift) { if ($arg eq "-y" || $arg eq "--yes") { $yes = 1; @@ -78,29 +78,48 @@ while (my $arg = shift) { } elsif ($arg eq "-d" || $arg eq "--dislike") { push @preferences, {"type" => "dislike", "dir" => Cwd::abs_path(shift) . 
"/"}; } else { + print STDERR "Collecting file sizes...\n"; find(\&register_file_size, $arg); } } -print STDERR "Computing md5s of files with same size...\n"; -my @progress = (0, 0, scalar(keys(%sizes))); -foreach my $size (keys(%sizes)) { - $progress[0]++; - $progress[1] = 0; - my @same_size_files = @{$sizes{$size}}; - next unless @same_size_files gt 1; # Discard unique sizes - - foreach my $file (@same_size_files) { - $progress[1]++; - print STDERR "$progress[0].$progress[1] / $progress[2]\r"; - STDERR->flush(); - - my ($md5) = capture("md5sum", "-z", $file) =~ m/^([^ ]+)/; - $md5s{$md5} ||= []; - push @{$md5s{$md5}}, $file; +my $progressidx = 0; +my $progressbar = '/-\|'; +sub progressbar($curr, $total) { + return sprintf "\r[%s] %s/%s", substr($progressbar, $progressidx++ % length($progressbar), 1), $curr, $total; +} + +# Convert a hash of sizes to a hash of md5s +sub sizes_to_md5s($sizes) { + my %md5s; + # progress: (curr_size, total_sizes) + my @progress = (1, scalar(keys(%$sizes))); + foreach my $size (keys(%$sizes)) { + my @same_size_files = @{$sizes->{$size}}; + + printf STDERR progressbar($progress[0], $progress[1]); + # Compute only md5 of non-unique sizes + if (@same_size_files gt 1) { + foreach my $file (@same_size_files) { + printf STDERR progressbar($progress[0], $progress[1]) unless $progress[1] == 0; + STDERR->flush(); + + my ($md5) = capture("md5sum", "-z", $file) =~ m/^([^ ]+)/; + $md5s{$md5} ||= []; + push @{$md5s{$md5}}, $file; + } + } + + $progress[0]++; } + + return \%md5s; } +print STDERR "Computing md5s of files with same size...\n"; +# Map from md5 to list of filenames +my %md5s = %{sizes_to_md5s(\%sizes)}; + print "\n"; foreach my $md5 (keys(%md5s)) { |