From e5922e46abc2ef23ea0354a38fdeff1041aedd6f Mon Sep 17 00:00:00 2001 From: Guillermo Ramos Date: Tue, 22 Aug 2023 20:14:16 +0200 Subject: dups: WIP rework --- dups | 101 +++++++++++++++++++++++++++++++++---------------------------------- 1 file changed, 49 insertions(+), 52 deletions(-) diff --git a/dups b/dups index bf59ee9..4f38482 100755 --- a/dups +++ b/dups @@ -3,8 +3,11 @@ use strict; use warnings; +use v5.36; + use List::Util qw; use File::Find qw<find>; + use IPC::System::Simple qw<capture>; @ARGV ge 1 || die "Usage: $0 [opts] [ ...] @@ -19,45 +22,40 @@ my $yes = 0; my $debug = 0; my @preferences; +# Map from size to list of filenames my %sizes; + +# Map from md5 to list of filenames my %md5s; -sub fill_sizes { +sub register_file_size { return if -d "$_"; # Skip directories + my $filename = $File::Find::name; my $size = capture("stat", "--printf=%s", $_); $sizes{$size} ||= []; - push @{$sizes{$size}}, $File::Find::name; + push @{$sizes{$size}}, $filename; } -sub closeness { - my ($file, $dir) = @_; +sub indir($file, $dir) { my ($diffpath) = Cwd::abs_path($file) =~ /^$dir(.*)$/; - if ($diffpath) { - return () = $diffpath =~ /\//g; - } else { - return -1; - } + return !!$diffpath; } -sub guess_best_choices { - my @best_guesses = @_; +sub guess_best_choices(@files) { + my @best_guesses = (); + # Remove some choice based on the 'dislike' preference foreach my $pref (@preferences) { if ($pref->{"type"} eq "dislike") { print STDERR "PREF: DISLIKE $pref->{dir}\n" if $debug; - my $best_index = 0; - my @files = @best_guesses; foreach my $file (@files) { - my $dislikability = closeness($file, $pref->{"dir"}); - print STDERR "CLOSENESS($file, $pref->{dir}) = $dislikability\n" if $debug; - if ($best_index != -1 && ($dislikability == -1 || $dislikability > $best_index)) { - @best_guesses = ($file); - $best_index = $dislikability; - } elsif ($dislikability == $best_index) { + if (indir($file, $pref->{"dir"})) { + print STDERR "Discarding $file...\n" if $debug; + } else { push 
@best_guesses, $file; } - print STDERR "BEST: [$best_index] @best_guesses\n" if $debug; + print STDERR "BEST: @best_guesses\n" if $debug; } } } @@ -65,8 +63,7 @@ sub guess_best_choices { return @best_guesses; } -sub keep { - my ($choice, @rest) = @_; +sub keep($choice, @rest) { my @delete = grep { $_ ne $choice } @rest; print STDERR "\tKEEPING $choice, DELETING @delete\n" if $debug; unlink foreach @delete; @@ -81,7 +78,7 @@ while (my $arg = shift) { } elsif ($arg eq "-d" || $arg eq "--dislike") { push @preferences, {"type" => "dislike", "dir" => Cwd::abs_path(shift) . "/"}; } else { - find(\&fill_sizes, $arg); + find(\&register_file_size, $arg); } } @@ -115,35 +112,35 @@ foreach my $md5 (keys(%md5s)) { print "\t$file\n"; } - if (my @best_choices = guess_best_choices(@same_md5_files)) { - if (@best_choices == 1) { - my $best_choice = $best_choices[0]; - my $gogo = $yes; - unless ($gogo) { - print "\n\tBest choice is '$best_choice', do you want to delete the others? (yes/y) "; - my $resp = <STDIN>; - chomp $resp; - $gogo = $resp eq "yes" || $resp eq "y"; - } - if ($gogo) { - keep($best_choice, @same_md5_files); - } - } else { - print "\n\tPlease choose one to keep (or press 'enter' to skip):\n"; - for (my $i = 0; $i < @best_choices; $i++) { - my $choice = $best_choices[$i]; - print "\t [$i] $choice\n"; - } - print "\t> "; - my $index = <STDIN>; - chomp $index; - if ($index ne "") { - $index = int($index); - if ($index >= 0 && $index < @best_choices) { - keep($best_choices[$index], @same_md5_files); - } else { - print "\n!!\t Index outside of range, ignoring\n"; - } + my @best_choices = guess_best_choices(@same_md5_files); + @best_choices = @same_md5_files unless @best_choices; + if (@best_choices == 1) { + my $best_choice = $best_choices[0]; + my $gogo = $yes; + unless ($gogo) { + print "\n\tBest choice is '$best_choice', do you want to delete the others? 
(yes/y) "; + my $resp = <STDIN>; + chomp $resp; + $gogo = $resp eq "yes" || $resp eq "y"; + } + if ($gogo) { + keep($best_choice, @same_md5_files); + } + } else { + print "\n\tPlease choose one to keep (or press 'enter' to skip):\n"; + for (my $i = 0; $i < @best_choices; $i++) { + my $choice = $best_choices[$i]; + print "\t [$i] $choice\n"; + } + print "\t> "; + my $index = <STDIN>; + chomp $index; + if ($index ne "") { + $index = int($index); + if ($index >= 0 && $index < @best_choices) { + keep($best_choices[$index], @same_md5_files); + } else { + print "\n!!\t Index outside of range, ignoring\n"; } } } -- cgit v1.2.3