diff options
Diffstat (limited to 'dups')
-rwxr-xr-x | dups | 137 |
1 files changed, 132 insertions, 5 deletions
@@ -3,10 +3,22 @@
 use strict;
 use warnings;
 
+use Cwd ();
+use List::Util qw<min>;
 use File::Find qw<find>;
 use IPC::System::Simple qw<capture>;
 
-@ARGV ge 1 || die "Usage: $0 <dir1> [<dir2> ...]\n";
+@ARGV >= 1 or die "Usage: $0 [opts] <dir1> [<dir2> ...]
+
+Opts: --dislike|-d <path>  Dislike directory (reduce priority)
+      --yes    |-y         Don't ask for confirmation when there's a single choice
+      --debug              Print extra debug info
+";
+
+# CLI options
+my $yes = 0;
+my $debug = 0;
+my @preferences;
 
 my %sizes;
 my %md5s;
@@ -19,8 +31,90 @@ sub fill_sizes {
     push @{$sizes{$size}}, $File::Find::name;
 }
 
+# Return how deep $file sits below $dir (an absolute path ending in "/"):
+# the number of "/" separators in the path of $file relative to $dir, or -1
+# when $file does not resolve to a location under $dir.
+sub closeness {
+    my ($file, $dir) = @_;
+
+    # Guard against abs_path() failing to resolve the file (undef result).
+    my $path = Cwd::abs_path($file);
+    return -1 unless defined $path;
+
+    # \Q...\E: the directory is literal text, not a pattern.
+    my ($diffpath) = $path =~ /\A\Q$dir\E(.*)\z/s;
+    return -1 unless defined $diffpath && length $diffpath;
+
+    # tr/// counts the separators and, unlike "() = m//g", gives the same
+    # result in list and scalar context.
+    return $diffpath =~ tr{/}{};
+}
+
+# Narrow a list of duplicate files down to the candidates worth keeping.
+# Each "dislike" preference filters the survivors of the previous one: files
+# outside the disliked directory win; when every candidate is inside it, the
+# ones nested deepest below it win. With no preferences the list is returned
+# unchanged.
+sub guess_best_choices {
+    my @best_guesses = @_;
+
+    foreach my $pref (@preferences) {
+        next unless $pref->{"type"} eq "dislike";
+        print STDERR "PREF: DISLIKE $pref->{dir}\n" if $debug;
+
+        # Score each remaining candidate once and track the best score:
+        # -1 (outside the directory) beats any depth, otherwise deeper wins.
+        my %score_of;
+        my $best_index;
+        foreach my $file (@best_guesses) {
+            my $dislikability = closeness($file, $pref->{"dir"});
+            print STDERR "CLOSENESS($file, $pref->{dir}) = $dislikability\n" if $debug;
+            $score_of{$file} = $dislikability;
+            if (!defined $best_index
+                || ($best_index != -1 && ($dislikability == -1 || $dislikability > $best_index))) {
+                $best_index = $dislikability;
+            }
+        }
+        next unless defined $best_index;    # nothing left to rank
+
+        # Keep only the top scorers, in their original order and without
+        # duplicating any of them.
+        @best_guesses = grep { $score_of{$_} == $best_index } @best_guesses;
+        print STDERR "BEST: [$best_index] @best_guesses\n" if $debug;
+    }
+
+    return @best_guesses;
+}
+
+# Delete every file in @rest other than $choice, the copy being kept. A
+# failed deletion is reported on STDERR and does not stop the run.
+sub keep {
+    my ($choice, @rest) = @_;
+    my @delete = grep { $_ ne $choice } @rest;
+    print STDERR "\tKEEPING $choice, DELETING @delete\n" if $debug;
+    foreach my $file (@delete) {
+        unlink $file or warn "Could not delete '$file': $!\n";
+    }
+}
+
 print STDERR "Collecting file sizes...\n";
-find(\&fill_sizes, $_) foreach (@ARGV);
+# Test with defined() so that an argument such as "0" does not end the loop.
+while (defined(my $arg = shift @ARGV)) {
+    if ($arg eq "-y" || $arg eq "--yes") {
+        $yes = 1;
+    } elsif ($arg eq "--debug") {
+        $debug = 1;
+    } elsif ($arg eq "-d" || $arg eq "--dislike") {
+        my $path = shift @ARGV;
+        defined $path or die "$arg requires a <path> argument\n";
+        my $dir = Cwd::abs_path($path);
+        defined $dir or die "Cannot resolve disliked directory '$path'\n";
+        # Trailing "/" so that disliking /a/b does not also match /a/bc.
+        push @preferences, {"type" => "dislike", "dir" => "$dir/"};
+    } else {
+        find(\&fill_sizes, $arg);
+    }
+}
 
 print STDERR "Computing md5s of files with same size...\n";
 my @progress = (0, 0, scalar(keys(%sizes)));
@@ -30,7 +124,6 @@ foreach my $size (keys(%sizes)) {
     my @same_size_files = @{$sizes{$size}};
     next unless @same_size_files gt 1; # Discard unique sizes
 
-
     foreach my $file (@same_size_files) {
         $progress[1]++;
         print STDERR "$progress[0].$progress[1] / $progress[2]\r";
@@ -42,15 +135,49 @@ foreach my $size (keys(%sizes)) {
     }
 }
 
+print "\n";
+
 foreach my $md5 (keys(%md5s)) {
     my @same_md5_files = @{$md5s{$md5}};
     next unless @same_md5_files gt 1; # Discard unique hashes
 
-    print "Found duplicate files:\n";
+    print "\nFound duplicate files:\n";
     foreach my $file (@same_md5_files) {
         print "\t$file\n";
     }
-    printf "\n";
+
+    if (my @best_choices = guess_best_choices(@same_md5_files)) {
+        if (@best_choices == 1) {
+            my $best_choice = $best_choices[0];
+            my $gogo = $yes;
+            unless ($gogo) {
+                print "\n\tBest choice is '$best_choice', do you want to delete the others? (yes/y) ";
+                # <STDIN> returns undef at end of input; treat it as "no".
+                my $resp = <STDIN>;
+                $resp = "" unless defined $resp;
+                chomp $resp;
+                $gogo = $resp eq "yes" || $resp eq "y";
+            }
+            if ($gogo) {
+                keep($best_choice, @same_md5_files);
+            }
+        } else {
+            print "\n\tPlease choose one to keep (or press 'enter' to skip):\n";
+            for my $i (0 .. $#best_choices) {
+                print "\t [$i] $best_choices[$i]\n";
+            }
+            print "\t> ";
+            my $index = <STDIN>;
+            $index = "" unless defined $index;
+            chomp $index;
+            # Accept only a whole number in range. int() would turn a stray
+            # answer such as "n" into 0 and delete every copy but the first.
+            my ($picked) = $index =~ /\A\s*(\d+)\s*\z/;
+            if (defined $picked && $picked < @best_choices) {
+                keep($best_choices[$picked], @same_md5_files);
+            }
+        }
+    }
 }
 
 printf STDERR "Done!\n";