aboutsummaryrefslogtreecommitdiff
path: root/dups
diff options
context:
space:
mode:
Diffstat (limited to 'dups')
-rwxr-xr-xdups104
1 files changed, 99 insertions, 5 deletions
diff --git a/dups b/dups
index bbf357e..4049682 100755
--- a/dups
+++ b/dups
@@ -3,10 +3,21 @@
use strict;
use warnings;
+use List::Util qw<min>;
use File::Find qw<find>;
use IPC::System::Simple qw<capture>;
-@ARGV ge 1 || die "Usage: $0 <dir1> [<dir2> ...]\n";
+# Require at least one command-line argument. The count is compared
+# numerically (>=); the string operator `ge` only gives the right answer by
+# accident of how small integers stringify.
+@ARGV >= 1 || die "Usage: $0 [opts] <dir1> [<dir2> ...]
+
+Opts: --dislike|-d <path> Dislike directory (reduce priority)
+ --yes |-y Don't ask for confirmation when there's a single choice
+ --debug Print extra debug info
+";
+
+# CLI options
+my $yes = 0;
+my $debug = 0;
+my @preferences;
my %sizes;
my %md5s;
@@ -19,8 +30,60 @@ sub fill_sizes {
push @{$sizes{$size}}, $File::Find::name;
}
+# Measure how deep $file sits below the directory $dir.
+#
+# $file is resolved with Cwd::abs_path before matching. $dir is matched
+# literally as a prefix, so it is expected to be an absolute path ending in
+# "/" (which is how the --dislike handler builds it).
+#
+# Returns the number of "/" separators between $dir and the file name
+# (0 for a file directly inside $dir), or -1 when $file is not located under
+# $dir or its path cannot be resolved.
+sub closeness {
+    my ($file, $dir) = @_;
+
+    # Cwd is otherwise only loaded as a side effect of File::Find.
+    require Cwd;
+
+    # abs_path returns undef when the path cannot be resolved.
+    my $abs = Cwd::abs_path($file);
+    return -1 unless defined $abs;
+
+    # \Q...\E: directory names may contain regex metacharacters ("a.b" must
+    # not match "aXb"). /s lets the remainder span newlines in file names.
+    my ($diffpath) = $abs =~ /^\Q$dir\E(.*)\z/s;
+
+    # Test definedness/length rather than truth, so that a file literally
+    # named "0" directly inside $dir still counts as being under it.
+    return -1 unless defined $diffpath && length $diffpath;
+
+    # tr/// in counting mode returns the number of "/" characters, in both
+    # scalar and list context.
+    return $diffpath =~ tr{/}{};
+}
+
+# Narrow a list of duplicate files down to the preferred candidates.
+#
+# Takes the list of duplicate file paths and applies every entry of
+# @preferences in order. For a "dislike" preference each remaining candidate
+# is scored with closeness(): -1 (not under the disliked directory) ranks
+# best, otherwise a larger value ranks better. Only the best-ranked
+# candidates survive to the next preference; ties are all kept, in their
+# original order.
+#
+# Returns the surviving candidates (the input list unchanged when there are
+# no preferences).
+sub guess_best_choices {
+    my @best_guesses = @_;
+
+    foreach my $pref (@preferences) {
+        next unless $pref->{"type"} eq "dislike";
+
+        print STDERR "PREF: DISLIKE $pref->{dir}\n" if $debug;
+
+        # Re-rank from scratch: start with an empty result so that
+        # candidates tying with the current best are not listed twice.
+        my @files = @best_guesses;
+        @best_guesses = ();
+        my $best_index;    # undef until the first candidate has been scored
+
+        foreach my $file (@files) {
+            my $dislikability = closeness($file, $pref->{"dir"});
+            print STDERR "CLOSENESS($file, $pref->{dir}) = $dislikability\n" if $debug;
+
+            if (!defined $best_index
+                || ($best_index != -1
+                    && ($dislikability == -1 || $dislikability > $best_index))) {
+                # First candidate, or strictly better than anything so far.
+                @best_guesses = ($file);
+                $best_index = $dislikability;
+            } elsif ($dislikability == $best_index) {
+                # Ties with the current best.
+                push @best_guesses, $file;
+            }
+            print STDERR "BEST: [$best_index] @best_guesses\n" if $debug;
+        }
+    }
+
+    return @best_guesses;
+}
+
+# Keep one file and delete the others.
+#
+# $choice is the path to keep; every path in @rest that is not string-equal
+# to $choice is removed with unlink. A failed deletion is reported with a
+# warning and does not stop the remaining deletions.
+sub keep {
+    my ($choice, @rest) = @_;
+    my @delete = grep { $_ ne $choice } @rest;
+    print STDERR "\tKEEPING $choice, DELETING @delete\n" if $debug;
+
+    foreach my $victim (@delete) {
+        # unlink returns the number of files removed; 0 means failure.
+        unlink($victim) or warn "Could not delete '$victim': $!\n";
+    }
+    return;
+}
+
print STDERR "Collecting file sizes...\n";
-find(\&fill_sizes, $_) foreach (@ARGV);
+# Cwd is otherwise only loaded as a side effect of File::Find.
+require Cwd;
+
+# Walk the command line in order. Flags set options, --dislike consumes the
+# following argument as a directory, and anything else is scanned right
+# away with fill_sizes. The loop tests definedness so that an argument of
+# "0" or "" does not end parsing early.
+while (defined(my $arg = shift @ARGV)) {
+    if ($arg eq "-y" || $arg eq "--yes") {
+        $yes = 1;
+    } elsif ($arg eq "--debug") {
+        $debug = 1;
+    } elsif ($arg eq "-d" || $arg eq "--dislike") {
+        my $path = shift @ARGV;
+        defined $path or die "$arg requires a <path> argument\n";
+
+        # abs_path returns undef when the path cannot be resolved.
+        my $dir = Cwd::abs_path($path);
+        defined $dir or die "Cannot resolve disliked directory '$path'\n";
+
+        # Stored with exactly one trailing "/" so that prefix matching only
+        # hits whole path components ("/" itself must not become "//").
+        $dir .= "/" unless $dir =~ m{/\z};
+        push @preferences, {"type" => "dislike", "dir" => $dir};
+    } else {
+        find(\&fill_sizes, $arg);
+    }
+}
print STDERR "Computing md5s of files with same size...\n";
my @progress = (0, 0, scalar(keys(%sizes)));
@@ -30,7 +93,6 @@ foreach my $size (keys(%sizes)) {
my @same_size_files = @{$sizes{$size}};
next unless @same_size_files gt 1; # Discard unique sizes
-
foreach my $file (@same_size_files) {
$progress[1]++;
print STDERR "$progress[0].$progress[1] / $progress[2]\r";
@@ -42,15 +104,47 @@ foreach my $size (keys(%sizes)) {
}
}
+print "\n";
+
foreach my $md5 (keys(%md5s)) {
my @same_md5_files = @{$md5s{$md5}};
next unless @same_md5_files gt 1; # Discard unique hashes
- print "Found duplicate files:\n";
+ print "\nFound duplicate files:\n";
foreach my $file (@same_md5_files) {
print "\t$file\n";
}
- printf "\n";
+
+ # Decide which copy to keep. With a single best candidate, ask for a
+ # yes/no confirmation (skipped when --yes was given); with several, let
+ # the user pick one by index. Anything other than an explicit, valid
+ # answer leaves all files untouched.
+ if (my @best_choices = guess_best_choices(@same_md5_files)) {
+     if (@best_choices == 1) {
+         my $best_choice = $best_choices[0];
+         my $gogo = $yes;
+         unless ($gogo) {
+             print "\n\tBest choice is '$best_choice', do you want to delete the others? (yes/y) ";
+             my $resp = <STDIN>;
+             # <STDIN> yields undef at end of input; treat that as "no".
+             $resp = "" unless defined $resp;
+             chomp $resp;
+             $gogo = $resp eq "yes" || $resp eq "y";
+         }
+         if ($gogo) {
+             keep($best_choice, @same_md5_files);
+         }
+     } else {
+         print "\n\tPlease choose one to keep (or press 'enter' to skip):\n";
+         for my $i (0 .. $#best_choices) {
+             print "\t [$i] $best_choices[$i]\n";
+         }
+         print "\t> ";
+         my $index = <STDIN>;
+         # End of input counts as skipping.
+         $index = "" unless defined $index;
+         chomp $index;
+         # Accept only a plain non-negative integer. Passing arbitrary text
+         # through int() would turn e.g. "n" into 0 and delete files the
+         # user never selected.
+         if ($index =~ /\A\s*([0-9]+)\s*\z/ && $1 < @best_choices) {
+             keep($best_choices[$1], @same_md5_files);
+         }
+     }
+ }
}
printf STDERR "Done!\n";