about | summary | refs | log | tree | commit | diff
path: root/dups
diff options
context:
space:
mode:
Diffstat (limited to 'dups')
-rwxr-xr-x dups 101
1 files changed, 49 insertions, 52 deletions
diff --git a/dups b/dups
index bf59ee9..4f38482 100755
--- a/dups
+++ b/dups
@@ -3,8 +3,11 @@
use strict;
use warnings;
+use v5.36;
+
use List::Util qw<min>;
use File::Find qw<find>;
+
use IPC::System::Simple qw<capture>;
@ARGV ge 1 || die "Usage: $0 [opts] <dir1> [<dir2> ...]
@@ -19,45 +22,40 @@ my $yes = 0;
my $debug = 0;
my @preferences;
+# Map from size to list of filenames
my %sizes;
+
+# Map from md5 to list of filenames
my %md5s;
-sub fill_sizes {
+sub register_file_size {
return if -d "$_"; # Skip directories
+ my $filename = $File::Find::name;
my $size = capture("stat", "--printf=%s", $_);
$sizes{$size} ||= [];
- push @{$sizes{$size}}, $File::Find::name;
+ push @{$sizes{$size}}, $filename;
}
-sub closeness {
- my ($file, $dir) = @_;
+sub indir($file, $dir) {
my ($diffpath) = Cwd::abs_path($file) =~ /^$dir(.*)$/;
- if ($diffpath) {
- return () = $diffpath =~ /\//g;
- } else {
- return -1;
- }
+ return !!$diffpath;
}
-sub guess_best_choices {
- my @best_guesses = @_;
+sub guess_best_choices(@files) {
+ my @best_guesses = ();
+ # Remove some choice based on the 'dislike' preference
foreach my $pref (@preferences) {
if ($pref->{"type"} eq "dislike") {
print STDERR "PREF: DISLIKE $pref->{dir}\n" if $debug;
- my $best_index = 0;
- my @files = @best_guesses;
foreach my $file (@files) {
- my $dislikability = closeness($file, $pref->{"dir"});
- print STDERR "CLOSENESS($file, $pref->{dir}) = $dislikability\n" if $debug;
- if ($best_index != -1 && ($dislikability == -1 || $dislikability > $best_index)) {
- @best_guesses = ($file);
- $best_index = $dislikability;
- } elsif ($dislikability == $best_index) {
+ if (indir($file, $pref->{"dir"})) {
+ print STDERR "Discarding $file...\n" if $debug;
+ } else {
push @best_guesses, $file;
}
- print STDERR "BEST: [$best_index] @best_guesses\n" if $debug;
+ print STDERR "BEST: @best_guesses\n" if $debug;
}
}
}
@@ -65,8 +63,7 @@ sub guess_best_choices {
return @best_guesses;
}
-sub keep {
- my ($choice, @rest) = @_;
+sub keep($choice, @rest) {
my @delete = grep { $_ ne $choice } @rest;
print STDERR "\tKEEPING $choice, DELETING @delete\n" if $debug;
unlink foreach @delete;
@@ -81,7 +78,7 @@ while (my $arg = shift) {
} elsif ($arg eq "-d" || $arg eq "--dislike") {
push @preferences, {"type" => "dislike", "dir" => Cwd::abs_path(shift) . "/"};
} else {
- find(\&fill_sizes, $arg);
+ find(\&register_file_size, $arg);
}
}
@@ -115,35 +112,35 @@ foreach my $md5 (keys(%md5s)) {
print "\t$file\n";
}
- if (my @best_choices = guess_best_choices(@same_md5_files)) {
- if (@best_choices == 1) {
- my $best_choice = $best_choices[0];
- my $gogo = $yes;
- unless ($gogo) {
- print "\n\tBest choice is '$best_choice', do you want to delete the others? (yes/y) ";
- my $resp = <STDIN>;
- chomp $resp;
- $gogo = $resp eq "yes" || $resp eq "y";
- }
- if ($gogo) {
- keep($best_choice, @same_md5_files);
- }
- } else {
- print "\n\tPlease choose one to keep (or press 'enter' to skip):\n";
- for (my $i = 0; $i < @best_choices; $i++) {
- my $choice = $best_choices[$i];
- print "\t [$i] $choice\n";
- }
- print "\t> ";
- my $index = <STDIN>;
- chomp $index;
- if ($index ne "") {
- $index = int($index);
- if ($index >= 0 && $index < @best_choices) {
- keep($best_choices[$index], @same_md5_files);
- } else {
- print "\n!!\t Index outside of range, ignoring\n";
- }
+ my @best_choices = guess_best_choices(@same_md5_files);
+ @best_choices = @same_md5_files unless @best_choices;
+ if (@best_choices == 1) {
+ my $best_choice = $best_choices[0];
+ my $gogo = $yes;
+ unless ($gogo) {
+ print "\n\tBest choice is '$best_choice', do you want to delete the others? (yes/y) ";
+ my $resp = <STDIN>;
+ chomp $resp;
+ $gogo = $resp eq "yes" || $resp eq "y";
+ }
+ if ($gogo) {
+ keep($best_choice, @same_md5_files);
+ }
+ } else {
+ print "\n\tPlease choose one to keep (or press 'enter' to skip):\n";
+ for (my $i = 0; $i < @best_choices; $i++) {
+ my $choice = $best_choices[$i];
+ print "\t [$i] $choice\n";
+ }
+ print "\t> ";
+ my $index = <STDIN>;
+ chomp $index;
+ if ($index ne "") {
+ $index = int($index);
+ if ($index >= 0 && $index < @best_choices) {
+ keep($best_choices[$index], @same_md5_files);
+ } else {
+ print "\n!!\t Index outside of range, ignoring\n";
}
}
}