blob: bbf357e4219f11a667e3e2820de30f2102854528 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
#!/usr/bin/env perl
use strict;
use warnings;
use File::Find qw<find>;
use IPC::System::Simple qw<capture>;
# Require at least one directory to scan. The argument count is a number, so
# it is compared numerically ('ge' would compare the count as a string).
@ARGV >= 1 or die "Usage: $0 <dir1> [<dir2> ...]\n";

# Maps a file size (in bytes) to an array ref of the paths having that size.
my %sizes;

# Maps an MD5 digest string, as printed by md5sum, to an array ref of the
# paths whose content produced that digest.
my %md5s;
# File::Find callback: record the current entry in %sizes, keyed by its size
# in bytes.
#
# Reads $_ (the entry to inspect) and $File::Find::name (the path that gets
# stored). Only regular files are recorded: directories are skipped as before,
# and symlinks, FIFOs, sockets and device nodes are skipped as well, because
# their reported size does not describe hashable content and reading them can
# fail or block.
#
# Returns nothing; the result is the side effect on %sizes.
sub fill_sizes {
    # Use the built-in lstat instead of spawning an external `stat` process
    # per file. lstat (not stat) so that a symlink is seen as a link rather
    # than as whatever it points to.
    my @info = lstat $_;
    if (!@info) {
        warn "Cannot stat $File::Find::name: $!\n";
        return;
    }

    # `_` reuses the lstat result above, so no second system call is made.
    return unless -f _;

    # Element 7 of the stat list is the size in bytes. The array ref is
    # autovivified on first push.
    push @{ $sizes{ $info[7] } }, $File::Find::name;
    return;
}
# Phase 1: walk every directory given on the command line and bucket the
# files found by size (fill_sizes populates %sizes).
print STDERR "Collecting file sizes...\n";
for my $dir (@ARGV) {
    find(\&fill_sizes, $dir);
}

# Phase 2: only files that share their size with at least one other file can
# be duplicates, so only those are hashed.
print STDERR "Computing md5s of files with same size...\n";
my $size_total      = scalar(keys(%sizes));
my $size_index      = 0;
my $progress_shown  = 0;
foreach my $size (keys(%sizes)) {
    $size_index++;
    my @same_size_files = @{ $sizes{$size} };
    next unless @same_size_files > 1;    # Discard unique sizes

    my $file_index = 0;
    foreach my $file (@same_size_files) {
        $file_index++;

        # Progress is "<size group>.<file within group> / <size groups>",
        # rewritten in place on one terminal line via the carriage return.
        print STDERR "$size_index.$file_index / $size_total\r";
        STDERR->flush();
        $progress_shown = 1;

        # "--" stops option parsing so a path starting with "-" is treated
        # as a file name. capture() dies when md5sum fails (for example on
        # an unreadable file); trap that so one bad file does not throw away
        # the whole scan.
        my $output = eval { capture("md5sum", "-z", "--", $file) };
        if (!defined $output) {
            chomp(my $error = $@);
            warn "\nSkipping $file: $error\n";
            next;
        }

        # The digest is everything up to the first space of md5sum's output.
        my ($md5) = $output =~ m/^([^ ]+)/;
        if (!defined $md5) {
            warn "\nSkipping $file: unexpected md5sum output\n";
            next;
        }

        push @{ $md5s{$md5} }, $file;
    }
}

# Terminate the in-place progress line so later output does not overwrite it.
print STDERR "\n" if $progress_shown;

# Phase 3: report every digest shared by more than one file. Groups are
# ordered by their first path so that repeated runs print in the same order
# (hash key order differs from run to run).
my @duplicate_md5s =
    sort { $md5s{$a}[0] cmp $md5s{$b}[0] }
    grep { @{ $md5s{$_} } > 1 }          # Discard unique hashes
    keys(%md5s);

foreach my $md5 (@duplicate_md5s) {
    print "Found duplicate files:\n";
    foreach my $file (@{ $md5s{$md5} }) {
        print "\t$file\n";
    }
    print "\n";
}
print STDERR "Done!\n";
|