diff options
author | Guillermo Ramos | 2021-04-13 18:17:14 +0200 |
---|---|---|
committer | Guillermo Ramos | 2021-04-13 18:17:14 +0200 |
commit | 04446011f364909096cb49f2b66e4df96d713209 (patch) | |
tree | 6c2bbb722d63487a766d91e5ecc9560f01442f44 /dups | |
download | cli-04446011f364909096cb49f2b66e4df96d713209.tar.gz |
Initial commit
Diffstat (limited to 'dups')
-rwxr-xr-x | dups | 56 |
1 files changed, 56 insertions, 0 deletions
#!/usr/bin/env perl

# dups - report groups of files with identical content.
#
# Usage: dups <dir1> [<dir2> ...]
#
# Strategy: every regular file found under the given directories is first
# bucketed by size (cheap). Only files that share their size with at least
# one other file are then hashed with md5sum(1). Each set of two or more
# files with the same MD5 digest is printed to STDOUT; progress and status
# messages go to STDERR.

use strict;
use warnings;

use File::Find qw<find>;
use IPC::System::Simple qw<capture>;

# An array in numeric context yields its element count, so it must be
# compared with the numeric operators (>=, >) rather than the string
# operators (ge, gt).
@ARGV >= 1 or die "Usage: $0 <dir1> [<dir2> ...]\n";

my %sizes;    # size in bytes  => [ paths of the files having that size ]
my %md5s;     # MD5 hex digest => [ paths of the files having that digest ]

# File::Find callback: record the size of each regular file visited.
# Reads $_ (the entry being visited) and $File::Find::name (its full path),
# and appends the path to the matching bucket of %sizes.
sub fill_sizes {
    # Only regular files can be hashed meaningfully. This skips directories
    # as well as broken symlinks, FIFOs, sockets and device nodes, any of
    # which could make the later md5sum call fail or block.
    return unless -f $_;

    # "_" reuses the stat buffer filled by -f above, so no external stat(1)
    # process is needed. Because -f follows symlinks, the size recorded is
    # the size of the content md5sum will read later on.
    my $size = -s _;
    push @{ $sizes{$size} }, $File::Find::name;
    return;
}

print STDERR "Collecting file sizes...\n";
find(\&fill_sizes, $_) foreach (@ARGV);

print STDERR "Computing md5s of files with same size...\n";

# Progress counters: (current size bucket, current file within that bucket,
# total number of size buckets).
my @progress = (0, 0, scalar(keys(%sizes)));
foreach my $size (keys(%sizes)) {
    $progress[0]++;
    $progress[1] = 0;
    my @same_size_files = @{ $sizes{$size} };
    next unless @same_size_files > 1;    # Discard unique sizes

    foreach my $file (@same_size_files) {
        $progress[1]++;
        # "\r" keeps the progress indicator on a single terminal line.
        print STDERR "$progress[0].$progress[1] / $progress[2]\r";
        STDERR->flush();

        # GNU md5sum's -z terminates the record with NUL and turns off its
        # file name escaping, so the digest is always the leading run of
        # non-space characters. "--" ends option parsing so that a path
        # starting with "-" is not mistaken for an option.
        my ($md5) = capture("md5sum", "-z", "--", $file) =~ m/^([^ ]+)/;
        die "Could not parse md5sum output for $file\n" unless defined $md5;
        push @{ $md5s{$md5} }, $file;
    }
}

# Report every digest shared by more than one file.
foreach my $md5 (keys(%md5s)) {
    my @same_md5_files = @{ $md5s{$md5} };
    next unless @same_md5_files > 1;    # Discard unique hashes

    print "Found duplicate files:\n";
    foreach my $file (@same_md5_files) {
        print "\t$file\n";
    }
    print "\n";
}

print STDERR "Done!\n";