#!/usr/bin/env perl
#
# Find duplicate files below one or more directories.
#
# Files are first grouped by size, which only needs a stat() per file. Only
# files that share their size with at least one other file are then hashed
# with MD5. Every group of two or more files with the same digest is printed
# to STDOUT; progress and diagnostics go to STDERR.
#
# Usage: <script> <dir> [<dir> ...]
use strict;
use warnings;

use Digest::MD5;
use File::Find qw(find);
use IO::Handle;    # makes STDERR->flush available on older perls too

# collect_files_by_size(@roots)
#
# Walks every root with File::Find and returns a hash reference mapping a
# size in bytes to an array reference of the paths ($File::Find::name) that
# have that size. Only paths that resolve to a regular file are recorded.
sub collect_files_by_size {
    my @roots = @_;

    my %files_by_size;
    find(
        sub {
            # -f stat()s the entry *following* symlinks, so the size recorded
            # below is the size of the data that will actually be hashed.
            # It also filters out directories, FIFOs, sockets and devices.
            return unless -f $_;

            # "stat _" reuses the stat buffer filled by -f above. Index 7 is
            # the size; unlike -s it yields a plain 0 for empty files.
            my $size = (stat _)[7];
            push @{ $files_by_size{$size} }, $File::Find::name;
        },
        @roots,
    );

    return \%files_by_size;
}

# md5_of_file($path)
#
# Returns the hexadecimal MD5 digest of the file's content, or undef (after
# emitting a warning) when the file cannot be opened or read.
sub md5_of_file {
    my ($path) = @_;

    my $fh;
    if ( !open $fh, '<:raw', $path ) {
        warn "Skipping $path: $!\n";
        return;
    }

    # addfile() croaks on a read error; turn that into a warning so a single
    # bad file does not abort the whole scan.
    my $md5 = eval { Digest::MD5->new->addfile($fh)->hexdigest };
    warn "Skipping $path: $@" unless defined $md5;
    close $fh;

    return $md5;
}

# group_by_md5($files_by_size, $show_progress)
#
# Takes the structure returned by collect_files_by_size() and hashes every
# file whose size is shared by at least two files. Returns a hash reference
# mapping an MD5 hex digest to an array reference of paths. When
# $show_progress is true, a "<size bucket>.<file in bucket> / <buckets>"
# counter is written to STDERR.
sub group_by_md5 {
    my ( $files_by_size, $show_progress ) = @_;

    my %files_by_md5;
    my @sizes         = sort { $a <=> $b } keys %{$files_by_size};
    my $bucket_total  = @sizes;
    my $bucket_index  = 0;
    my $progress_seen = 0;

    for my $size (@sizes) {
        $bucket_index++;

        my @candidates = @{ $files_by_size->{$size} };
        next unless @candidates > 1;    # a unique size cannot be a duplicate

        my $file_index = 0;
        for my $file (@candidates) {
            $file_index++;
            if ($show_progress) {
                print STDERR "$bucket_index.$file_index / $bucket_total\r";
                STDERR->flush();
                $progress_seen = 1;
            }

            my $md5 = md5_of_file($file);
            next unless defined $md5;
            push @{ $files_by_md5{$md5} }, $file;
        }
    }

    # The counter line ends in "\r"; finish it so later output starts clean.
    print STDERR "\n" if $progress_seen;

    return \%files_by_md5;
}

# duplicate_groups($files_by_md5)
#
# Returns a list of array references, one per digest shared by two or more
# files. Paths inside a group are sorted, and groups are ordered by their
# first path, so the result does not depend on hash iteration order.
sub duplicate_groups {
    my ($files_by_md5) = @_;

    my @groups =
        map  { [ sort @{$_} ] }
        grep { @{$_} > 1 } values %{$files_by_md5};

    my @ordered = sort { $a->[0] cmp $b->[0] } @groups;
    return @ordered;
}

# main(@roots)
#
# Command-line entry point: scans the given directories and prints each group
# of duplicate files. Dies with a usage message when no directory is given.
# Returns the process exit status (0).
sub main {
    my @roots = @_;
    @roots >= 1 or die "Usage: $0 <dir> [<dir> ...]\n";

    print STDERR "Collecting file sizes...\n";
    my $files_by_size = collect_files_by_size(@roots);

    print STDERR "Computing md5s of files with same size...\n";
    my $files_by_md5 = group_by_md5( $files_by_size, 1 );

    for my $group ( duplicate_groups($files_by_md5) ) {
        print "Found duplicate files:\n";
        print "\t$_\n" for @{$group};
        print "\n";
    }

    print STDERR "Done!\n";
    return 0;
}

# Run only when executed as a script, not when loaded by another program.
exit main(@ARGV) unless caller;

1;