#!/usr/bin/env perl
#
# dups - report groups of files that have identical content.
#
# Usage: dups <dir> [<dir> ...]
#
# Recovered from git patch 04446011f364909096cb49f2b66e4df96d713209
# ("Initial commit", Guillermo Ramos, 2021-04-13).
#
# Strategy:
#   1. Walk every path given on the command line and bucket regular files
#      by size. Files with a unique size cannot have a duplicate.
#   2. For every size bucket holding more than one file, compute the MD5 of
#      each file and bucket the files by digest.
#   3. Print every digest bucket that holds more than one file.
#
# Duplicate groups are written to STDOUT; progress and warnings go to STDERR.

use strict;
use warnings;

use Digest::MD5;
use File::Find qw(find);
use IO::Handle;    # makes STDERR->flush() available on older perls

# Numeric comparison: we are counting arguments, not comparing strings.
@ARGV >= 1 or die "Usage: $0 <dir> [<dir> ...]\n";

my %sizes;    # size in bytes  => [ paths of regular files of that size ]
my %md5s;     # MD5 hex digest => [ paths whose content has that digest ]

# File::Find callback: record the size of the current entry.
#
# File::Find sets $_ to the entry's name relative to the directory it has
# chdir'ed into, and $File::Find::name to the path relative to the starting
# point; the latter is what gets stored.
#
# Only regular files are recorded. lstat (rather than stat) is used so that
# symlinks are seen as links and skipped instead of being followed, and so
# that FIFOs, sockets and devices never reach the hashing step.
sub fill_sizes {
    my @st = lstat $_ or return;    # entry vanished or is inaccessible
    return unless -f _;             # reuses the lstat buffer filled above

    push @{ $sizes{ $st[7] } }, $File::Find::name;    # $st[7] is the size
    return;
}

# Return the MD5 hex digest of the file at $path.
#
# On any open/read failure a warning is printed and an empty list (undef in
# scalar context) is returned, so one unreadable file does not abort the
# whole scan.
sub md5_of_file {
    my ($path) = @_;

    my $fh;
    unless (open $fh, '<:raw', $path) {
        warn "Skipping $path: $!\n";
        return;
    }

    # addfile() croaks on read errors; trap that and treat it as a skip.
    my $digest = eval { Digest::MD5->new->addfile($fh)->hexdigest };
    my $error  = $@;
    close $fh;

    unless (defined $digest) {
        warn "Skipping $path: $error";
        return;
    }
    return $digest;
}

print STDERR "Collecting file sizes...\n";
find(\&fill_sizes, @ARGV);

print STDERR "Computing md5s of files with same size...\n";

# Progress counters: (size bucket index, file index within bucket, buckets).
my @progress = (0, 0, scalar keys %sizes);
for my $size (keys %sizes) {
    $progress[0]++;
    $progress[1] = 0;

    my @same_size_files = @{ $sizes{$size} };
    next unless @same_size_files > 1;    # Discard unique sizes

    for my $file (@same_size_files) {
        $progress[1]++;
        # "\r" keeps the progress indicator on a single terminal line.
        print STDERR "$progress[0].$progress[1] / $progress[2]\r";
        STDERR->flush();

        my $md5 = md5_of_file($file);
        next unless defined $md5;

        push @{ $md5s{$md5} }, $file;
    }
}

# Sorted iteration makes the report deterministic across runs.
for my $md5 (sort keys %md5s) {
    my @same_md5_files = sort @{ $md5s{$md5} };
    next unless @same_md5_files > 1;    # Discard unique hashes

    print "Found duplicate files:\n";
    print "\t$_\n" for @same_md5_files;
    print "\n";
}

print STDERR "Done!\n";