#!/usr/bin/perl
#
# Report duplicate files found below the directories named on the command
# line.
#
# The script works in three passes:
#   1. walk every directory argument and record the size of each regular file;
#   2. compute an MD5 checksum for every file that shares its size with at
#      least one other file (a file with a unique size cannot have a twin);
#   3. sort those files by checksum and print each adjacent pair whose
#      checksums are equal.

use strict;
use warnings;
use Digest::MD5;
use FileHandle;
use Getopt::Std;
use Cwd;

# One digest object is reused for every file; md5sum() resets it per call.
my $context = Digest::MD5->new;

# Absolute file name => size in bytes, filled in by processdirectory().
my %filelist;

sub md5sum ($);
sub processdirectory ($);

# md5sum($testfile)
#
# Returns the MD5 checksum of the file's contents as a hex string.
# If the file cannot be opened or read, a warning is printed and undef is
# returned so that the caller can skip the file instead of aborting the run.
sub md5sum ($) {
    my $testfile = shift;

    # Three-argument open: the file name can never be mistaken for a mode.
    my $filehandler;
    if ( !open $filehandler, '<', $testfile ) {
        warn "couldn't open $testfile: $!\n";
        return;
    }
    binmode $filehandler;

    $context->reset();

    # addfile() croaks when reading fails (for example on a directory), so
    # trap that and turn it into the same "skip this file" result.
    my $digest = eval {
        $context->addfile($filehandler);
        $context->hexdigest();
    };
    my $error = $@;
    close $filehandler;

    if ( !defined $digest ) {
        warn "couldn't read $testfile: $error";
        return;
    }
    return $digest;
}

# processdirectory($targetdir)
#
# Recursively walks $targetdir and stores the size of every regular file in
# %filelist, keyed by the working directory path joined with the file name.
# Symbolic links and special files (FIFOs, sockets, devices) are ignored.
# The working directory is changed while scanning and restored before
# returning. Directories that cannot be entered or listed are reported with
# a warning and skipped.
sub processdirectory ($) {
    my $targetdir = shift;
    my $startdir  = cwd;

    # Plain print: the directory name must not be treated as a printf format.
    print "processing $targetdir\n";

    # Without this check a failed chdir would make us list the current
    # directory again and recurse into the same entry without end.
    if ( !chdir $targetdir ) {
        warn "couldn't enter directory $targetdir: $!\n";
        return;
    }

    # The working directory is restored after every recursive call, so it
    # is the same for every entry and only needs to be looked up once.
    my $currentdir = cwd;

    my $dirhandle;
    if ( !opendir $dirhandle, '.' ) {
        warn "couldn't open directory $targetdir: $!\n";
        chdir $startdir
            or die "couldn't return to directory $startdir: $!\n";
        return;
    }
    my @direntries = readdir $dirhandle;
    closedir $dirhandle;

    foreach my $filename (@direntries) {
        next if $filename eq '.' || $filename eq '..';

        # Test for a symlink first: -d and -f follow links, and following a
        # directory link can loop forever or visit the same files twice.
        next if -l $filename;

        if ( -d $filename ) {
            processdirectory($filename);
        }
        elsif ( -f $filename ) {
            my $size = ( stat $filename )[7];
            next if !defined $size;    # file vanished between the two calls
            $filelist{ $currentdir . "/" . $filename } = $size;
        }
    }

    # Everything that follows relies on being back where we started.
    chdir $startdir
        or die "couldn't return to directory $startdir: $!\n";
    return;
}

# main

foreach my $direntry (@ARGV) {
    processdirectory($direntry);
}

# Group the files by size; only groups with two or more members can contain
# duplicates.
my %files_of_size;
foreach my $file ( keys %filelist ) {
    push @{ $files_of_size{ $filelist{$file} } }, $file;
}

# File name => MD5 checksum, computed exactly once per candidate file.
my %potentialduplicates;
foreach my $size ( keys %files_of_size ) {
    my @candidates = @{ $files_of_size{$size} };
    next if @candidates < 2;

    foreach my $file (@candidates) {
        my $hash = md5sum($file);
        next if !defined $hash;    # unreadable; md5sum() already warned
        $potentialduplicates{$file} = $hash;
    }
}

# Equal checksums end up next to each other after sorting; the file name is
# used as a tie-breaker so the report is the same on every run.
my @byhash = sort {
           $potentialduplicates{$b} cmp $potentialduplicates{$a}
        || $a cmp $b
} keys %potentialduplicates;

my $lastfilehash = "none";
my $lastfilename;
foreach my $file (@byhash) {
    if ( $lastfilehash eq $potentialduplicates{$file} ) {
        print $file . " and " . $lastfilename
            . " are duplicates with a hash of " . $lastfilehash . "\n";
    }
    $lastfilename = $file;
    $lastfilehash = $potentialduplicates{$file};
}