-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfinddups_md5
More file actions
executable file
·118 lines (106 loc) · 2.42 KB
/
finddups_md5
File metadata and controls
executable file
·118 lines (106 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/perl -w
use strict;
use Digest::MD5 qw(md5); # Prefer not to do explosive number of diffs...
use MIME::Base64;
# Find duplicate files, either under the current directory or a named
# directory.
#
# Usage: finddups [DIRNAME]
#
# Regular files are indexed by size first; an MD5 digest is only computed
# for a file once another file of the same size has been seen. Symbolic
# links and directory entries whose names start with a dot are skipped.
# Every digest shared by more than one file is then reported.
#
# Script entry point. The named subs below are compiled before this call
# executes, so calling main() ahead of its definition is fine.
main();
#########################
sub main
{
    # Program driver: resolve the directory to scan, index every regular
    # file beneath it, then report each MD5 digest shared by two or more
    # files.
    #
    # Takes no parameters (reads @ARGV). Dies if the requested directory
    # cannot be entered; handle_args() dies with a usage message on bad
    # arguments. Returns nothing useful.
    my $cdir = handle_args(@ARGV);
    if(defined $cdir)
    {
        chdir($cdir) || die "Could not enter $cdir:$!\n";
    }

    my %cstor; # Storage for filenames-to-data (PRI, SIZ and MD5 indexes)
    print "Reading data...\n";
    parsedir(\%cstor, '.');
    print "Data read. Processing...\n";

    # MD5 index: digest => { filename => 1 }. It is absent when no two
    # files shared a size, so fall back to an empty hash.
    my $by_md5 = $cstor{MD5} || {};

    # A plain loop (rather than map in void context) over sorted keys, so
    # the report comes out in the same order on every run.
    foreach my $md5 (sort keys %$by_md5)
    {
        my @files = sort keys %{ $by_md5->{$md5} };
        next unless @files > 1; # Only digests with more than one file

        print "Duplicate MD5: [" . format_md5($md5) . "]\n";
        print "\tFiles:\n"
            . join("\n", map { "\t[$_]" } @files);
        print "\n\n";
    }
    return;
}
sub parsedir
{
    # Recursively index the regular files under $dir into %$cstor.
    #
    # Parameters:
    #   $cstor - hash ref that receives three indexes:
    #              PRI: filename => { size => bytes, ident => "dev-inode" }
    #              SIZ: size     => { filename => 1 }
    #              MD5: digest   => { filename => 1 }
    #            MD5 entries are only created for files that share their
    #            size with at least one other file.
    #   $dir   - directory to scan; indexed names are "$dir/<entry>".
    #
    # Entries whose names start with a dot are skipped, as are symbolic
    # links and anything that is neither a directory nor a plain file.
    # Dies if a directory cannot be opened.
    my ($cstor, $dir) = @_;

    # Lexical handle: a bareword DIR is a package global shared by every
    # level of the recursion and by any other code using the same name.
    opendir(my $dh, $dir) || die "Could not open directory $dir:$!\n";
    my @files = grep {!/^\./} readdir($dh);
    closedir($dh);

    foreach my $file (@files)
    {
        my $path = "$dir/$file";

        next if -l $path; # Symbolic links are never followed or indexed

        if(-d $path)
        {
            parsedir($cstor, $path);
            next;
        }

        next unless -f $path; # Sockets, devices, etc. are ignored

        # Fields 0, 1 and 7 of stat() are device, inode and size.
        my ($dev, $inode, $size) = (stat($path))[0, 1, 7];
        unless(defined $size)
        {
            # The file vanished or became unreadable after the -f test;
            # skip it instead of indexing undef values.
            warn "Could not stat $path:$!\n";
            next;
        }

        $$cstor{PRI}{$path}{size}  = $size; # Index by file..
        $$cstor{PRI}{$path}{ident} = "$dev-$inode";

        my $same_size = ($$cstor{SIZ}{$size} ||= {});
        my $seen      = keys %$same_size;
        if($seen)
        { # We've seen a file of this size before...
            my $md5sum = get_md5($path);
            $$cstor{MD5}{$md5sum}{$path} = 1; # Note our md5sum..

            if($seen == 1)
            { # Lazily digest the first file of this size, now that it
              # has a potential partner. With more than one earlier file
              # they have all been digested already.
                my ($otherfile) = keys %$same_size;
                my $omd5 = get_md5($otherfile);
                $$cstor{MD5}{$omd5}{$otherfile} = 1;
            }
        }
        $same_size->{$path} = 1; # Index by size
    }
    return;
}
sub handle_args
{
    # Validate the command-line arguments.
    #
    # Returns nothing (empty list / undef) when called without arguments,
    # meaning "scan the current directory", or the single argument when it
    # names an existing directory. Any other argument list is a usage
    # error and dies.
    my @argv = @_;

    return unless @argv;

    return $argv[0] if @argv == 1 && -d $argv[0];

    die "Usage: finddups [DIRNAME]\n";
}
sub get_md5
{
    # Return the raw 16-byte MD5 digest of the contents of $file.
    #
    # Parameters:
    #   $file - path of the file to digest.
    # Returns: the binary digest (the same value md5() gives for the
    #          file's contents).
    # Dies if the file cannot be opened.
    my ($file) = @_;

    # Three-argument open treats $file purely as a path. The two-argument
    # form strips trailing whitespace from the name and treats a trailing
    # '|' as a command to run, so oddly named files were misread or, worse,
    # executed.
    open(my $fh, '<', $file) || die "Could not read $file:$!\n";
    binmode($fh); # Digest the raw bytes on every platform

    # addfile() reads the handle in chunks, so large files are not held in
    # memory all at once.
    my $digest = Digest::MD5->new->addfile($fh)->digest;
    close($fh);
    return $digest;
}
sub format_md5
{
    # Render a binary digest as printable Base64 text: the trailing
    # newline added by encode_base64() is removed, and so is a trailing
    # "==" padding pair.
    my ($digest) = @_;

    chomp(my $printable = encode_base64($digest));
    $printable =~ s/==$//g;

    return $printable;
}
#sub passchomp
#{
#my ($in) = @_;
#chomp $in;
#return $in;
#}