Bugzilla – Attachment 50785 Details for
Bug 118717
remove duplicate files from ftp servers
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
IDP Log In
|
Forgot Password
The mentioned script
linkdup (text/plain), 3.36 KB, created by
Forgotten User OS1JNCFbCX
on 2005-09-24 19:59:28 UTC
(
hide
)
Description:
The mentioned script
Filename:
MIME Type:
Creator:
Forgotten User OS1JNCFbCX
Created:
2005-09-24 19:59:28 UTC
Size:
3.36 KB
patch
obsolete
#!/usr/bin/perl -w
#
# linkdup - replace duplicate files by hardlinks
# Copyright (C) 2005 Robert Schiele <rschiele@uni-mannheim.de>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#
# Usage: linkdup DIRECTORY...
#
# Every regular file below the given directories is fingerprinted by
# (device, MD5, size).  When two files share a fingerprint but not an inode,
# the later one is replaced by a hardlink to the earlier one.  Sizes, mtimes
# and MD5 sums are remembered in a gzip-compressed cache below the home
# directory so that unchanged files are not hashed again on the next run.
#
# NOTE(review): files are linked purely on content; differing owners or
# permissions are not taken into account -- confirm this is acceptable for
# the trees this is run on.

use strict;
use warnings;

use Cwd qw(abs_path);
use Digest::MD5 ();
use File::Find ();
use IO::Compress::Gzip qw($GzipError);
use IO::Uncompress::Gunzip qw($GunzipError);

# calculate the name for the cache
my $progname = $0;
$progname =~ s|.*/||;
my $pathname = ($ENV{HOME} or $ENV{LOGDIR} or '') . "/.$progname";
my $cachename = "$pathname/cache";

# information to be stored into cache, each keyed by file name
my %sizebyname;
my %mtimebyname;
my %md5byname;

# information gathered by checking files
my $savings = 0;
my %inobyname;          # file name -> inode number seen in this run
my %namebycontents;     # "dev-md5-size" -> first file seen with that content

# Store the memory cache to disk.
#
# One line per file: "<size> <mtime> <md5> <name>".  The cache is written to
# a temporary file first and renamed into place, so an interrupted run never
# leaves a truncated cache behind.  Failures are reported with a warning only;
# losing the cache merely costs time on the next run.
sub storecache
{
    print "Storing cache...\n";
    if (!-d $pathname and !mkdir $pathname) {
        warn "Unable to create $pathname: $!\n";
        return;
    }
    my $tmpname = "$cachename.$$";
    my $out = IO::Compress::Gzip->new($tmpname);
    if (!$out) {
        warn "Unable to write $tmpname: $GzipError\n";
        return;
    }
    my $ok = 1;
    for my $file (keys %sizebyname) {
        # a name containing a newline cannot be represented in the
        # line-oriented cache format; such a file is simply re-hashed
        next if $file =~ /\n/;
        $out->print("$sizebyname{$file} $mtimebyname{$file} "
                    . "$md5byname{$file} $file\n")
            or $ok = 0;
    }
    $out->close or $ok = 0;
    if ($ok and rename $tmpname, $cachename) {
        return;
    }
    warn "Unable to store cache $cachename: $!\n";
    unlink $tmpname;
    return;
}

# Reread the memory cache from disk and skip files that no longer exist.
#
# Lines that do not match the expected format are ignored.  The name is taken
# verbatim from the rest of the line so that leading whitespace in a file name
# survives the round trip.
sub readcache
{
    return unless -r $cachename;
    print "Reading cache...\n";
    my $in = IO::Uncompress::Gunzip->new($cachename);
    if (!$in) {
        warn "Unable to read $cachename: $GunzipError\n";
        return;
    }
    while (defined(my $line = $in->getline)) {
        chomp $line;
        my ($size, $mtime, $md5, $file) =
            $line =~ /\A(\d+) (-?\d+) ([0-9a-f]{32}) (.+)\z/s
            or next;
        next unless -r $file;
        $sizebyname{$file} = $size;
        $mtimebyname{$file} = $mtime;
        $md5byname{$file} = $md5;
    }
    $in->close;
    return;
}

# Return the hexadecimal MD5 sum of a file, or nothing if it cannot be read.
sub _md5_of
{
    my $file = shift;
    open my $fh, '<', $file or return;
    binmode $fh;
    # addfile croaks on read errors; turn that into an undefined result
    my $md5 = eval { Digest::MD5->new->addfile($fh)->hexdigest };
    close $fh;
    return $md5;
}

# Replace $target by a hardlink to $source; returns true on success.
#
# The link is created under a temporary name next to $target and then renamed
# over it, so $target is never missing and is left untouched when linking
# fails (too many links, missing permissions, ...).
sub _relink
{
    my ($source, $target) = @_;
    my $tmpname = "$target.linkdup.$$";
    if (!link $source, $tmpname) {
        warn "Unable to link $source to $tmpname: $!\n";
        return 0;
    }
    return 1 if rename $tmpname, $target;
    warn "Unable to replace $target: $!\n";
    unlink $tmpname;
    return 0;
}

# Check file whether it is a duplicate and relink in that case.
#
# Takes the file name.  The MD5 sum comes from the cache when size and mtime
# still match, otherwise it is recalculated.  Files that cannot be examined
# are reported with a warning and dropped from the cache.
sub checkfile
{
    my $file = shift;
    my ($dev, $ino, $size, $mtime) = (stat $file)[0, 1, 7, 9];
    if (!defined $dev) {
        warn "Unable to stat $file: $!\n";
        return;
    }
    my $md5;
    if (defined $sizebyname{$file} and defined $mtimebyname{$file} and
        defined $md5byname{$file} and $sizebyname{$file} == $size and
        $mtimebyname{$file} == $mtime) {
        $md5 = $md5byname{$file};
    } else {
        print "Calculating MD5 for $file...\n";
        $md5 = _md5_of($file);
        if (!defined $md5) {
            warn "Unable to read $file: $!\n";
            delete $sizebyname{$file};
            delete $mtimebyname{$file};
            delete $md5byname{$file};
            return;
        }
    }
    # the device is part of the key because hardlinks cannot cross devices
    my $key = "$dev-$md5-$size";
    my $otherfile = $namebycontents{$key};
    if (!defined $otherfile) {
        $namebycontents{$key} = $file;
    } elsif ($inobyname{$otherfile} != $ino) {
        print "Duplicate found $otherfile -> $file...\n";
        if (_relink($otherfile, $file)) {
            $savings += $size;
            # the name now refers to the other file's inode
            $ino = $inobyname{$otherfile};
            $mtime = $mtimebyname{$otherfile};
        }
    }
    $inobyname{$file} = $ino;
    $mtimebyname{$file} = $mtime;
    $sizebyname{$file} = $size;
    $md5byname{$file} = $md5;
    return;
}

# Check every regular file within a specified directory.
#
# Takes a path, resolves it to an absolute one and hands each regular file
# below it (symbolic links are not followed) to checkfile.
sub scandir
{
    my $reldir = shift;
    my $dir = eval { abs_path($reldir) };
    if (!defined $dir or !-e $dir) {
        print "Unable to scan $reldir!\n";
        return;
    }
    print "Scanning $dir...\n";
    File::Find::find({
        no_chdir => 1,
        wanted   => sub {
            my $path = $File::Find::name;
            # -l performs the lstat; "_" reuses that result for -f
            return if -l $path or not -f _;
            checkfile($path);
        },
    }, $dir);
    return;
}

# Do the actual work: load the cache, scan every directory given on the
# command line, save the cache and report the number of bytes relinked.
sub main
{
    readcache();
    for my $dir (@ARGV) {
        scandir($dir);
    }
    storecache();
    print "Savings: $savings\n";
    return;
}

# run only when executed as a program, not when loaded by another file
main() unless caller;

1;
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
Actions:
View
Attachments on
bug 118717
: 50785