Author: poeml Date: 2009-06-26 04:59:01 -0600 (Fri, 26 Jun 2009) New Revision: 7622 Modified: trunk/tools/download-redirector-v2/tools/metalink-hasher.py Log: metalink-hasher: - Automatic removal of old hashes that no longer have a counterpart in the file tree was implemented. - A number of things were optimized to run more efficiently on huge trees, mainly by eliminating all redundant stat() calls. - sha256 was added to the list of digests to generate. - The need to specify the -b (--base-dir) option was eliminated, so the command is easier to use. The option is still there, although I'm not sure it's still useful. The command could be simplified further by removing the "update" subcommand, because now that automatic cleanup is implemented, there may no longer be a need for other subcommands. Modified: trunk/tools/download-redirector-v2/tools/metalink-hasher.py =================================================================== --- trunk/tools/download-redirector-v2/tools/metalink-hasher.py 2009-06-26 10:34:36 UTC (rev 7621) +++ trunk/tools/download-redirector-v2/tools/metalink-hasher.py 2009-06-26 10:59:01 UTC (rev 7622) @@ -31,52 +31,42 @@ __url__ = 'http://mirrorbrain.org' -import os, os.path +import os +import os.path +import stat +import shutil import cmdln import re import subprocess +import errno ML_EXTENSION = '.metalink-hashes' line_mask = re.compile('.*</*(verification|hash|pieces).*>.*') -def make_hashes(src, dst, opts): - src_dir = os.path.dirname(src) - src_dir_mode = os.stat(src_dir).st_mode - dst_dir = os.path.dirname(dst) +def make_hashes(src, src_statinfo, dst, opts): - dst = dst + ML_EXTENSION - - if not opts.dry_run: - if not os.path.isdir(dst_dir): - os.makedirs(dst_dir, mode = 0755) - if opts.copy_permissions: - os.chmod(dst_dir, src_dir_mode) - else: - os.chmod(dst_dir, 0755) - - src_mtime = os.path.getmtime(src) try: - dst_mtime = os.path.getmtime(dst) - dst_size = os.path.getsize(dst) + dst_statinfo = os.stat(dst) + dst_mtime = 
dst_statinfo.st_mtime + dst_size = dst_statinfo.st_size except OSError: dst_mtime = dst_size = 0 # file missing - if dst_mtime >= src_mtime and dst_size != 0: + if dst_mtime >= src_statinfo.st_mtime and dst_size != 0: if opts.verbose: - print 'up to date:', src + print 'Up to date: %r' % dst return cmd = [ 'metalink', '--nomirrors', '-d', 'md5', '-d', 'sha1', + '-d', 'sha256', '-d', 'sha1pieces', src ] - if opts.verbose or opts.dry_run: - print ' '.join(cmd) - if opts.dry_run: + print 'Would run: ', ' '.join(cmd) return o = subprocess.Popen(cmd, stdout=subprocess.PIPE, @@ -101,7 +91,7 @@ d.close() if opts.copy_permissions: - os.chmod(dst, os.stat(src).st_mode) + os.chmod(dst, src_statinfo.st_mode) else: os.chmod(dst, 0644) @@ -128,15 +118,22 @@ def do_update(self, subcmd, opts, startdir): """${cmd_name}: Update the hash pieces that are included in metalinks - Example: + Examples: + metalink-hasher update /srv/mirrors/mozilla -t /srv/metalink-hashes/srv/mirrors/mozilla + metalink-hasher update \\ - -f '.*.(torrent|iso)$' \\ - -t /var/lib/apache2/metalink-hashes/srv/ftp/pub/opensuse/distribution/11.0/iso \\ - -b /srv/ftp-stage/pub/opensuse/distribution/11.0/iso \\ - /srv/ftp-stage/pub/opensuse/distribution/11.0/iso \\ - -n + -t /srv/metalink-hashes/srv/ftp/pub/opensuse/repositories/home:/poeml \\ + /srv/ftp-stage/pub/opensuse/repositories/home:/poeml \\ + -i '^.*/repoview/.*$' + metalink-hasher update \\ + -f '.*.(torrent|iso)$' \\ + -t /var/lib/apache2/metalink-hashes/srv/ftp/pub/opensuse/distribution/11.0/iso \\ + -b /srv/ftp-stage/pub/opensuse/distribution/11.0/iso \\ + /srv/ftp-stage/pub/opensuse/distribution/11.0/iso \\ + -n + ${cmd_usage} ${cmd_option_list} """ @@ -144,7 +141,8 @@ if not opts.target_dir: sys.exit('You must specify the target directory (-t)') if not opts.base_dir: - sys.exit('You must specify the base directory (-b)') + opts.base_dir = startdir + #sys.exit('You must specify the base directory (-b)') if not opts.target_dir.startswith('/'): 
sys.exit('The target directory must be an absolut path') @@ -155,42 +153,92 @@ opts.target_dir = opts.target_dir.rstrip('/') opts.base_dir = opts.base_dir.rstrip('/') - directories = [startdir] + directories_todo = [startdir] if opts.ignore_mask: opts.ignore_mask = re.compile(opts.ignore_mask) if opts.file_mask: opts.file_mask = re.compile(opts.file_mask) - while len(directories)>0: - directory = directories.pop() + while len(directories_todo) > 0: + src_dir = directories_todo.pop() - for name in os.listdir(directory): + src_dir_mode = os.stat(src_dir).st_mode - fullpath = os.path.join(directory,name) + dst_dir = os.path.join(opts.target_dir, src_dir[len(opts.base_dir):].lstrip('/')) - if os.path.islink(fullpath): + if not opts.dry_run: + if not os.path.isdir(dst_dir): + os.makedirs(dst_dir, mode = 0755) + if opts.copy_permissions: + os.chmod(dst_dir, src_dir_mode) + else: + os.chmod(dst_dir, 0755) + + src_names = os.listdir(src_dir) + src_names.sort() + try: + dst_names = os.listdir(dst_dir) + dst_names.sort() + except OSError, e: + if e.errno == errno.ENOENT: + sys.exit('\nSorry, cannot really continue in dry-run mode, because directory %r does not exist.\n' + 'You might want to create it:\n' + ' mkdir %s' % (dst_dir, dst_dir)) + + for i in dst_names: + i_path = os.path.join(dst_dir, i) + # removal of obsolete files + if i.endswith(ML_EXTENSION): + realname = i[:-len(ML_EXTENSION)] + if (realname not in src_names) \ + or (opts.ignore_mask and re.match(opts.ignore_mask, i_path)): + print 'Unlinking obsolete %r' % i_path + if not opts.dry_run: + try: + os.unlink(i_path) + except: + print 'unlinking failed:', i_path + # removal of obsolete directories + else: + if i not in src_names: + if os.path.isdir(i_path): + print 'Recursively removing obsolete directory %r' % i_path + if not opts.dry_run: + shutil.rmtree(i_path) + else: + print 'Unlinking obsolete %r' % i_path + if not opts.dry_run: + os.unlink(i_path) + + for src_name in src_names: + + src = 
os.path.join(src_dir, src_name) + + if opts.ignore_mask and re.match(opts.ignore_mask, src): continue - if opts.ignore_mask and re.match(opts.ignore_mask, fullpath): + # stat only once + src_statinfo = os.lstat(src) + if stat.S_ISLNK(src_statinfo.st_mode): + #print 'ignoring link', src continue - if os.path.isfile(fullpath): - if not opts.file_mask or re.match(opts.file_mask, name): - #print fullpath - if opts.base_dir: - target = fullpath[len(opts.base_dir):] - else: - target = fullpath - target = os.path.join(opts.target_dir, target.lstrip('/')) - if opts.verbose: - print 'target:', target - make_hashes(fullpath, target, opts=opts) + if stat.S_ISREG(src_statinfo.st_mode): + if not opts.file_mask or re.match(opts.file_mask, src_name): - elif os.path.isdir(fullpath): - directories.append(fullpath) # It's a directory, store it. + dst_name = src[len(opts.base_dir):].lstrip('/') + dst = os.path.join(opts.target_dir, dst_name) + #if opts.verbose: + # print 'dst:', dst + make_hashes(src, src_statinfo, dst + ML_EXTENSION, opts=opts) + + elif stat.S_ISDIR(src_statinfo.st_mode): + directories_todo.append(src) # It's a directory, store it. + + if __name__ == '__main__': import sys metalinks = Metalinks() _______________________________________________ Opensuse-svn mailing list Opensuse-svn_at_forge.novell.com http://forge.novell.com/mailman/listinfo/opensuse-svn _______________________________________________ mirrorbrain-commits mailing list Archive: http://mirrorbrain.org/archive/mirrorbrain-commits/ Note: To remove yourself from this list, send a mail with the content unsubscribe to the address mirrorbrain-commits-request_at_mirrorbrain.org Received on 2009-06-26Z10:59:37
This archive was generated by hypermail 2.2.0 : 2009-07-10Z19:18:13 GMT