[mirrorbrain-commits] [opensuse-svn] r7622 - trunk/tools/download-redirector-v2/tools

From: Novell Forge SVN <noreply_at_novell.com>
Date: Fri, 26 Jun 2009 04:59:03 -0600 (MDT)
Author: poeml
Date: 2009-06-26 04:59:01 -0600 (Fri, 26 Jun 2009)
New Revision: 7622

Modified:
   trunk/tools/download-redirector-v2/tools/metalink-hasher.py
Log:
metalink-hasher:
- Automatic removal was implemented for old hashes that no longer have a
  counterpart in the file tree.
- A number of things were optimized to run more efficiently on huge trees, mainly 
  by eliminating all redundant stat() calls.
- sha256 was added to the list of digests to generate.
- The need to specify the -b (--base-dir) option was eliminated, so the command is
  easier to use. The option is still there, but I'm not sure whether it's still useful.
  The command could be simplified further by removing the "update" subcommand:
  now that automatic cleanup is implemented, there may no longer be a need for
  other subcommands.


Modified: trunk/tools/download-redirector-v2/tools/metalink-hasher.py
===================================================================
--- trunk/tools/download-redirector-v2/tools/metalink-hasher.py	2009-06-26 10:34:36 UTC (rev 7621)
+++ trunk/tools/download-redirector-v2/tools/metalink-hasher.py	2009-06-26 10:59:01 UTC (rev 7622)
@@ -31,52 +31,42 @@
 __url__ = 'http://mirrorbrain.org'
 
 
-import os, os.path
+import os
+import os.path
+import stat
+import shutil
 import cmdln
 import re
 import subprocess
+import errno
 
 ML_EXTENSION = '.metalink-hashes'
 line_mask = re.compile('.*</*(verification|hash|pieces).*>.*')
 
-def make_hashes(src, dst, opts):
-    src_dir = os.path.dirname(src)
-    src_dir_mode = os.stat(src_dir).st_mode
-    dst_dir = os.path.dirname(dst)
+def make_hashes(src, src_statinfo, dst, opts):
 
-    dst = dst + ML_EXTENSION
-
-    if not opts.dry_run:
-        if not os.path.isdir(dst_dir):
-            os.makedirs(dst_dir, mode = 0755)
-        if opts.copy_permissions:
-            os.chmod(dst_dir, src_dir_mode)
-        else:
-            os.chmod(dst_dir, 0755)
-
-    src_mtime = os.path.getmtime(src)
     try:
-        dst_mtime = os.path.getmtime(dst)
-        dst_size = os.path.getsize(dst)
+        dst_statinfo = os.stat(dst)
+        dst_mtime = dst_statinfo.st_mtime
+        dst_size = dst_statinfo.st_size
     except OSError:
         dst_mtime = dst_size = 0 # file missing
 
-    if dst_mtime >= src_mtime and dst_size != 0:
+    if dst_mtime >= src_statinfo.st_mtime and dst_size != 0:
         if opts.verbose:
-            print 'up to date:', src
+            print 'Up to date: %r' % dst
         return 
 
     cmd = [ 'metalink',
             '--nomirrors', 
             '-d', 'md5', 
             '-d', 'sha1', 
+            '-d', 'sha256', 
             '-d', 'sha1pieces',
             src ]
 
-    if opts.verbose or opts.dry_run:
-        print ' '.join(cmd)
-
     if opts.dry_run: 
+        print 'Would run: ', ' '.join(cmd)
         return
 
     o = subprocess.Popen(cmd, stdout=subprocess.PIPE,
@@ -101,7 +91,7 @@
     d.close()
 
     if opts.copy_permissions:
-        os.chmod(dst, os.stat(src).st_mode)
+        os.chmod(dst, src_statinfo.st_mode)
     else:
         os.chmod(dst, 0644)
 
@@ -128,15 +118,22 @@
     def do_update(self, subcmd, opts, startdir):
         """${cmd_name}: Update the hash pieces that are included in metalinks
 
-        Example:
+        Examples:
 
+        metalink-hasher update /srv/mirrors/mozilla -t /srv/metalink-hashes/srv/mirrors/mozilla
+
         metalink-hasher update \\
-           -f '.*.(torrent|iso)$' \\
-          -t /var/lib/apache2/metalink-hashes/srv/ftp/pub/opensuse/distribution/11.0/iso \\
-          -b /srv/ftp-stage/pub/opensuse/distribution/11.0/iso \\
-          /srv/ftp-stage/pub/opensuse/distribution/11.0/iso \\
-          -n
+            -t /srv/metalink-hashes/srv/ftp/pub/opensuse/repositories/home:/poeml \\
+            /srv/ftp-stage/pub/opensuse/repositories/home:/poeml \\
+            -i '^.*/repoview/.*$'
 
+        metalink-hasher update \\
+            -f '.*.(torrent|iso)$' \\
+            -t /var/lib/apache2/metalink-hashes/srv/ftp/pub/opensuse/distribution/11.0/iso \\
+            -b /srv/ftp-stage/pub/opensuse/distribution/11.0/iso \\
+            /srv/ftp-stage/pub/opensuse/distribution/11.0/iso \\
+            -n
+
         ${cmd_usage}
         ${cmd_option_list}
         """
@@ -144,7 +141,8 @@
         if not opts.target_dir:
             sys.exit('You must specify the target directory (-t)')
         if not opts.base_dir:
-            sys.exit('You must specify the base directory (-b)')
+            opts.base_dir = startdir
+            #sys.exit('You must specify the base directory (-b)')
 
         if not opts.target_dir.startswith('/'):
             sys.exit('The target directory must be an absolut path')
@@ -155,42 +153,92 @@
         opts.target_dir = opts.target_dir.rstrip('/')
         opts.base_dir = opts.base_dir.rstrip('/')
 
-        directories = [startdir]
+        directories_todo = [startdir]
 
         if opts.ignore_mask: 
             opts.ignore_mask = re.compile(opts.ignore_mask)
         if opts.file_mask: 
             opts.file_mask = re.compile(opts.file_mask)
 
-        while len(directories)>0:
-            directory = directories.pop()
+        while len(directories_todo) > 0:
+            src_dir = directories_todo.pop()
 
-            for name in os.listdir(directory):
+            src_dir_mode = os.stat(src_dir).st_mode
 
-                fullpath = os.path.join(directory,name)
+            dst_dir = os.path.join(opts.target_dir, src_dir[len(opts.base_dir):].lstrip('/'))
 
-                if os.path.islink(fullpath):
+            if not opts.dry_run:
+                if not os.path.isdir(dst_dir):
+                    os.makedirs(dst_dir, mode = 0755)
+                if opts.copy_permissions:
+                    os.chmod(dst_dir, src_dir_mode)
+                else:
+                    os.chmod(dst_dir, 0755)
+
+            src_names = os.listdir(src_dir)
+            src_names.sort()
+            try:
+                dst_names = os.listdir(dst_dir)
+                dst_names.sort()
+            except OSError, e:
+                if e.errno == errno.ENOENT:
+                    sys.exit('\nSorry, cannot really continue in dry-run mode, because directory %r does not exist.\n'
+                             'You might want to create it:\n'
+                             '  mkdir %s' % (dst_dir, dst_dir))
+
+            for i in dst_names:
+                i_path = os.path.join(dst_dir, i)
+                # removal of obsolete files
+                if i.endswith(ML_EXTENSION):
+                    realname = i[:-len(ML_EXTENSION)]
+                    if (realname not in src_names) \
+                       or (opts.ignore_mask and re.match(opts.ignore_mask, i_path)):
+                        print 'Unlinking obsolete %r' % i_path
+                        if not opts.dry_run: 
+                            try:
+                                os.unlink(i_path)
+                            except:
+                                print 'unlinking failed:', i_path
+                # removal of obsolete directories
+                else:
+                    if i not in src_names:
+                        if os.path.isdir(i_path):
+                            print 'Recursively removing obsolete directory %r' % i_path
+                            if not opts.dry_run: 
+                                shutil.rmtree(i_path)
+                        else:
+                            print 'Unlinking obsolete %r' % i_path
+                            if not opts.dry_run: 
+                                os.unlink(i_path)
+
+            for src_name in src_names:
+
+                src = os.path.join(src_dir, src_name)
+
+                if opts.ignore_mask and re.match(opts.ignore_mask, src):
                     continue
 
-                if opts.ignore_mask and re.match(opts.ignore_mask, fullpath):
+                # stat only once
+                src_statinfo = os.lstat(src)
+                if stat.S_ISLNK(src_statinfo.st_mode):
+                    #print 'ignoring link', src
                     continue
 
-                if os.path.isfile(fullpath):
-                    if not opts.file_mask or re.match(opts.file_mask, name):
-                        #print fullpath
-                        if opts.base_dir:
-                            target = fullpath[len(opts.base_dir):]
-                        else:
-                            target = fullpath
-                        target = os.path.join(opts.target_dir, target.lstrip('/'))
-                        if opts.verbose:
-                            print 'target:', target
-                        make_hashes(fullpath, target, opts=opts)
+                if stat.S_ISREG(src_statinfo.st_mode):
+                    if not opts.file_mask or re.match(opts.file_mask, src_name):
 
-                elif os.path.isdir(fullpath):
-                    directories.append(fullpath)  # It's a directory, store it.
+                        dst_name = src[len(opts.base_dir):].lstrip('/')
+                        dst = os.path.join(opts.target_dir, dst_name)
+                        #if opts.verbose:
+                        #    print 'dst:', dst
 
+                        make_hashes(src, src_statinfo, dst + ML_EXTENSION, opts=opts)
 
+
+                elif stat.S_ISDIR(src_statinfo.st_mode):
+                    directories_todo.append(src)  # It's a directory, store it.
+
+
 if __name__ == '__main__':
     import sys
     metalinks = Metalinks()

_______________________________________________
Opensuse-svn mailing list
Opensuse-svn_at_forge.novell.com
http://forge.novell.com/mailman/listinfo/opensuse-svn


_______________________________________________
mirrorbrain-commits mailing list
Archive: http://mirrorbrain.org/archive/mirrorbrain-commits/

Note: To remove yourself from this list, send a mail with the content
 	unsubscribe
to the address mirrorbrain-commits-request_at_mirrorbrain.org
Received on 2009-06-26Z10:59:37

This archive was generated by hypermail 2.2.0 : 2009-07-10Z19:18:13 GMT