[mirrorbrain-commits] r7787 - in /trunk: mod_mirrorbrain/mod_mirrorbrain.c tools/metalink-hasher.py

From: <poeml@mirrorbrain.org>
Date: Fri, 04 Sep 2009 14:27:35 -0000
Author: poeml
Date: Fri Sep  4 16:27:34 2009
New Revision: 7787

URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain?rev=7787&view=rev
Log:
metalink-hasher:
- new implementation of the metalink hash cache: in addition to the
  modification time, the file's inode number is now used as a criterion to
  invalidate cached metalink hashes (see https://bugzilla.novell.com/536495
  and the sketch below).
mod_mirrorbrain:
- use the new inode-based metalink hashes, but fall back to the previous
  scheme, so that the transition should be seamless.
- bump version to 2.10.0.
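
For illustration, the new cache key combines the file's path with its inode
number, and a cached hash file is only reused if it is at least as new as the
source file. A minimal Python sketch of that rule (the helper names here are
illustrative only; the actual implementations are in the diffs below):

    import os

    def hash_cache_name(path, prefix=''):
        # The inode number becomes part of the cache filename, so a file
        # that has been replaced (and thus has a new inode) can never be
        # served stale hashes under its old cache name.
        return '%s%s.inode_%d' % (prefix, path, os.lstat(path).st_ino)

    def cache_is_fresh(src, cached):
        # Reuse a cached hash file only if it exists, is non-empty, and is
        # not older than the source file.
        try:
            st = os.stat(cached)
        except OSError:
            return False
        return st.st_size > 0 and st.st_mtime >= os.stat(src).st_mtime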

Modified:
    trunk/mod_mirrorbrain/mod_mirrorbrain.c
    trunk/tools/metalink-hasher.py

Modified: trunk/mod_mirrorbrain/mod_mirrorbrain.c
URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/mod_mirrorbrain/mod_mirrorbrain.c?rev=7787&r1=7786&r2=7787&view=diff
==============================================================================
--- trunk/mod_mirrorbrain/mod_mirrorbrain.c (original)
+++ trunk/mod_mirrorbrain/mod_mirrorbrain.c Fri Sep  4 16:27:34 2009
@@ -73,7 +73,7 @@
 #define UNSET (-1)
 #endif
 
-#define MOD_MIRRORBRAIN_VER "2.9.2"
+#define MOD_MIRRORBRAIN_VER "2.10.0"
 #define VERSION_COMPONENT "mod_mirrorbrain/"MOD_MIRRORBRAIN_VER
 
 #ifdef NO_MOD_GEOIP
@@ -810,7 +810,7 @@
             } 
         }
 
-        /* is the requested file too small? DECLINED */
+        /* is the requested file too small to be worth a redirect? */
         if (!mirrorlist && !metalink_forced && (r->finfo.size < cfg->min_size)) {
             debugLog(r, cfg, "File '%s' too small (%d bytes, less than %d)", 
                     r->filename, (int) r->finfo.size, (int) cfg->min_size);
@@ -1579,24 +1579,58 @@
 
         /* inject hashes, if they are prepared on-disk */
         apr_finfo_t sb;
-        const char *hashfilename;
-        hashfilename = apr_pstrcat(r->pool, 
+        const char *hashfilename;     /* the new hash filename contains the inode of the file */
+        const char *old_hashfilename; /* for a transition period - will be deprecated later */
+        hashfilename = apr_psprintf(r->pool, "%s%s.inode_%lu", 
+                                   scfg->metalink_hashes_prefix ? scfg->metalink_hashes_prefix : "", 
+                                   r->filename, 
+                                   r->finfo.inode);
+        old_hashfilename = apr_pstrcat(r->pool, 
                                    scfg->metalink_hashes_prefix ? scfg->metalink_hashes_prefix : "", 
                                    r->filename, 
                                    ".metalink-hashes", 
                                    NULL);
-        if (apr_stat(&sb, hashfilename, APR_FINFO_MIN, r->pool) == APR_SUCCESS
-            && (sb.filetype == APR_REG) && (sb.mtime >= r->finfo.mtime)) {
-            debugLog(r, cfg, "Found up-to-date hashfile '%s', injecting", hashfilename);
-
-            apr_file_t *fh;
-            rv = apr_file_open(&fh, hashfilename, APR_READ, APR_OS_DEFAULT, r->pool);
-            if (rv == APR_SUCCESS) {
-                ap_send_fd(fh, r, 0, sb.size, &len);
-
-                apr_file_close(fh);
+
+        if (apr_stat(&sb, hashfilename, APR_FINFO_MIN, r->pool) == APR_SUCCESS && (sb.filetype == APR_REG)) {
+            debugLog(r, cfg, "hashfile '%s' exists", hashfilename);
+
+            if (sb.mtime >= r->finfo.mtime) {
+                debugLog(r, cfg, "hashfile '%s' up to date, injecting", hashfilename);
+
+                apr_file_t *fh;
+                rv = apr_file_open(&fh, hashfilename, APR_READ, APR_OS_DEFAULT, r->pool);
+                if (rv == APR_SUCCESS) {
+                    ap_send_fd(fh, r, 0, sb.size, &len);
+                    apr_file_close(fh);
+                } else {
+                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 
+                                  "[mod_mirrorbrain] could not open hashfile '%s'.", hashfilename);
+                }
+            } else {
+                debugLog(r, cfg, "hashfile '%s' outdated, ignoring", hashfilename);
             }
-        }
+
+        } else if (apr_stat(&sb, old_hashfilename, APR_FINFO_MIN, r->pool) == APR_SUCCESS && (sb.filetype == APR_REG)) {
+            debugLog(r, cfg, "old_hashfile '%s' exists", old_hashfilename);
+
+            if (sb.mtime >= r->finfo.mtime) {
+                debugLog(r, cfg, "old_hashfile '%s' up to date, injecting", old_hashfilename);
+
+                apr_file_t *fh;
+                rv = apr_file_open(&fh, old_hashfilename, APR_READ, APR_OS_DEFAULT, r->pool);
+                if (rv == APR_SUCCESS) {
+                    ap_send_fd(fh, r, 0, sb.size, &len);
+                    apr_file_close(fh);
+                } else {
+                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 
+                                  "[mod_mirrorbrain] could not open old_hashfile '%s'.", old_hashfilename);
+                }
+            } else {
+                debugLog(r, cfg, "old_hashfile '%s' outdated, ignoring", old_hashfilename);
+            }
+        } else {
+            debugLog(r, cfg, "no hash file found (%s, %s)", hashfilename, old_hashfilename);
+        } 
 
         ap_rputs(     "      <resources>\n\n", r);
 

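For reference, the lookup order that mod_mirrorbrain now performs can be
rendered as a small Python sketch (names are illustrative; the real code above
uses apr_stat() and ap_send_fd()): try the inode-named hash file first, fall
back to the legacy '.metalink-hashes' name only if the former does not exist,
and ignore whichever candidate is found if it is older than the requested file.

    import os

    def find_hash_file(path, prefix=''):
        src_mtime = os.stat(path).st_mtime
        candidates = ['%s%s.inode_%d' % (prefix, path, os.stat(path).st_ino),
                      '%s%s.metalink-hashes' % (prefix, path)]
        for cand in candidates:
            try:
                st = os.stat(cand)
            except OSError:
                continue      # candidate missing: try the next one
            if st.st_mtime >= src_mtime:
                return cand   # up to date: this is the file to inject
            return None       # present but outdated: ignore, no further fallback
        return None
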
Modified: trunk/tools/metalink-hasher.py
URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/tools/metalink-hasher.py?rev=7787&r1=7786&r2=7787&view=diff
==============================================================================
--- trunk/tools/metalink-hasher.py (original)
+++ trunk/tools/metalink-hasher.py Fri Sep  4 16:27:34 2009
@@ -40,62 +40,98 @@
 import subprocess
 import errno
 
-ML_EXTENSION = '.metalink-hashes'
 line_mask = re.compile('.*</*(verification|hash|pieces).*>.*')
 
-def make_hashes(src, src_statinfo, dst, opts):
-
-    try:
-        dst_statinfo = os.stat(dst)
-        dst_mtime = dst_statinfo.st_mtime
-        dst_size = dst_statinfo.st_size
-    except OSError:
-        dst_mtime = dst_size = 0 # file missing
-
-    if dst_mtime >= src_statinfo.st_mtime and dst_size != 0:
-        if opts.verbose:
-            print 'Up to date: %r' % dst
-        return 
-
-    cmd = [ 'metalink',
-            '--nomirrors', 
-            '-d', 'md5', 
-            '-d', 'sha1', 
-            '-d', 'sha256', 
-            '-d', 'sha1pieces',
-            src ]
-
-    if opts.dry_run: 
-        print 'Would run: ', ' '.join(cmd)
-        return
-
-    sys.stdout.flush()
-    o = subprocess.Popen(cmd, stdout=subprocess.PIPE,
-                    close_fds=True).stdout
-    lines = []
-    for line in o.readlines():
-        if re.match(line_mask, line):
-            line = line.replace('\t\t', ' ' * 6)
-            lines.append(line)
-
-
-    # if present, add PGP signature into the <verification> block
-    if os.path.exists(src + '.asc'):
-        sig = open(src + '.asc').read()
-        sig = '        <signature type="pgp" file="%s.asc">\n' % os.path.basename(src) + \
-              sig + \
-              '\n        </signature>\n'
-
-        lines.insert(1, sig)
-
-    d = open(dst, 'wb')
-    d.write(''.join(lines))
-    d.close()
-
-    if opts.copy_permissions:
-        os.chmod(dst, src_statinfo.st_mode)
-    else:
-        os.chmod(dst, 0644)
+class Hasheable:
+    """represent a file and its metadata"""
+    def __init__(self, basename, src_dir=None, dst_dir=None):
+        self.basename = basename
+        if src_dir:
+            self.src_dir = src_dir
+        else:
+            self.src_dir = os.path.dirname(self.basename)
+
+        self.src = os.path.join(src_dir, self.basename)
+
+        self.finfo = os.lstat(self.src)
+        self.mtime = self.finfo.st_mtime
+        self.size  = self.finfo.st_size
+        self.inode = self.finfo.st_ino
+        self.mode  = self.finfo.st_mode
+
+        self.dst_dir = dst_dir
+
+        self.dst_basename = '%s.inode_%s' % (self.basename, self.inode)
+        self.dst = os.path.join(self.dst_dir, self.dst_basename)
+
+    def islink(self):
+        return stat.S_ISLNK(self.mode)
+    def isreg(self):
+        return stat.S_ISREG(self.mode)
+    def isdir(self):
+        return stat.S_ISDIR(self.mode)
+
+    def do_hashes(self, verbose=False, dry_run=False, copy_permissions=True):
+        try:
+            dst_statinfo = os.stat(self.dst)
+            dst_mtime = dst_statinfo.st_mtime
+            dst_size = dst_statinfo.st_size
+        except OSError:
+            dst_mtime = dst_size = 0 # file missing
+
+        if dst_mtime >= self.mtime and dst_size != 0:
+            if verbose:
+                print 'Up to date: %r' % self.dst
+            return 
+
+        cmd = [ 'metalink',
+                '--nomirrors', 
+                '-d', 'md5', 
+                '-d', 'sha1', 
+                '-d', 'sha256', 
+                '-d', 'sha1pieces',
+                self.src ]
+
+        if dry_run: 
+            print 'Would run: ', ' '.join(cmd)
+            return
+
+        sys.stdout.flush()
+        o = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                        close_fds=True).stdout
+        lines = []
+        for line in o.readlines():
+            if re.match(line_mask, line):
+                line = line.replace('\t\t', ' ' * 6)
+                lines.append(line)
+
+
+        # if present, add PGP signature into the <verification> block
+        if os.path.exists(self.src + '.asc'):
+            sig = open(self.src + '.asc').read()
+            sig = '        <signature type="pgp" file="%s.asc">\n' % self.basename + \
+                  sig + \
+                  '\n        </signature>\n'
+
+            lines.insert(1, sig)
+
+        d = open(self.dst, 'wb')
+        d.write(''.join(lines))
+        d.close()
+
+        if copy_permissions:
+            os.chmod(self.dst, self.mode)
+        else:
+            os.chmod(self.dst, 0644)
+
+    #def __eq__(self, other):
+    #    return self.basename == other.basename
+    #def __eq__(self, basename):
+    #    return self.basename == basename
+        
+    def __str__(self):
+        return self.basename
+
 
 
 class Metalinks(cmdln.Cmdln):
@@ -190,8 +226,6 @@
                 else:
                     os.chmod(dst_dir, 0755)
 
-            src_names = set(os.listdir(src_dir))
-            #print 'doing', src_dir
             try:
                 dst_names = os.listdir(dst_dir)
                 dst_names.sort()
@@ -201,75 +235,85 @@
                              'You might want to create it:\n'
                              '  mkdir %s' % (dst_dir, dst_dir))
 
-            for i in dst_names:
-                i_path = os.path.join(dst_dir, i)
-                # removal of obsolete files
-                if i.endswith(ML_EXTENSION):
-                    realname = i[:-len(ML_EXTENSION)]
-                    if (realname not in src_names) \
-                       or (opts.ignore_mask and re.match(opts.ignore_mask, i_path)):
-                        print 'Unlinking obsolete %r' % i_path
-                        if not opts.dry_run: 
-                            try:
-                                os.unlink(i_path)
-                            except OSError, e:
-                                sys.stderr.write('Unlink failed for %r: %s\n' \
-                                                    % (i_path, os.strerror(e.errno)))
-                        unlinked_files += 1
-                # removal of obsolete directories
-                else:
-                    if i not in src_names or (opts.ignore_mask and re.match(opts.ignore_mask, i_path)):
-                        if os.path.isdir(i_path):
-                            print 'Recursively removing obsolete directory %r' % i_path
-                            if not opts.dry_run: 
-                                try:
-                                    shutil.rmtree(i_path)
-                                except OSError, e:
-                                    if e.errno == errno.EACCES:
-                                        sys.stderr.write('Recursive removing failed for %r (%s). Ignoring.\n' \
-                                                            % (i_path, os.strerror(e.errno)))
-                                    else:
-                                        sys.exit('Recursive removing failed for %r: %s\n' \
-                                                            % (i_path, os.strerror(e.errno)))
-                            unlinked_dirs += 1
-                        else:
-                            print 'Unlinking obsolete %r' % i_path
-                            if not opts.dry_run: 
-                                os.unlink(i_path)
-                            unlinked_files += 1
-
-            for src_name in sorted(src_names):
-
-                src = os.path.join(src_dir, src_name)
+
+            # a set offers the fastest access for "foo in ..." lookups
+            src_basenames = set(os.listdir(src_dir))
+            #print 'doing', src_dir
+
+            dst_keep = set()
+
+            for src_basename in sorted(src_basenames):
+                src = os.path.join(src_dir, src_basename)
 
                 if opts.ignore_mask and re.match(opts.ignore_mask, src):
                     continue
 
                 # stat only once
                 try:
-                    src_statinfo = os.lstat(src)
+                    hasheable = Hasheable(src_basename, src_dir=src_dir, dst_dir=dst_dir)
                 except OSError, e:
                     if e.errno == errno.ENOENT:
                         sys.stderr.write('File vanished: %r\n' % src)
                         continue
 
-                if stat.S_ISLNK(src_statinfo.st_mode):
-                    #print 'ignoring link', src
+                if hasheable.islink():
+                    print 'ignoring link', src
                     continue
 
-                if stat.S_ISREG(src_statinfo.st_mode):
-                    if not opts.file_mask or re.match(opts.file_mask, src_name):
-
-                        dst_name = src[len(opts.base_dir):].lstrip('/')
-                        dst = os.path.join(opts.target_dir, dst_name)
+                elif hasheable.isreg():
+                    if not opts.file_mask or re.match(opts.file_mask, src_basename):
                         #if opts.verbose:
                         #    print 'dst:', dst
-
-                        make_hashes(src, src_statinfo, dst + ML_EXTENSION, opts=opts)
-
-
-                elif stat.S_ISDIR(src_statinfo.st_mode):
+                        hasheable.do_hashes(verbose=opts.verbose, 
+                                            dry_run=opts.dry_run, 
+                                            copy_permissions=opts.copy_permissions)
+                        dst_keep.add(hasheable.dst_basename)
+
+                elif hasheable.isdir():
                     directories_todo.append(src)  # It's a directory, store it.
+                    dst_keep.add(hasheable.basename)
+
+
+            dst_remove = set(dst_names) - dst_keep
+
+            # print 'files to keep:'
+            # print dst_keep
+            # print
+            # print 'files to remove:'
+            # print dst_remove
+            # print
+
+            for i in sorted(dst_remove):
+                i_path = os.path.join(dst_dir, i)
+                #print i_path
+
+                if (opts.ignore_mask and re.match(opts.ignore_mask, i_path)):
+                    print 'ignoring, not removing %r' % i_path
+                    continue
+
+                if os.path.isdir(i_path):
+                    print 'Recursively removing obsolete directory %r' % i_path
+                    if not opts.dry_run: 
+                        try:
+                            shutil.rmtree(i_path)
+                        except OSError, e:
+                            if e.errno == errno.EACCES:
+                                sys.stderr.write('Recursive removing failed for %r (%s). Ignoring.\n' \
+                                                    % (i_path, os.strerror(e.errno)))
+                            else:
+                                sys.exit('Recursive removing failed for %r: %s\n' \
+                                                    % (i_path, os.strerror(e.errno)))
+                    unlinked_dirs += 1
+                    
+                else:
+                    print 'Unlinking obsolete %r' % i_path
+                    if not opts.dry_run: 
+                        try:
+                            os.unlink(i_path)
+                        except OSError, e:
+                            sys.stderr.write('Unlink failed for %r: %s\n' \
+                                                % (i_path, os.strerror(e.errno)))
+                    unlinked_files += 1
 
 
         if  unlinked_files or unlinked_dirs:




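For completeness, the new Hasheable class above is driven by the tool's
directory walk; a rough usage sketch (the paths and option values below are
made up, and the real tool derives them from its command line):

    # assumes the Hasheable class from metalink-hasher.py is importable
    h = Hasheable('foo-1.0.tar.bz2',
                  src_dir='/srv/mirror/pub/foo',
                  dst_dir='/srv/metalink-hashes/srv/mirror/pub/foo')
    if h.isreg():
        # writes 'foo-1.0.tar.bz2.inode_<ino>' into dst_dir unless an
        # up-to-date copy is already there
        h.do_hashes(verbose=True, dry_run=False, copy_permissions=True)
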
_______________________________________________
mirrorbrain-commits mailing list
Archive: http://mirrorbrain.org/archive/mirrorbrain-commits/

Note: To remove yourself from this list, send a mail with the content
 	unsubscribe
to the address mirrorbrain-commits-request@mirrorbrain.org