Author: poeml
Date: Fri Sep  4 16:27:34 2009
New Revision: 7787

URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain?rev=7787&view=rev
Log:
metalink-hasher:
- new implementation of the metalink hash cache. In addition to the
  modification time, the file's inode number is now used as a criterion
  to invalidate cached metalink hashes. (See
  https://bugzilla.novell.com/536495)
mod_mirrorbrain:
- use the new inode-wise metalink hashes, but fall back to the previous
  scheme, so that the transition should be seamless.
- bump version to 2.10.0.

Modified:
   trunk/mod_mirrorbrain/mod_mirrorbrain.c
   trunk/tools/metalink-hasher.py
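In short, a cached hash file is now considered valid only if its name carries
the payload file's current inode number and it is at least as new as the
payload. A minimal sketch of that criterion in Python (illustrative only, not
code from this commit; the helpers cache_name and cache_is_current are
invented names, the naming convention follows Hasheable.dst_basename in the
diff below):

    import os

    def cache_name(src, dst_dir):
        # The cache file name embeds the source file's inode number, so a
        # file replaced in place (new inode, possibly unchanged mtime)
        # simply never matches its old cache entry.
        inode = os.lstat(src).st_ino
        return os.path.join(dst_dir,
                            '%s.inode_%s' % (os.path.basename(src), inode))

    def cache_is_current(src, cached):
        # Valid only if the cached file exists, is non-empty, and is at
        # least as new as the source; the same test do_hashes() performs.
        try:
            sb = os.stat(cached)
        except OSError:
            return False  # cache file missing
        return sb.st_size != 0 and sb.st_mtime >= os.lstat(src).st_mtime
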
Modified: trunk/mod_mirrorbrain/mod_mirrorbrain.c
URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/mod_mirrorbrain/mod_mirrorbrain.c?rev=7787&r1=7786&r2=7787&view=diff
==============================================================================
--- trunk/mod_mirrorbrain/mod_mirrorbrain.c (original)
+++ trunk/mod_mirrorbrain/mod_mirrorbrain.c Fri Sep  4 16:27:34 2009
@@ -73,7 +73,7 @@
 #define UNSET (-1)
 #endif

-#define MOD_MIRRORBRAIN_VER "2.9.2"
+#define MOD_MIRRORBRAIN_VER "2.10.0"
 #define VERSION_COMPONENT "mod_mirrorbrain/"MOD_MIRRORBRAIN_VER

 #ifdef NO_MOD_GEOIP
@@ -810,7 +810,7 @@
         }
     }

-    /* is the requested file too small? DECLINED */
+    /* is the requested file too small to be worth a redirect? */
     if (!mirrorlist && !metalink_forced && (r->finfo.size < cfg->min_size)) {
         debugLog(r, cfg, "File '%s' too small (%d bytes, less than %d)",
                  r->filename, (int) r->finfo.size, (int) cfg->min_size);
@@ -1579,24 +1579,58 @@

         /* inject hashes, if they are prepared on-disk */
         apr_finfo_t sb;
-        const char *hashfilename;
-        hashfilename = apr_pstrcat(r->pool,
+        const char *hashfilename;     /* the new hash filename contains the inode of the file */
+        const char *old_hashfilename; /* for a transition period - will be deprecated later */
+        hashfilename = apr_psprintf(r->pool, "%s%s.inode_%lu",
+                                    scfg->metalink_hashes_prefix ? scfg->metalink_hashes_prefix : "",
+                                    r->filename,
+                                    r->finfo.inode);
+        old_hashfilename = apr_pstrcat(r->pool,
                            scfg->metalink_hashes_prefix ? scfg->metalink_hashes_prefix : "",
                            r->filename, ".metalink-hashes", NULL);
-        if (apr_stat(&sb, hashfilename, APR_FINFO_MIN, r->pool) == APR_SUCCESS
-                && (sb.filetype == APR_REG) && (sb.mtime >= r->finfo.mtime)) {
-            debugLog(r, cfg, "Found up-to-date hashfile '%s', injecting", hashfilename);
-
-            apr_file_t *fh;
-            rv = apr_file_open(&fh, hashfilename, APR_READ, APR_OS_DEFAULT, r->pool);
-            if (rv == APR_SUCCESS) {
-                ap_send_fd(fh, r, 0, sb.size, &len);
-
-                apr_file_close(fh);
-            }
-        }
+
+        if (apr_stat(&sb, hashfilename, APR_FINFO_MIN, r->pool) == APR_SUCCESS
+                && (sb.filetype == APR_REG)) {
+            debugLog(r, cfg, "hashfile '%s' exists", hashfilename);
+
+            if (sb.mtime >= r->finfo.mtime) {
+                debugLog(r, cfg, "hashfile '%s' up to date, injecting", hashfilename);
+
+                apr_file_t *fh;
+                rv = apr_file_open(&fh, hashfilename, APR_READ, APR_OS_DEFAULT, r->pool);
+                if (rv == APR_SUCCESS) {
+                    ap_send_fd(fh, r, 0, sb.size, &len);
+                    apr_file_close(fh);
+                } else {
+                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
+                                  "[mod_mirrorbrain] could not open hashfile '%s'.", hashfilename);
+                }
+            } else {
+                debugLog(r, cfg, "hashfile '%s' outdated, ignoring", hashfilename);
+            }
+
+        } else if (apr_stat(&sb, old_hashfilename, APR_FINFO_MIN, r->pool) == APR_SUCCESS
+                && (sb.filetype == APR_REG)) {
+            debugLog(r, cfg, "old_hashfile '%s' exists", old_hashfilename);
+
+            if (sb.mtime >= r->finfo.mtime) {
+                debugLog(r, cfg, "old_hashfile '%s' up to date, injecting", old_hashfilename);
+
+                apr_file_t *fh;
+                rv = apr_file_open(&fh, old_hashfilename, APR_READ, APR_OS_DEFAULT, r->pool);
+                if (rv == APR_SUCCESS) {
+                    ap_send_fd(fh, r, 0, sb.size, &len);
+                    apr_file_close(fh);
+                } else {
+                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
+                                  "[mod_mirrorbrain] could not open old_hashfile '%s'.", old_hashfilename);
+                }
+            } else {
+                debugLog(r, cfg, "old_hashfile '%s' outdated, ignoring", old_hashfilename);
+            }
+        } else {
+            debugLog(r, cfg, "no hash file found (%s, %s)", hashfilename, old_hashfilename);
+        }

         ap_rputs(     " <resources>\n\n", r);

Modified: trunk/tools/metalink-hasher.py
URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/tools/metalink-hasher.py?rev=7787&r1=7786&r2=7787&view=diff
==============================================================================
--- trunk/tools/metalink-hasher.py (original)
+++ trunk/tools/metalink-hasher.py Fri Sep  4 16:27:34 2009
@@ -40,62 +40,98 @@
 import subprocess
 import errno

-ML_EXTENSION = '.metalink-hashes'

 line_mask = re.compile('.*</*(verification|hash|pieces).*>.*')

-def make_hashes(src, src_statinfo, dst, opts):
-
-    try:
-        dst_statinfo = os.stat(dst)
-        dst_mtime = dst_statinfo.st_mtime
-        dst_size = dst_statinfo.st_size
-    except OSError:
-        dst_mtime = dst_size = 0 # file missing
-
-    if dst_mtime >= src_statinfo.st_mtime and dst_size != 0:
-        if opts.verbose:
-            print 'Up to date: %r' % dst
-        return
-
-    cmd = [ 'metalink',
-            '--nomirrors',
-            '-d', 'md5',
-            '-d', 'sha1',
-            '-d', 'sha256',
-            '-d', 'sha1pieces',
-            src ]
-
-    if opts.dry_run:
-        print 'Would run: ', ' '.join(cmd)
-        return
-
-    sys.stdout.flush()
-    o = subprocess.Popen(cmd, stdout=subprocess.PIPE,
-                         close_fds=True).stdout
-    lines = []
-    for line in o.readlines():
-        if re.match(line_mask, line):
-            line = line.replace('\t\t', ' ' * 6)
-            lines.append(line)
-
-
-    # if present, add PGP signature into the <verification> block
-    if os.path.exists(src + '.asc'):
-        sig = open(src + '.asc').read()
-        sig = '  <signature type="pgp" file="%s.asc">\n' % os.path.basename(src) + \
-              sig + \
-              '\n  </signature>\n'
-
-        lines.insert(1, sig)
-
-    d = open(dst, 'wb')
-    d.write(''.join(lines))
-    d.close()
-
-    if opts.copy_permissions:
-        os.chmod(dst, src_statinfo.st_mode)
-    else:
-        os.chmod(dst, 0644)
+class Hasheable:
+    """represent a file and its metadata"""
+    def __init__(self, basename, src_dir=None, dst_dir=None):
+        self.basename = basename
+        if src_dir:
+            self.src_dir = src_dir
+        else:
+            self.src_dir = os.path.dirname(self.basename)
+
+        self.src = os.path.join(self.src_dir, self.basename)
+
+        self.finfo = os.lstat(self.src)
+        self.mtime = self.finfo.st_mtime
+        self.size = self.finfo.st_size
+        self.inode = self.finfo.st_ino
+        self.mode = self.finfo.st_mode
+
+        self.dst_dir = dst_dir
+
+        self.dst_basename = '%s.inode_%s' % (self.basename, self.inode)
+        self.dst = os.path.join(self.dst_dir, self.dst_basename)
+
+    def islink(self):
+        return stat.S_ISLNK(self.mode)
+    def isreg(self):
+        return stat.S_ISREG(self.mode)
+    def isdir(self):
+        return stat.S_ISDIR(self.mode)
+
+    def do_hashes(self, verbose=False, dry_run=False, copy_permissions=True):
+        try:
+            dst_statinfo = os.stat(self.dst)
+            dst_mtime = dst_statinfo.st_mtime
+            dst_size = dst_statinfo.st_size
+        except OSError:
+            dst_mtime = dst_size = 0 # file missing
+
+        if dst_mtime >= self.mtime and dst_size != 0:
+            if verbose:
+                print 'Up to date: %r' % self.dst
+            return
+
+        cmd = [ 'metalink',
+                '--nomirrors',
+                '-d', 'md5',
+                '-d', 'sha1',
+                '-d', 'sha256',
+                '-d', 'sha1pieces',
+                self.src ]
+
+        if dry_run:
+            print 'Would run: ', ' '.join(cmd)
+            return
+
+        sys.stdout.flush()
+        o = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                             close_fds=True).stdout
+        lines = []
+        for line in o.readlines():
+            if re.match(line_mask, line):
+                line = line.replace('\t\t', ' ' * 6)
+                lines.append(line)
+
+
+        # if present, add PGP signature into the <verification> block
+        if os.path.exists(self.src + '.asc'):
+            sig = open(self.src + '.asc').read()
+            sig = '  <signature type="pgp" file="%s.asc">\n' % self.basename + \
+                  sig + \
+                  '\n  </signature>\n'
+
+            lines.insert(1, sig)
+
+        d = open(self.dst, 'wb')
+        d.write(''.join(lines))
+        d.close()
+
+        if copy_permissions:
+            os.chmod(self.dst, self.mode)
+        else:
+            os.chmod(self.dst, 0644)
+
+    #def __eq__(self, other):
+    #    return self.basename == other.basename
+    #def __eq__(self, basename):
+    #    return self.basename == basename
+
+    def __str__(self):
+        return self.basename
+

 class Metalinks(cmdln.Cmdln):

@@ -190,8 +226,6 @@
         else:
             os.chmod(dst_dir, 0755)

-        src_names = set(os.listdir(src_dir))
-        #print 'doing', src_dir
         try:
             dst_names = os.listdir(dst_dir)
             dst_names.sort()
@@ -201,75 +235,85 @@
                      'You might want to create it:\n'
                      '  mkdir %s' % (dst_dir, dst_dir))

-        for i in dst_names:
-            i_path = os.path.join(dst_dir, i)
-            # removal of obsolete files
-            if i.endswith(ML_EXTENSION):
-                realname = i[:-len(ML_EXTENSION)]
-                if (realname not in src_names) \
-                   or (opts.ignore_mask and re.match(opts.ignore_mask, i_path)):
-                    print 'Unlinking obsolete %r' % i_path
-                    if not opts.dry_run:
-                        try:
-                            os.unlink(i_path)
-                        except OSError, e:
-                            sys.stderr.write('Unlink failed for %r: %s\n' \
-                                             % (i_path, os.strerror(e.errno)))
-                    unlinked_files += 1
-            # removal of obsolete directories
-            else:
-                if i not in src_names or (opts.ignore_mask and re.match(opts.ignore_mask, i_path)):
-                    if os.path.isdir(i_path):
-                        print 'Recursively removing obsolete directory %r' % i_path
-                        if not opts.dry_run:
-                            try:
-                                shutil.rmtree(i_path)
-                            except OSError, e:
-                                if e.errno == errno.EACCES:
-                                    sys.stderr.write('Recursive removing failed for %r (%s). Ignoring.\n' \
-                                                     % (i_path, os.strerror(e.errno)))
-                                else:
-                                    sys.exit('Recursive removing failed for %r: %s\n' \
-                                             % (i_path, os.strerror(e.errno)))
-                        unlinked_dirs += 1
-                    else:
-                        print 'Unlinking obsolete %r' % i_path
-                        if not opts.dry_run:
-                            os.unlink(i_path)
-                        unlinked_files += 1
-
-        for src_name in sorted(src_names):
-
-            src = os.path.join(src_dir, src_name)
+
+        # a set offers the fastest access for "foo in ..." lookups
+        src_basenames = set(os.listdir(src_dir))
+        #print 'doing', src_dir
+
+        dst_keep = set()
+
+        for src_basename in sorted(src_basenames):
+            src = os.path.join(src_dir, src_basename)
             if opts.ignore_mask and re.match(opts.ignore_mask, src):
                 continue

             # stat only once
             try:
-                src_statinfo = os.lstat(src)
+                hasheable = Hasheable(src_basename, src_dir=src_dir, dst_dir=dst_dir)
             except OSError, e:
                 if e.errno == errno.ENOENT:
                     sys.stderr.write('File vanished: %r\n' % src)
                     continue

-            if stat.S_ISLNK(src_statinfo.st_mode):
-                #print 'ignoring link', src
+            if hasheable.islink():
+                print 'ignoring link', src
                 continue

-            if stat.S_ISREG(src_statinfo.st_mode):
-                if not opts.file_mask or re.match(opts.file_mask, src_name):
-
-                    dst_name = src[len(opts.base_dir):].lstrip('/')
-                    dst = os.path.join(opts.target_dir, dst_name)
+            elif hasheable.isreg():
+                if not opts.file_mask or re.match(opts.file_mask, src_basename):
                     #if opts.verbose:
                     #    print 'dst:', dst
-
-                    make_hashes(src, src_statinfo, dst + ML_EXTENSION, opts=opts)
-
-
-            elif stat.S_ISDIR(src_statinfo.st_mode):
+                    hasheable.do_hashes(verbose=opts.verbose,
+                                        dry_run=opts.dry_run,
+                                        copy_permissions=opts.copy_permissions)
+                    dst_keep.add(hasheable.dst_basename)
+
+            elif hasheable.isdir():
                 directories_todo.append(src)   # It's a directory, store it.
+                dst_keep.add(hasheable.basename)
+
+
+        dst_remove = set(dst_names) - dst_keep
+
+        # print 'files to keep:'
+        # print dst_keep
+        # print
+        # print 'files to remove:'
+        # print dst_remove
+        # print
+
+        for i in sorted(dst_remove):
+            i_path = os.path.join(dst_dir, i)
+            #print i_path
+
+            if (opts.ignore_mask and re.match(opts.ignore_mask, i_path)):
+                print 'ignoring, not removing %s' % i_path
+                continue
+
+            if os.path.isdir(i_path):
+                print 'Recursively removing obsolete directory %r' % i_path
+                if not opts.dry_run:
+                    try:
+                        shutil.rmtree(i_path)
+                    except OSError, e:
+                        if e.errno == errno.EACCES:
+                            sys.stderr.write('Recursive removing failed for %r (%s). Ignoring.\n' \
+                                             % (i_path, os.strerror(e.errno)))
+                        else:
+                            sys.exit('Recursive removing failed for %r: %s\n' \
+                                     % (i_path, os.strerror(e.errno)))
+                unlinked_dirs += 1
+
+            else:
+                print 'Unlinking obsolete %r' % i_path
+                if not opts.dry_run:
+                    try:
+                        os.unlink(i_path)
+                    except OSError, e:
+                        sys.stderr.write('Unlink failed for %r: %s\n' \
+                                         % (i_path, os.strerror(e.errno)))
+                unlinked_files += 1

         if unlinked_files or unlinked_dirs:
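The lookup order implemented by the mod_mirrorbrain hunk above boils down to:
prefer the inode-suffixed hash file, consult the old `.metalink-hashes' name
only if the new one is absent, and never inject a hash file that is older
than the content it describes. A rough Python equivalent (a sketch under
those assumptions, not the module's actual code; find_hash_file and prefix
are invented names):

    import os

    def find_hash_file(filename, prefix=''):
        # Candidate cache names, most preferred first: the new inode-based
        # scheme, then the pre-2.10 name kept for the transition period.
        st = os.lstat(filename)
        candidates = ['%s%s.inode_%s' % (prefix, filename, st.st_ino),
                      '%s%s.metalink-hashes' % (prefix, filename)]
        for candidate in candidates:
            try:
                sb = os.stat(candidate)
            except OSError:
                continue          # absent under this scheme; try the next one
            if sb.st_mtime >= st.st_mtime:
                return candidate  # up to date: this is the one to inject
            return None           # exists but outdated: ignored, no fallback
        return None               # no hash file found under either name

Note that an existing but outdated hash file does not fall through to the old
scheme; it is ignored outright, so hashes are never served for a file that
changed in place.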