Author: poeml
Date: Mon Aug 30 19:47:27 2010
New Revision: 8078

URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain?rev=8078&view=rev
Log:
mb makehashes:
- make the creation of chunked hashes configurable (for metalinks and
  torrents), by introducing a new config parameter for /etc/mirrorbrain.conf
  (default: chunked_hashes = 1)
- make the chunk size configurable (default: chunk_size = 262144)

Modified:
   trunk/mb/mb.py
   trunk/mb/mb/conf.py
   trunk/mb/mb/hashes.py

Modified: trunk/mb/mb.py
URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/mb/mb.py?rev=8078&r1=8077&r2=8078&view=diff
==============================================================================
--- trunk/mb/mb.py (original)
+++ trunk/mb/mb.py Mon Aug 30 19:47:27 2010
@@ -1039,6 +1039,8 @@
                                       dst_dir=dst_dir,
                                       base_dir=opts.base_dir,
                                       do_zsync_hashes=self.config.dbconfig.get('zsync_hashes'),
+                                      do_chunked_hashes=self.config.dbconfig.get('chunked_hashes'),
+                                      chunk_size=self.config.dbconfig.get('chunk_size'))
             except OSError, e:
                 if e.errno == errno.ENOENT:
                     sys.stderr.write('File vanished: %r\n' % src)

Modified: trunk/mb/mb/conf.py
URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/mb/mb/conf.py?rev=8078&r1=8077&r2=8078&view=diff
==============================================================================
--- trunk/mb/mb/conf.py (original)
+++ trunk/mb/mb/conf.py Mon Aug 30 19:47:27 2010
@@ -6,7 +6,11 @@

 import mb.mberr

-boolean_opts = ['zsync_hashes']
+boolean_opts = [ 'zsync_hashes', 'chunked_hashes' ]
+
+DEFAULTS = { 'zsync_hashes': False,
+             'chunked_hashes': True,
+             'chunk_size': 262144 }

 class Config:
     """this class sets up a number dictionaries that contain configuration
@@ -68,6 +72,18 @@
                         raise mb.mberr.ConfigError('cannot parse setting in [%s] section: %r' % (i, b + str(e)), conffile)
                 except ConfigParser.NoOptionError, e:
                     pass

+            # set default values where the config didn't define anything
+            for d in DEFAULTS:
+                try:
+                    self.general[i][d]
+                except:
+                    self.general[i][d] = DEFAULTS[d]
+
+            self.general[i]['chunk_size'] = int(self.general[i]['chunk_size'])
+            if self.general[i]['zsync_hashes']:
+                # must be a multiple of 2048 and 4096 for zsync checksumming
+                assert self.general[i]['chunk_size'] % 4096 == 0
+
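For illustration only (this block is not part of the commit): with the DEFAULTS
above, the new settings would go into /etc/mirrorbrain.conf roughly like this.
The section name "main" is an assumed example instance section, and both options
can be omitted, in which case the defaults shown here apply.

  [main]
  # optional; these values are the built-in defaults
  chunked_hashes = 1
  chunk_size = 262144

When zsync_hashes is enabled, the new check in conf.py additionally requires
chunk_size to be a multiple of 4096.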
Modified: trunk/mb/mb/hashes.py
URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/mb/mb/hashes.py?rev=8078&r1=8077&r2=8078&view=diff
==============================================================================
--- trunk/mb/mb/hashes.py (original)
+++ trunk/mb/mb/hashes.py Mon Aug 30 19:47:27 2010
@@ -24,14 +24,12 @@
 PIECESIZE = 262144
 SHA1_DIGESTSIZE = 20

-# must be a multiple of 2048 and 4096 for zsync checksumming
-assert PIECESIZE % 4096 == 0
-
 class Hasheable:
     """represent a file and its metadata"""
     def __init__(self, basename, src_dir=None, dst_dir=None,
-                 base_dir=None, do_zsync_hashes=False):
+                 base_dir=None, do_zsync_hashes=False,
+                 do_chunked_hashes=True, chunk_size=PIECESIZE):
         self.basename = basename
         if src_dir:
             self.src_dir = src_dir
@@ -56,6 +54,8 @@

         self.hb = HashBag(src=self.src, parent=self)
         self.hb.do_zsync_hashes = do_zsync_hashes
+        self.hb.do_chunked_hashes = do_chunked_hashes
+        self.hb.chunk_size = chunk_size

     def islink(self):
         return stat.S_ISLNK(self.mode)
@@ -162,7 +162,7 @@
                        self.hb.md5hex or '',
                        self.hb.sha1hex or '',
                        self.hb.sha256hex or '',
-                       PIECESIZE,
+                       self.hb.chunk_size,
                        ''.join(self.hb.pieceshex),
                        self.hb.btihhex or '',
                        self.hb.pgp or '',
@@ -201,7 +201,7 @@
                           WHERE file_id = %s""",
                        [int(self.mtime), self.size,
                         self.hb.md5hex or '', self.hb.sha1hex or '',
                         self.hb.sha256hex or '',
-                        PIECESIZE, ''.join(self.hb.pieceshex),
+                        self.hb.chunk_size, ''.join(self.hb.pieceshex),
                         self.hb.btihhex or '', self.hb.pgp or '',
                         self.hb.zblocksize,
@@ -272,10 +272,10 @@
                 return None

         while 1 + 1 == 2:
-            buf = f.read(PIECESIZE)
+            buf = f.read(self.chunk_size)
             if not buf:
                 break

-            if len(buf) != PIECESIZE:
+            if len(buf) != self.chunk_size:
                 if not short_read_before:
                     short_read_before = True
                 else:
@@ -287,10 +287,12 @@
             s256.update(buf)

             self.npieces += 1
-            self.pieces.append(sha1.sha1(buf).digest())
-            self.pieceshex.append(sha1.sha1(buf).hexdigest())
-
-            self.zs_get_block_sums(buf)
+            if self.do_chunked_hashes:
+                self.pieces.append(sha1.sha1(buf).digest())
+                self.pieceshex.append(sha1.sha1(buf).hexdigest())
+
+            if self.do_zsync_hashes:
+                self.zs_get_block_sums(buf)

         f.close()
@@ -302,7 +304,8 @@
         self.sha256 = s256.digest()
         self.sha256hex = s256.hexdigest()

-        self.calc_btih()
+        if self.do_chunked_hashes:
+            self.calc_btih()

         # if present, grab PGP signature
         if os.path.exists(self.src + '.asc'):
@@ -360,14 +363,15 @@
             r.append(self.pgp)
             r.append(' </signature>')

-        r.append(' <pieces length="%s" type="sha1">' % (PIECESIZE))
-
-        n = 0
-        for piece in self.pieceshex:
-            r.append(' <hash piece="%s">%s</hash>' % (n, piece))
-            n += 1
-
-        r.append(' </pieces>\n </verification>\n')
+        if self.do_chunked_hashes:
+            r.append(' <pieces length="%s" type="sha1">' % (self.chunk_size))
+
+            n = 0
+            for piece in self.pieceshex:
+                r.append(' <hash piece="%s">%s</hash>' % (n, piece))
+                n += 1
+
+            r.append(' </pieces>\n </verification>\n')

         return '\n'.join(r)
@@ -444,7 +448,7 @@
         buf = ['d',
                '6:length', 'i', str(self.h.size), 'e',
                '4:name', str(len(self.basename)), ':', self.basename,
-               '12:piece length', 'i', str(PIECESIZE), 'e',
+               '12:piece length', 'i', str(self.chunk_size), 'e',
                '6:pieces', str(len(self.pieces) * SHA1_DIGESTSIZE), ':',
                ''.join(self.pieces),
                'e']
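As a rough standalone sketch (not the mb code itself; it uses Python's hashlib
instead of mb's sha1 wrapper, and the function name is made up for illustration),
this is the per-chunk hashing that makehashes performs, now gated by
do_chunked_hashes and parameterized by chunk_size:

import hashlib

def file_hashes(path, chunk_size=262144, do_chunked_hashes=True):
    """Whole-file md5/sha1/sha256 digests plus optional per-chunk sha1 digests."""
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    pieceshex = []
    with open(path, 'rb') as f:
        while True:
            buf = f.read(chunk_size)
            if not buf:
                break
            # the whole-file digests are always computed
            md5.update(buf)
            sha1.update(buf)
            sha256.update(buf)
            # per-chunk sha1 digests (metalink <pieces>, torrent pieces) are now optional
            if do_chunked_hashes:
                pieceshex.append(hashlib.sha1(buf).hexdigest())
    return md5.hexdigest(), sha1.hexdigest(), sha256.hexdigest(), pieceshex

With do_chunked_hashes=False the list of piece digests simply stays empty, which
corresponds to the diff above skipping the <pieces> element and the btih
calculation.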
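For context, and with placeholder digests rather than real values, the metalink
fragment produced by the <pieces> code above looks roughly like this when chunked
hashes are enabled; with chunked_hashes = 0 the block is omitted entirely:

  <pieces length="262144" type="sha1">
    <hash piece="0">(sha1 of the first 262144-byte chunk)</hash>
    <hash piece="1">(sha1 of the second chunk)</hash>
  </pieces>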