[mirrorbrain-commits] r8078 - in /trunk/mb: mb.py mb/conf.py mb/hashes.py

From: <poeml@mirrorbrain.org>
Date: Mon, 30 Aug 2010 17:47:27 -0000
Author: poeml
Date: Mon Aug 30 19:47:27 2010
New Revision: 8078

URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain?rev=8078&view=rev
Log:
mb makehashes:
- make the creation of chunked hashes (for metalinks and torrents)
  configurable by introducing a new config parameter in /etc/mirrorbrain.conf
  (default: chunked_hashes = 1)
- make the chunk size configurable (default: chunk_size = 262144)
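
For illustration, a minimal /etc/mirrorbrain.conf excerpt using the new options
could look like the following sketch (the instance section name "main" is an
assumption, not part of this commit; the values shown are simply the defaults):

    [main]
    # set to 0 to skip piece-wise (metalink/torrent) hashing
    chunked_hashes = 1
    # piece size in bytes
    chunk_size = 262144

Note that when zsync_hashes is enabled, chunk_size must be a multiple of 4096,
which conf.py now enforces with an assertion.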

Modified:
    trunk/mb/mb.py
    trunk/mb/mb/conf.py
    trunk/mb/mb/hashes.py

Modified: trunk/mb/mb.py
URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/mb/mb.py?rev=8078&r1=8077&r2=8078&view=diff
==============================================================================
--- trunk/mb/mb.py (original)
+++ trunk/mb/mb.py Mon Aug 30 19:47:27 2010
@@ -1039,6 +1039,8 @@
                                                     dst_dir=dst_dir,
                                                     base_dir=opts.base_dir,
                                                     do_zsync_hashes=self.config.dbconfig.get('zsync_hashes'),
+                                                    do_chunked_hashes=self.config.dbconfig.get('chunked_hashes'),
+                                                    chunk_size=self.config.dbconfig.get('chunk_size'))
                 except OSError, e:
                     if e.errno == errno.ENOENT:
                         sys.stderr.write('File vanished: %r\n' % src)

Modified: trunk/mb/mb/conf.py
URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/mb/mb/conf.py?rev=8078&r1=8077&r2=8078&view=diff
==============================================================================
--- trunk/mb/mb/conf.py (original)
+++ trunk/mb/mb/conf.py Mon Aug 30 19:47:27 2010
@@ -6,7 +6,11 @@
 import mb.mberr
 
 
-boolean_opts = ['zsync_hashes']
+boolean_opts = [ 'zsync_hashes', 'chunked_hashes' ]
+
+DEFAULTS = { 'zsync_hashes': False,
+             'chunked_hashes': True,
+             'chunk_size': 262144 }
 
 class Config:
     """this class sets up a number dictionaries that contain configuration 
@@ -68,6 +72,18 @@
                     raise mb.mberr.ConfigError('cannot parse setting in [%s] section: %r' % (i, b + str(e)), conffile)
                 except ConfigParser.NoOptionError, e:
                     pass
+            # set default values where the config didn't define anything
+            for d in DEFAULTS:
+                try: 
+                    self.general[i][d]
+                except:
+                    self.general[i][d] = DEFAULTS[d]
+
+            self.general[i]['chunk_size'] = int(self.general[i]['chunk_size'])
+            if self.general[i]['zsync_hashes']:
+                # must be a multiple of 2048 and 4096 for zsync checksumming
+                assert self.general[i]['chunk_size'] % 4096 == 0
+
 
 
 

Modified: trunk/mb/mb/hashes.py
URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/mb/mb/hashes.py?rev=8078&r1=8077&r2=8078&view=diff
==============================================================================
--- trunk/mb/mb/hashes.py (original)
+++ trunk/mb/mb/hashes.py Mon Aug 30 19:47:27 2010
@@ -24,14 +24,12 @@
 PIECESIZE = 262144
 SHA1_DIGESTSIZE = 20
 
-# must be a multiple of 2048 and 4096 for zsync checksumming
-assert PIECESIZE % 4096 == 0
-
 
 class Hasheable:
     """represent a file and its metadata"""
     def __init__(self, basename, src_dir=None, dst_dir=None,
-                 base_dir=None, do_zsync_hashes=False):
+                 base_dir=None, do_zsync_hashes=False,
+                 do_chunked_hashes=True, chunk_size=PIECESIZE):
         self.basename = basename
         if src_dir:
             self.src_dir = src_dir
@@ -56,6 +54,8 @@
 
         self.hb = HashBag(src=self.src, parent=self)
         self.hb.do_zsync_hashes = do_zsync_hashes
+        self.hb.do_chunked_hashes = do_chunked_hashes
+        self.hb.chunk_size = chunk_size
 
     def islink(self):
         return stat.S_ISLNK(self.mode)
@@ -162,7 +162,7 @@
                        self.hb.md5hex or '',
                        self.hb.sha1hex or '',
                        self.hb.sha256hex or '',
-                       PIECESIZE,
+                       self.hb.chunk_size,
                        ''.join(self.hb.pieceshex),
                        self.hb.btihhex or '',
                        self.hb.pgp or '',
@@ -201,7 +201,7 @@
                          WHERE file_id = %s""",
                       [int(self.mtime), self.size,
                        self.hb.md5hex or '', self.hb.sha1hex or '', self.hb.sha256hex or '',
-                       PIECESIZE, ''.join(self.hb.pieceshex),
+                       self.hb.chunk_size, ''.join(self.hb.pieceshex),
                        self.hb.btihhex or '',
                        self.hb.pgp or '', 
                        self.hb.zblocksize,
@@ -272,10 +272,10 @@
             return None
 
         while 1 + 1 == 2:
-            buf = f.read(PIECESIZE)
+            buf = f.read(self.chunk_size)
             if not buf: break
 
-            if len(buf) != PIECESIZE:
+            if len(buf) != self.chunk_size:
                 if not short_read_before:
                     short_read_before = True
                 else:
@@ -287,10 +287,12 @@
                 s256.update(buf)
 
             self.npieces += 1
-            self.pieces.append(sha1.sha1(buf).digest())
-            self.pieceshex.append(sha1.sha1(buf).hexdigest())
-
-            self.zs_get_block_sums(buf)
+            if self.do_chunked_hashes:
+                self.pieces.append(sha1.sha1(buf).digest())
+                self.pieceshex.append(sha1.sha1(buf).hexdigest())
+
+            if self.do_zsync_hashes:
+                self.zs_get_block_sums(buf)
 
         f.close()
 
@@ -302,7 +304,8 @@
             self.sha256 = s256.digest()
             self.sha256hex = s256.hexdigest()
 
-        self.calc_btih()
+        if self.do_chunked_hashes:
+            self.calc_btih()
 
         # if present, grab PGP signature
         if os.path.exists(self.src + '.asc'):
@@ -360,14 +363,15 @@
             r.append(self.pgp)
             r.append('        </signature>')
 
-        r.append('        <pieces length="%s" type="sha1">' % (PIECESIZE))
-
-        n = 0
-        for piece in self.pieceshex:
-            r.append('            <hash piece="%s">%s</hash>' % (n, piece))
-            n += 1
-
-        r.append('        </pieces>\n      </verification>\n')
+        if self.do_chunked_hashes:
+            r.append('        <pieces length="%s" type="sha1">' % (self.chunk_size))
+
+            n = 0
+            for piece in self.pieceshex:
+                r.append('            <hash piece="%s">%s</hash>' % (n, piece))
+                n += 1
+
+            r.append('        </pieces>\n      </verification>\n')
 
         return '\n'.join(r)
 
@@ -444,7 +448,7 @@
         buf = ['d', 
                  '6:length', 'i', str(self.h.size), 'e',
                  '4:name', str(len(self.basename)), ':', self.basename, 
-                 '12:piece length', 'i', str(PIECESIZE), 'e',
+                 '12:piece length', 'i', str(self.chunk_size), 'e',
                  '6:pieces', str(len(self.pieces) * SHA1_DIGESTSIZE), ':', ''.join(self.pieces),
                'e']
 

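For reference, a minimal sketch (not part of this commit) of driving the
extended Hasheable constructor directly with the new keyword arguments; the
file name and directories below are placeholders:

    import mb.hashes

    h = mb.hashes.Hasheable('some-image.iso',
                            src_dir='/srv/mirrors/pub',
                            dst_dir='/var/lib/mirrorbrain/hashes',
                            do_zsync_hashes=False,
                            do_chunked_hashes=True,   # new: maps to chunked_hashes in the config
                            chunk_size=262144)        # new: maps to chunk_size in the config

In mb.py these values are taken from self.config.dbconfig.get('chunked_hashes')
and self.config.dbconfig.get('chunk_size'), as shown in the first hunk above.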


