#!/usr/bin/python
#
# Commit mail (mirrorbrain-commits mailing list), preserved for reference:
#
#   Author: poeml
#   Date: Mon Nov 23 15:38:17 2009
#   New Revision: 7862
#   URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain?rev=7862&view=rev
#
#   Log:
#   null-rsync:
#   - special rsync wrapper which creates a local file tree from a mirror,
#     where all files contain only zeroes instead of real data. The files are
#     created as sparse files, so only the metadata occupies actual space in
#     the filesystem. Modification times and sizes are fully copied, so that
#     even (native) rsync thinks that the file tree is identical.
#
#   Added: trunk/tools/null-rsync (with props)
#   URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/tools/null-rsync?rev=7862&view=auto
#   --- trunk/tools/null-rsync (added)
#   +++ trunk/tools/null-rsync Mon Nov 23 15:38:17 2009

"""
Create a local file tree as copy from a remote server via rsync.


  Copyright 2009 Peter Poeml

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.



To get the file metadata over rsync, rsync's own itemized output is used for
that purpose. rsync is run in dry-run mode with
--out-format='%o %B %i %M %l %n%L', i.e. every line consists of
operation, permissions, itemized changes, mtime, size and name (followed by
" -> target" for symlinks):

del. rw-r--r-- *deleting 1970/01/01-01:00:00 0 ultimate-edition-2.4-x86.iso
del. rw-r--r-- *deleting 1970/01/01-01:00:00 0 ultimate-edition-1.9-x86.iso
del. rw-r--r-- *deleting 1970/01/01-01:00:00 0 ultimate-edition-1.9-x64.iso
del. rw-r--r-- *deleting 1970/01/01-01:00:00 0 nothere
recv rwxr-xr-x .d..t...... 2009/11/06-00:46:02 4096 ./
recv rw-r--r-- >f.st...... 2009/11/05-23:56:46 288 .htaccess
recv rwxr-xr-x cd+++++++++ 2005/09/06-22:00:35 4096 firefox/
recv rwxr-sr-x cd+++++++++ 2009/11/17-22:10:23 4096 firefox/releases/
recv rwxr-xr-x cd+++++++++ 2009/10/26-19:23:12 4096 firefox/releases/3.0.15/
recv rw-r--r-- >f+++++++++ 2009/10/19-18:17:29 3818 firefox/releases/3.0.15/KEY
recv rw-r--r-- >f+++++++++ 2009/10/26-19:21:21 54256 firefox/releases/3.0.15/MD5SUMS



All files are created as sparse files, so they don't take actual space in the
filesystem (besides metadata).

We copy all permissions and timestamps where possible.

Timestamps on symlinks can't be set by Python (its os.utime() implementation
always follows to the target). (It *would* work if Python would export
utimensat() with AT_SYMLINK_NOFOLLOW.) But that doesn't matter.


Usage: null-rsync RSYNC_SRC LOCAL_DST
"""

import sys
import os
import subprocess
import time
import stat

verbose = True

# layout of the timestamp that rsync prints for the %M out-format escape
MTIME_FORMAT = '%Y/%m/%d-%H:%M:%S'

# One entry per character of an rsync permission string ("rwxr-xr-x"): the
# characters that switch the bit on, and the bit itself. 's' (setuid/setgid
# plus execute) and 't' (sticky plus execute) still imply the execute bit.
_PERM_BITS = (
    ('r', stat.S_IRUSR), ('w', stat.S_IWUSR), ('xs', stat.S_IXUSR),
    ('r', stat.S_IRGRP), ('w', stat.S_IWGRP), ('xs', stat.S_IXGRP),
    ('r', stat.S_IROTH), ('w', stat.S_IWOTH), ('xt', stat.S_IXOTH),
)


def perms_to_mode(p):
    """Translate an rsync permission string into a numeric file mode.

    p is the nine character string printed by rsync's %B escape, e.g.
    'rwxr-sr-x'. Read, write and execute bits are copied for user, group and
    other, and the setgid bit is copied as well. For security reasons the
    setuid and sticky bits are deliberately NOT copied; the execute bit that
    an 's' or 't' implies is kept, though.

    Returns the mode as an integer suitable for os.chmod().
    Raises ValueError if p is shorter than nine characters.
    """
    if len(p) < 9:
        raise ValueError('not an rsync permission string: %r' % (p,))

    mode = 0
    for flag, (enabling, bit) in zip(p, _PERM_BITS):
        if flag in enabling:
            mode |= bit

    # setgid is shown in the group-execute column, 's' with and 'S' without
    # the execute bit
    if p[5] in 'sS':
        mode |= stat.S_ISGID

    # for security reasons, we probably don't want these:
    # stat.S_ISUID
    # stat.S_ISVTX

    return mode


def mtime_to_epoch(mtime_rsync):
    """Convert an rsync %M timestamp (local time) to integer epoch seconds."""
    return int(time.mktime(time.strptime(mtime_rsync, MTIME_FORMAT)))


def parse_line(line):
    """Split one line of rsync output into its fields.

    Returns None for an empty line, otherwise the tuple
    (action, perms, attrs, mtime_rsync, size, name, name_to), where size is
    an int and name_to is the symlink target (None for anything that is not
    a symlink). Exits with a message if the line does not have the expected
    shape.
    """
    # ['recv', 'rwxrwxrwx', 'cL+++++++++', '2009/05/31-03:31:46', '25',
    #  'repo/1.0/11.1/suse/x86_64/yaz.rpm -> yaz-3.0.34-1.8.x86_64.rpm']
    words = line.rstrip('\r\n').split(None, 5)
    if not words:
        return None
    if len(words) != 6:
        sys.exit('don\'t know how to handle this line: %r' % (line,))

    action, perms, attrs, mtime_rsync, size, name = words
    # the code below indexes attrs up to position 5 (the 'p' flag)
    if len(attrs) < 6 or not size.isdigit():
        sys.exit('don\'t know how to handle this line: %r' % (line,))

    name_to = None
    if attrs[1] == 'L':
        # symlinks are printed as "name -> target"
        name, sep, name_to = name.partition(' -> ')

    return (action, perms, attrs, mtime_rsync, int(size), name, name_to)


def resolve_in_tree(root, name, follow_final=True):
    """Return the canonical local path for name, making sure it is below root.

    root must already be canonical (os.path.realpath). With follow_final set
    to False only the parent directory is resolved, so that the entry itself
    may be a symlink pointing anywhere; this is what is needed when the
    symlink itself is created or removed.

    Exits with a message if the resulting path is outside of root.
    """
    joined = os.path.join(root, name)
    canonical = os.path.realpath(joined)
    if not follow_final:
        head, tail = os.path.split(joined.rstrip('/'))
        if tail not in ('', '.', '..'):
            canonical = os.path.join(os.path.realpath(head), tail)

    # for safety; comparing with a trailing separator keeps a sibling like
    # "/dst-other" from being accepted for root "/dst"
    prefix = root.rstrip(os.sep) + os.sep
    if canonical != root and not canonical.startswith(prefix):
        sys.exit("canonical path (%r) doesn't start with the rsync "
                 "destination dir (%r)" % (canonical, root))
    return canonical


def create_sparse_file(path, size):
    """Create (or overwrite) path as a file of size zero bytes.

    Only the very last byte is actually written, so the file is sparse on
    filesystems that support it.
    """
    # writing info wastes massive space already; the mozilla file tree took
    # 254 MB instead of 19 MB (real size: 25 G)
    fd = open(path, 'wb')
    try:
        if size > 0:
            fd.seek(size - 1)
            fd.write(b'\0')
    finally:
        fd.close()


def apply_received_item(path, entry, mtime_dir_list):
    """Replay one 'recv' line of rsync output onto the local tree.

    path is the checked local path, entry the tuple from parse_line().
    Directories are appended to mtime_dir_list as (path, mtime_rsync),
    because their mtime can only be set once nothing is created or removed
    inside them anymore.
    """
    action, perms, attrs, mtime_rsync, size, name, name_to = entry
    kind = attrs[1]

    if kind == 'd':
        if attrs[0] == 'c':
            # recv rwxr-xr-x cd+++++++++ 2005/09/06-22:00:35 4096 firefox/
            if not os.path.isdir(path):
                os.mkdir(path)
        elif attrs[0] != '.':
            sys.exit('don\'t know how to handle this line: %r' % (entry,))
        # recv rwxr-xr-x .d..t...... 2009/11/06-00:46:02 4096 ./
        mtime_dir_list.append((path, mtime_rsync))

    elif kind == 'L':
        if attrs[0] == 'c':
            # 'c' covers both a new symlink and one whose target changed
            if os.path.lexists(path):
                os.unlink(path)
            os.symlink(name_to, path)
        # permissions are not relevant for symlinks, and timestamps are not
        # doable (it *would* work if Python would export utimensat() with
        # AT_SYMLINK_NOFOLLOW)
        return

    elif attrs.startswith('>f') and attrs[3] in ('s', '+'):
        # transfer a file
        create_sparse_file(path, size)

    if attrs[5] in ('p', '+'):
        os.chmod(path, perms_to_mode(perms))

    if kind != 'd' and (attrs[4] in ('t', '+') or attrs[3] in ('s', '+')):
        mtime = mtime_to_epoch(mtime_rsync)
        os.utime(path, (mtime, mtime))


def main(argv=None):
    """Run rsync in dry-run mode and replay its itemized output locally.

    argv defaults to sys.argv; argv[1] is the rsync source, argv[2] the local
    destination directory. Exits non-zero on unparsable output, on a path
    outside of the destination, or if rsync itself reported a failure.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit('usage: %s RSYNC_SRC LOCAL_DST' % os.path.basename(argv[0]))

    rsync_src = argv[1]
    rsync_dst = argv[2]

    cmd = ['rsync',
           '--no-motd',
           # not -a because we don't want --devices --specials --owner --group
           '-rlpt',
           # upstream may have world-writable files/directories, but that
           # doesn't mean that we want that locally
           '--chmod=o-w',
           '--out-format=%o %B %i %M %l %n%L',
           '--delete',
           '-n',
           rsync_src,
           rsync_dst]

    if not os.path.exists(rsync_dst):
        os.mkdir(rsync_dst)
    # all safety checks are done against the canonical destination
    dst_root = os.path.realpath(rsync_dst)

    # remember directories to set mtime afterwards
    mtime_dir_list = []

    # universal_newlines gives text lines on Python 2 and 3 alike (on
    # Python 3 the output is decoded with the locale's encoding)
    rsync = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             close_fds=True,
                             universal_newlines=True)

    for line in rsync.stdout:

        if verbose:
            print(line.rstrip())

        entry = parse_line(line)
        if entry is None:
            continue
        action, attrs, name = entry[0], entry[2], entry[5]

        # creating or removing an entry acts on the entry itself, so a
        # symlink must not be followed there
        on_entry_itself = (attrs[1] == 'L' or action == 'del.')
        path = resolve_in_tree(dst_root, name,
                               follow_final=not on_entry_itself)

        if action == 'del.':
            if name.endswith('/'):
                os.rmdir(path)
            else:
                os.unlink(path)

        elif action == 'recv':
            apply_received_item(path, entry, mtime_dir_list)

        else:
            sys.exit('unknown action %r (line was: %r)' % (action, line))

    rsync.stdout.close()
    status = rsync.wait()

    while mtime_dir_list:
        path, mtime_rsync = mtime_dir_list.pop()
        print('delayed setting of mtime on %r' % (path,))

        mtime = mtime_to_epoch(mtime_rsync)
        os.utime(path, (mtime, mtime))

    if verbose:
        print('rsync command for validation:')
        print('rsync --no-motd -rlpt --chmod=o-w %s %s -i -n'
              % (rsync_src, rsync_dst))

    if status != 0:
        sys.exit('rsync exited with status %d' % status)


if __name__ == '__main__':
    main()

# _______________________________________________
# mirrorbrain-commits mailing list
# Archive: http://mirrorbrain.org/archive/mirrorbrain-commits/
# Note: To remove yourself from this list, send a mail with the content
# "unsubscribe" to the address mirrorbrain-commits-request_at_mirrorbrain.org
#
# Received on Mon Nov 23 2009 - 14:38:18 GMT
# This archive was generated by hypermail 2.3.0 : Mon Feb 20 2012 - 23:47:04 GMT