Author: poeml Date: 2009-06-22 15:57:16 -0600 (Mon, 22 Jun 2009) New Revision: 7582 Modified: trunk/tools/download-redirector-v2/mirrordoctor/mb/testmirror.py trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py Log: mb scan: - for subdirectory scans, check on each server beforehand whether the directory to be scanned exists. This avoids lots of ugly messages in the scanner which look much like real errors. For this to work, extend mb.testmirror to - implement rsync probing for directories - improve FTP probing - small API change: the mb.testmirror.req method now returns 0 for failed rsync scans, not 1. This is more consistent with the return codes used for HTTP/FTP. TODO: parallelize the check for directories, because that would save quite some time. mb probefile: - fix disabling the usage of proxies; the mb.testmirror.dont_use_proxies method wasn't actually called due to missing parentheses. Modified: trunk/tools/download-redirector-v2/mirrordoctor/mb/testmirror.py =================================================================== --- trunk/tools/download-redirector-v2/mirrordoctor/mb/testmirror.py 2009-06-22 20:43:15 UTC (rev 7581) +++ trunk/tools/download-redirector-v2/mirrordoctor/mb/testmirror.py 2009-06-22 21:57:16 UTC (rev 7582) @@ -58,8 +58,17 @@ except: return (0, digest) - return (response.code, digest) + if url.startswith('http://'): + rc = response.code + elif url.startswith('ftp://'): + out = response.readline() + if len(out): + rc = 1 + else: + rc = 0 + return (rc, digest) + elif url.startswith('rsync://'): try: @@ -67,7 +76,14 @@ # note the -r; *some* option is needed because many rsync servers # don't reply properly if they don't get any option at all. # -t (as the most harmless option) also isn't sufficient. - cmd = 'rsync -r --timeout=%d %s %s/' % (TIMEOUT, url, tmpdir) + # + # replaced -r with -d, because it allows to probe for directories + # without transferring them recursively. 
With 92 mirrors tested, it + # worked just as well, with a single exception. (ftp3.gwdg.de, which + # presumabely runs a really old rsync server. The system seems to be + # SuSE Linux 8.2.) + # poeml, Mon Jun 22 18:10:33 CEST 2009 + cmd = 'rsync -d --timeout=%d %s %s/' % (TIMEOUT, url, tmpdir) (rc, out) = commands.getstatusoutput(cmd) targetfile = os.path.join(tmpdir, os.path.basename(filename)) worked = os.path.exists(targetfile) @@ -79,7 +95,7 @@ shutil.rmtree(tmpdir, ignore_errors=True) if rc != 0: - return (1, digest) + return (0, digest) if worked: return (200, digest) Modified: trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py =================================================================== --- trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py 2009-06-22 20:43:15 UTC (rev 7581) +++ trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py 2009-06-22 21:57:16 UTC (rev 7582) @@ -376,7 +376,7 @@ import mb.testmirror import os.path - mb.testmirror.dont_use_proxies + mb.testmirror.dont_use_proxies() if opts.mirror: mirrors = [ lookup_mirror(self, opts.mirror) ] @@ -609,6 +609,10 @@ mb scan [OPTS] IDENTIFIER [IDENTIFIER...] ${cmd_option_list} """ + from sqlobject.sqlbuilder import AND + import textwrap + import mb.testmirror + mb.testmirror.dont_use_proxies() cmd = [] cmd.append(opts.scanner or '/usr/bin/scanner') @@ -627,8 +631,6 @@ cmd.append('-d %s' % opts.directory) if opts.jobs: cmd += [ '-j', opts.jobs ] - if opts.all: - cmd.append('-a') else: cmd.append('-f') @@ -639,27 +641,76 @@ cmd += [ '--exclude-rsync %s' % i for i in self.config.dbconfig.get('scan_exclude_rsync', '').split() ] + if not opts.all and not args: + sys.exit('No mirrors specified for scanning. 
Either give identifiers, or use -a [-j N].') + mirrors = [] - for arg in args: - mirrors.append(lookup_mirror(self, arg)) + if opts.all: + mirrors = self.conn.Server.select( + AND(self.conn.Server.q.statusBaseurl, + self.conn.Server.q.enabled)) + else: + for arg in args: + mirrors.append(lookup_mirror(self, arg)) - cmd += [ mirror.identifier for mirror in mirrors ] + mirrors_to_scan = [] + mirrors_skipped = [] + if not opts.directory: + mirrors_to_scan = [ i for i in mirrors ] + else: + print 'Checking for existance of %r directory' % opts.directory + for mirror in mirrors: + # check whether the mirror has the requested directory, and if yes, add it + # to the list of mirrors to be scanned. Try URLs in order of efficacy for scanning. + has_dir = 0 + for u in [mirror.baseurlRsync, mirror.baseurlFtp, mirror.baseurl]: + if u == None or u == '': + continue + has_dir = mb.testmirror.req(u, opts.directory)[0] + if has_dir: + if self.options.debug: + print '%s: scheduling scan.' % mirror.identifier + mirrors_to_scan.append(mirror) + break + if not has_dir: + if self.options.debug: + print '%s: directory %s not found. Skipping.' % (mirror.identifier, opts.directory) + mirrors_skipped.append(mirror.identifier) + if len(mirrors_to_scan): + print 'Scheduling scan on:' + print textwrap.fill(', '.join([ i.identifier for i in mirrors_to_scan ]), + initial_indent=' ', subsequent_indent=' ') + + + if not len(mirrors_to_scan): + print 'No mirror to scan. Exiting.' + sys.exit(0) + + cmd += [ mirror.identifier for mirror in mirrors_to_scan ] + cmd = ' '.join(cmd) if self.options.debug: print cmd + sys.stdout.flush() import os rc = os.system(cmd) if opts.enable and rc == 0: import time comment = ('*** scanned and enabled at %s.' 
% (time.ctime())) - for mirror in mirrors: + for mirror in mirrors_to_scan: mirror.comment = ' '.join([mirror.comment or '', '\n\n' + comment]) + sys.stdout.flush() + if opts.directory and len(mirrors_skipped): + print 'Skipped mirrors:' + print textwrap.fill(', '.join(mirrors_skipped), + initial_indent=' ', subsequent_indent=' ') + def do_score(self, subcmd, opts, *args): """${cmd_name}: show or change the score of a mirror _______________________________________________ Opensuse-svn mailing list Opensuse-svn_at_forge.novell.com http://forge.novell.com/mailman/listinfo/opensuse-svn _______________________________________________ mirrorbrain-commits mailing list Archive: http://mirrorbrain.org/archive/mirrorbrain-commits/ Note: To remove yourself from this list, send a mail with the content unsubscribe to the address mirrorbrain-commits-request_at_mirrorbrain.org Received on 2009-06-22Z21:58:02
This archive was generated by hypermail 2.2.0 : 2009-07-10Z19:18:13 GMT