[mirrorbrain-commits] r8022 - /trunk/mirrorprobe/mirrorprobe.py

From: <poeml_at_mirrorbrain.org>
Date: Sat, 27 Mar 2010 15:19:14 -0000
Author: poeml
Date: Sat Mar 27 16:19:13 2010
New Revision: 8022

URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain?rev=8022&view=rev
Log:
mirrorprobe:
- A hard-to-catch exception is now handled. If Python's socket module ran into
  a timeout while reading a chunked response, the exception would not be passed
  correctly to the upper layer, so it could not be caught by its name. We now
  wrap the entire thread body in another catch-all try/except block, which would
  otherwise be bad practice, but is probably okay here, since we already catch
  all other exceptions.
  This should fix issue #46.

Modified:
    trunk/mirrorprobe/mirrorprobe.py

Modified: trunk/mirrorprobe/mirrorprobe.py
URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/mirrorprobe/mirrorprobe.py?rev=8022&r1=8021&r2=8022&view=diff
==============================================================================
--- trunk/mirrorprobe/mirrorprobe.py (original)
+++ trunk/mirrorprobe/mirrorprobe.py Sat Mar 27 16:19:13 2010
@@ -40,81 +40,88 @@
     """Try to reach host at baseurl. 
     Set status_baseurl_new."""
 
-    logging.debug("%s probing %s" % (threading.currentThread().getName(), mirror.identifier))
-
-    #req = urllib2.Request('http://old-cherry.suse.de') # never works
-    #req = urllib2.Request('http://doozer.poeml.de/')   # always works
-    req = urllib2.Request(mirror.baseurl)
-
-    req.add_header('User-Agent', USER_AGENT)
-    #req.get_method = lambda: "HEAD"
-
-    mirror.status_baseurl_new = False
-    mirror.timed_out = True
-    mirror.response_code = None
-    mirror.response = None
-
-    if mirror.baseurl == '':
-        return None
-
     try:
-        response = urllib2.urlopen(req)
-
-        try:
-            mirror.response_code = response.code
-            # if the web server redirects to an ftp:// URL, our response won't have a code attribute
-            # (except we are going via a proxy)
-        except AttributeError:
-            if response.url.startswith('ftp://'):
-                # count as success
-                mirror.response_code = 200
-            logging.debug('mirror %s redirects to ftp:// URL' % mirror.identifier)
-
-        logging.debug('%s got response for %s: %s' % (threading.currentThread().getName(), mirror.identifier, getattr(response, 'code', None)))
-
-        mirror.response = response.read()
-        mirror.status_baseurl_new = True
-
-
-    except ValueError, e:
-        if str(e).startswith('invalid literal for int()'):
-            mirror.response = 'response not read due to http://bugs.python.org/issue1205'
-            logging.info('mirror %s sends broken chunked reply, see http://bugs.python.org/issue1205' % mirror.identifier)
-
-    except socket.timeout, e:
-        mirror.response = 'socket timeout in reading response: %s' % e
-
-    except socket.error, e:
-        #errno, errstr = sys.exc_info()[:2]
-        mirror.response = "socket error: %s" % e
-
-    except httplib.BadStatusLine:
+
+        logging.debug("%s probing %s" % (threading.currentThread().getName(), mirror.identifier))
+
+        #req = urllib2.Request('http://old-cherry.suse.de') # never works
+        #req = urllib2.Request('http://doozer.poeml.de/')   # always works
+        req = urllib2.Request(mirror.baseurl)
+
+        req.add_header('User-Agent', USER_AGENT)
+        #req.get_method = lambda: "HEAD"
+
+        mirror.status_baseurl_new = False
+        mirror.timed_out = True
         mirror.response_code = None
         mirror.response = None
-        
-    except urllib2.HTTPError, e:
-        mirror.response_code = e.code
-        mirror.response = e.read()
-
-    except urllib2.URLError, e:
-        mirror.response_code = 0
-        mirror.response = "%s" % e.reason
-
-    except IOError, e:
-        # IOError: [Errno ftp error] (111, 'Connection refused')
-        if e.errno == 'ftp error':
+
+        if mirror.baseurl == '':
+            return None
+
+        try:
+            response = urllib2.urlopen(req)
+
+            try:
+                mirror.response_code = response.code
+                # if the web server redirects to an ftp:// URL, our response won't have a code attribute
+                # (except we are going via a proxy)
+            except AttributeError:
+                if response.url.startswith('ftp://'):
+                    # count as success
+                    mirror.response_code = 200
+                logging.debug('mirror %s redirects to ftp:// URL' % mirror.identifier)
+
+            logging.debug('%s got response for %s: %s' % (threading.currentThread().getName(), mirror.identifier, getattr(response, 'code', None)))
+
+            mirror.response = response.read()
+            mirror.status_baseurl_new = True
+
+
+        except ValueError, e:
+            if str(e).startswith('invalid literal for int()'):
+                mirror.response = 'response not read due to http://bugs.python.org/issue1205'
+                logging.info('mirror %s sends broken chunked reply, see http://bugs.python.org/issue1205' % mirror.identifier)
+
+        except socket.timeout, e:
+            mirror.response = 'socket timeout in reading response: %s' % e
+
+        except socket.error, e:
+            #errno, errstr = sys.exc_info()[:2]
+            mirror.response = "socket error: %s" % e
+
+        except httplib.BadStatusLine:
+            mirror.response_code = None
+            mirror.response = None
+            
+        except urllib2.HTTPError, e:
+            mirror.response_code = e.code
+            mirror.response = e.read()
+
+        except urllib2.URLError, e:
             mirror.response_code = 0
-            mirror.response = "%s: %s" % (e.errno, e.strerror)
-        else:
-            print mirror.identifier, mirror.baseurl, 'errno:', e.errno
+            mirror.response = "%s" % e.reason
+
+        except IOError, e:
+            # IOError: [Errno ftp error] (111, 'Connection refused')
+            if e.errno == 'ftp error':
+                mirror.response_code = 0
+                mirror.response = "%s: %s" % (e.errno, e.strerror)
+            else:
+                print mirror.identifier, mirror.baseurl, 'errno:', e.errno
+                raise
+
+        except:
+            print mirror.identifier, mirror.baseurl
             raise
 
     except:
-        print mirror.identifier, mirror.baseurl
-        raise
+        mirror.response_code = None
+        mirror.response = 'unknown error'
 
     # not reached, if the timeout goes off
     mirror.timed_out = False
+
 
 
 def main():




_______________________________________________
mirrorbrain-commits mailing list
Archive: http://mirrorbrain.org/archive/mirrorbrain-commits/

Note: To remove yourself from this list, send a mail with the content
 	unsubscribe
to the address mirrorbrain-commits-request_at_mirrorbrain.org
Received on Sat Mar 27 2010 - 15:19:15 GMT

This archive was generated by hypermail 2.3.0 : Mon Feb 20 2012 - 23:47:04 GMT