[mirrorbrain-commits] [mod_stats] r26 - in /trunk/tools: dlcount.py ooo.conf

From: <poeml_at_mirrorbrain.org>
Date: Wed, 25 Nov 2009 16:18:06 -0000
Author: poeml
Date: Wed Nov 25 17:18:05 2009
New Revision: 26

URL: http://svn.mirrorbrain.org/viewvc/mod_stats?rev=26&view=rev
Log:
The StatsPreFilter directive is now parsed, and prefiltering is applied
according to the config.

Modified:
    trunk/tools/dlcount.py
    trunk/tools/ooo.conf

Modified: trunk/tools/dlcount.py
URL: http://svn.mirrorbrain.org/viewvc/mod_stats/trunk/tools/dlcount.py?rev=26&r1=25&r2=26&view=diff
==============================================================================
--- trunk/tools/dlcount.py (original)
+++ trunk/tools/dlcount.py Wed Nov 25 17:18:05 2009
@@ -65,7 +65,9 @@
 __url__='http://mirrorbrain.org/'
 
 
+import sys
 import re
+import hashlib
 
 try:
     set
@@ -163,32 +165,47 @@
     known_directives = ['StatsDupWindow', 'StatsIgnoreIP', 'StatsPreFilter', 'StatsCount', 'StatsPostFilter']
     known_directives_lower = [ i.lower() for i in known_directives ]
 
-    # dictionary to hold the config
-    # each item is a list
-    cf = {}
+    # create a dictionary to hold the config
+    # each item is a list (because the directives could occur more than once)
+    # each list item will correspond to one directive occurrence
+    conf = {}
     for i in known_directives_lower:
-        cf[i] = list()
+        conf[i] = list()
 
     for line in open(filename):
+        # remove trailing and leading whitespace and newlines
         line = line.strip()
+        # ignore comment lines
         if line.startswith('#'):
             continue
 
-        d = line.split(None, 1)
-        if not len(d):
-            continue
-        if d[0].lower() not in known_directives_lower:
-            print 'not found:', d[0]
-            continue
-        d, val = d
-        d = d.lower()
-
-        print d, val
-        cf[d].append(val)
-
-    cf['statsdupwindow'] = int(cf['statsdupwindow'][0])
-
-    return cf
+        # split line into 1st word plus rest
+        # will fail if it's not a valid config line
+        try:
+            word, rest = line.split(None, 1)
+        except ValueError:
+            continue
+        if word.lower() not in known_directives_lower:
+            sys.exit('unknown config directive: %r' % word)
+            continue
+        d = word.lower()
+        val = rest
+
+        print word, val
+        conf[d].append(val)
+
+
+    parse_2_in_quotes = re.compile(r'"(.*)"\s+"(.*)"')
+    for i, item in enumerate(conf['statsprefilter']):
+        match = parse_2_in_quotes.match(item)
+        #print 'substitute %s by %s' % (match.group(1), match.group(2))
+        conf['statsprefilter'][i] = (re.compile(match.group(1)), match.group(2), match.group(1))
+        #print conf['statsprefilter'][i]
+
+    # this is just a single integer
+    conf['statsdupwindow'] = int(conf['statsdupwindow'][0])
+
+    return conf
     
 
 
@@ -197,9 +214,6 @@
     Create a generator pipeline for the matching log file lines
     and process them.
     """
-    import re
-    import sys
-    import hashlib
 
     if not len(sys.argv[2:]):
         sys.exit('Usage: dlcount CONFIGFILE LOGFILE [LOGFILE ...]')
@@ -211,10 +225,8 @@
 
 
 
-    matchlist = [ 
-        # FIXME: grab list of regexp from config
-
-    ]
+    # FIXME: grab list of regexp from config
+    matchlist = []
     re_matchlist = []
     for match, sub in matchlist:
         re_matchlist.append((re.compile(match), sub, match))
@@ -259,6 +271,8 @@
 
         # apply prefiltering
         # FIXME
+        for m, s, mreg in conf['statsprefilter']:
+            url = m.sub(s, url)
 
         print '%-80s ' % url, 
 

Modified: trunk/tools/ooo.conf
URL: http://svn.mirrorbrain.org/viewvc/mod_stats/trunk/tools/ooo.conf?rev=26&r1=25&r2=26&view=diff
==============================================================================
--- trunk/tools/ooo.conf (original)
+++ trunk/tools/ooo.conf Wed Nov 25 17:18:05 2009
@@ -43,10 +43,8 @@
 # extended/3.1.1rc2/OOo_3.1.1rc2_20090820_Win32Intel_langpack_en-ZA.exe      -
 # extended/3.1.1rc2/OOo_3.1.1rc2_20090820_LinuxIntel_langpack_brx_deb.tar.gz
 # extended/developer/DEV300_m65/OOo-Dev-SDK_DEV300_m65_Win32Intel_install_en-US.exe
-StatsCount "^(?:stable|extended)/(?:developer/)?([^/]+)/(OOo|OOo-SDK|OOo-Dev|OOo-Dev-SDK)_(?P<realversion>[^_]+(?:_[0-9]+)?)_(.+)_(?P<lang>([a-zA-Z]{2}(-[a-zA-Z]{2})?|binfilter|core|l10n|extensions|system|testautomation|brx|dgo|kok|mai|mni|sat))(_deb|_rpm)?\.(exe|dmg|sh|tar\.gz|tar\.bz2)$" \
-           "prod: \2  os: \4  version: \1  realversion: \g<realversion>  lang: \g<lang>"
-StatsCount "^(?:stable|extended)/(?:developer/)?([^/]+)/(OOo|OOo-SDK|OOo-Dev|OOo-Dev-SDK)_\1_(.+)_(?P<lang>([a-zA-Z]{2}(-[a-zA-Z]{2})?|binfilter|core|l10n|extensions|system|testautomation|brx|dgo|kok|mai|mni|sat))(_deb|_rpm)?\.(exe|dmg|sh|tar\.gz|tar\.bz2)$" \
-           "prod: \2  os: \3  version: \1  lang: \g<lang>"
+StatsCount "^(?:stable|extended)/(?:developer/)?([^/]+)/(OOo|OOo-SDK|OOo-Dev|OOo-Dev-SDK)_(?P<realversion>[^_]+(?:_[0-9]+)?)_(.+)_(?P<lang>([a-zA-Z]{2}(-[a-zA-Z]{2})?|binfilter|core|l10n|extensions|system|testautomation|brx|dgo|kok|mai|mni|sat))(_deb|_rpm)?\.(exe|dmg|sh|tar\.gz|tar\.bz2)$"  "prod: \2  os: \4  version: \1  realversion: \g<realversion>  lang: \g<lang>"
+StatsCount "^(?:stable|extended)/(?:developer/)?([^/]+)/(OOo|OOo-SDK|OOo-Dev|OOo-Dev-SDK)_\1_(.+)_(?P<lang>([a-zA-Z]{2}(-[a-zA-Z]{2})?|binfilter|core|l10n|extensions|system|testautomation|brx|dgo|kok|mai|mni|sat))(_deb|_rpm)?\.(exe|dmg|sh|tar\.gz|tar\.bz2)$"  "prod: \2  os: \3  version: \1  lang: \g<lang>"
 
 
 # extended/3.1.1rc2/OOo_3.1.1rc2_20090820_LinuxX86-64_langpack_zh-CN.tar.gz




_______________________________________________
mirrorbrain-commits mailing list
Archive: http://mirrorbrain.org/archive/mirrorbrain-commits/

Note: To remove yourself from this list, send a mail with the content
 	unsubscribe
to the address mirrorbrain-commits-request_at_mirrorbrain.org
Received on Wed Nov 25 2009 - 16:18:08 GMT

This archive was generated by hypermail 2.2.0 : Wed Nov 25 2009 - 16:45:12 GMT