[mirrorbrain-commits] [mod_stats] r27 - in /trunk/tools: dlcount.py ooo.conf

From: <poeml_at_mirrorbrain.org>
Date: Wed, 25 Nov 2009 16:45:10 -0000
Author: poeml
Date: Wed Nov 25 17:45:08 2009
New Revision: 27

URL: http://svn.mirrorbrain.org/viewvc/mod_stats?rev=27&view=rev
Log:
- implemented handling of backslash-quoted double quotes in regexps
- implemented postfiltering (StatsPostFilter directive)
- implemented parsing of the StatsCount directive, too (the counting loop
  does not use the parsed rules yet)

Modified:
    trunk/tools/dlcount.py
    trunk/tools/ooo.conf

Modified: trunk/tools/dlcount.py
URL: http://svn.mirrorbrain.org/viewvc/mod_stats/trunk/tools/dlcount.py?rev=27&r1=26&r2=27&view=diff
==============================================================================
--- trunk/tools/dlcount.py (original)
+++ trunk/tools/dlcount.py Wed Nov 25 17:45:08 2009
@@ -164,6 +164,7 @@
     """we'd need Apache's config parser here..."""
     known_directives = ['StatsDupWindow', 'StatsIgnoreIP', 'StatsPreFilter', 'StatsCount', 'StatsPostFilter']
     known_directives_lower = [ i.lower() for i in known_directives ]
+    parse_2_in_quotes = re.compile(r'"(.*)"\s+"(.*)"')
 
     # create a dictionary to hold the config
     # each item is a list (because the directives could occur more than once)
@@ -182,28 +183,39 @@
         # split line into 1st word plus rest
         # will fail if it's not a valid config line
         try:
-            word, rest = line.split(None, 1)
+            word, val = line.split(None, 1)
         except ValueError:
             continue
         if word.lower() not in known_directives_lower:
             sys.exit('unknown config directive: %r' % word)
             continue
-        d = word.lower()
-        val = rest
-
-        print word, val
-        conf[d].append(val)
-
-
-    parse_2_in_quotes = re.compile(r'"(.*)"\s+"(.*)"')
-    for i, item in enumerate(conf['statsprefilter']):
-        match = parse_2_in_quotes.match(item)
-        #print 'substitute %s by %s' % (match.group(1), match.group(2))
-        conf['statsprefilter'][i] = (re.compile(match.group(1)), match.group(2), match.group(1))
-        #print conf['statsprefilter'][i]
-
-    # this is jut a single integer
-    conf['statsdupwindow'] = int(conf['statsdupwindow'][0])
+        directive = word.lower()
+        val = val
+
+
+        # this is just a single integer
+        if directive in ['statsdupwindow']:
+            conf[directive] = int(val)
+
+        # these come with two args: a regexp and a substitution rule
+        elif directive in ['statsprefilter', 'statscount', 'statspostfilter']:
+            m = parse_2_in_quotes.match(val)
+            #print 'substitute %s by %s' % (m.group(1), m.group(2))
+            regex = m.group(1).replace('\\"', '"')
+            subst = m.group(2).replace('\\"', '"')
+            regex_compiled = re.compile(regex)
+            conf[directive].append((regex_compiled, subst, regex))
+            #print conf['statsprefilter'][i]
+
+        elif directive in ['statsignoreip']:
+            conf[directive].append(val)
+
+        else:
+            sys.exit('unparsed directive (implementation needed)', directive)
+
+
+    #for i, item in enumerate(conf['statsprefilter']):
+
 
     return conf
     
@@ -224,14 +236,6 @@
     pprint.pprint(conf)
 
 
-
-    # FIXME: grab list of regexp from config
-    matchlist = []
-    re_matchlist = []
-    for match, sub in matchlist:
-        re_matchlist.append((re.compile(match), sub, match))
-
-
     known = RingBuffer(conf['statsdupwindow'])
 
     filenames = sys.argv[2:]
@@ -268,16 +272,14 @@
             continue
         known.append(md)
 
-
         # apply prefiltering
-        # FIXME
         for m, s, mreg in conf['statsprefilter']:
             url = m.sub(s, url)
 
         print '%-80s ' % url, 
 
         matched = False
-        for m, s, mreg in re_matchlist:
+        for m, s, mreg in []: # conf['statscount']:
             if matched:
                 sys.exit('warning: %r matches\n   %r\nbut already matched a pevious regexp:\n   %r' % (url, mreg, matched))
             if m.match(url):
@@ -286,6 +288,10 @@
         if not matched:
             print '-'
 
+        # apply postfiltering
+        for m, s, mreg in conf['statspostfilter']:
+            url = m.sub(s, url)
+
 
     sys.exit(0)
 

Modified: trunk/tools/ooo.conf
URL: http://svn.mirrorbrain.org/viewvc/mod_stats/trunk/tools/ooo.conf?rev=27&r1=26&r2=27&view=diff
==============================================================================
--- trunk/tools/ooo.conf (original)
+++ trunk/tools/ooo.conf Wed Nov 25 17:45:08 2009
@@ -1,9 +1,9 @@
 # pretreatment (filtering, fixups), applied in order
-# syntax: suitable for inclusion into Apache config
+# the syntax is meant to be suitable for inclusion into Apache config
+# regexps must be in double quotes. Double quotes can be backslash-quoted.
 
 StatsDupWindow 200
 
-# ignore requests from this IP
 # that's osuosl.org's Bouncer host
 StatsIgnoreIP 140.211.167.212
 
@@ -54,4 +54,9 @@
 # localized/es/2.4.3/OOo_2.4.3_Win32Intel_install_es.exe      -
 
 
+# filtering to be applied after parsing (but before counting)
+#StatsPostfilter "foo" "bar"
+
+
+
 # vim: ft=apache ai ts=4 sw=4 smarttab expandtab smarttab




_______________________________________________
mirrorbrain-commits mailing list
Archive: http://mirrorbrain.org/archive/mirrorbrain-commits/

Note: To remove yourself from this list, send a mail with the content
 	unsubscribe
to the address mirrorbrain-commits-request_at_mirrorbrain.org
Received on Wed Nov 25 2009 - 16:45:13 GMT

This archive was generated by hypermail 2.2.0 : Wed Nov 25 2009 - 17:45:18 GMT