Author: poeml Date: Wed Nov 25 17:45:08 2009 New Revision: 27 URL: http://svn.mirrorbrain.org/viewvc/mod_stats?rev=27&view=rev Log: - implemented handling of backslash-quoted double quotes in regexps - postfiltering implemented (StatsPostfilter directive) - the count parser is implemented, too Modified: trunk/tools/dlcount.py trunk/tools/ooo.conf Modified: trunk/tools/dlcount.py URL: http://svn.mirrorbrain.org/viewvc/mod_stats/trunk/tools/dlcount.py?rev=27&r1=26&r2=27&view=diff ============================================================================== --- trunk/tools/dlcount.py (original) +++ trunk/tools/dlcount.py Wed Nov 25 17:45:08 2009 @@ -164,6 +164,7 @@ """we'd need Apache's config parser here...""" known_directives = ['StatsDupWindow', 'StatsIgnoreIP', 'StatsPreFilter', 'StatsCount', 'StatsPostFilter'] known_directives_lower = [ i.lower() for i in known_directives ] + parse_2_in_quotes = re.compile(r'"(.*)"\s+"(.*)"') # create a dictionary to hold the config # each item is a list (because the directives could occur more than once) @@ -182,28 +183,39 @@ # split line into 1st word plus rest # will fail if it's not a valid config line try: - word, rest = line.split(None, 1) + word, val = line.split(None, 1) except ValueError: continue if word.lower() not in known_directives_lower: sys.exit('unknown config directive: %r' % word) continue - d = word.lower() - val = rest - - print word, val - conf[d].append(val) - - - parse_2_in_quotes = re.compile(r'"(.*)"\s+"(.*)"') - for i, item in enumerate(conf['statsprefilter']): - match = parse_2_in_quotes.match(item) - #print 'substitute %s by %s' % (match.group(1), match.group(2)) - conf['statsprefilter'][i] = (re.compile(match.group(1)), match.group(2), match.group(1)) - #print conf['statsprefilter'][i] - - # this is jut a single integer - conf['statsdupwindow'] = int(conf['statsdupwindow'][0]) + directive = word.lower() + val = val + + + # this is just a single integer + if directive in ['statsdupwindow']: +
conf[directive] = int(val) + + # these come with two args: a regexp and a substitution rule + elif directive in ['statsprefilter', 'statscount', 'statspostfilter']: + m = parse_2_in_quotes.match(val) + #print 'substitute %s by %s' % (m.group(1), m.group(2)) + regex = m.group(1).replace('\\"', '"') + subst = m.group(2).replace('\\"', '"') + regex_compiled = re.compile(regex) + conf[directive].append((regex_compiled, subst, regex)) + #print conf['statsprefilter'][i] + + elif directive in ['statsignoreip']: + conf[directive].append(val) + + else: + sys.exit('unparsed directive (implementation needed)', directive) + + + #for i, item in enumerate(conf['statsprefilter']): + return conf @@ -224,14 +236,6 @@ pprint.pprint(conf) - - # FIXME: grab list of regexp from config - matchlist = [] - re_matchlist = [] - for match, sub in matchlist: - re_matchlist.append((re.compile(match), sub, match)) - - known = RingBuffer(conf['statsdupwindow']) filenames = sys.argv[2:] @@ -268,16 +272,14 @@ continue known.append(md) - # apply prefiltering - # FIXME for m, s, mreg in conf['statsprefilter']: url = m.sub(s, url) print '%-80s ' % url, matched = False - for m, s, mreg in re_matchlist: + for m, s, mreg in []: # conf['statscount']: if matched: sys.exit('warning: %r matches\n %r\nbut already matched a pevious regexp:\n %r' % (url, mreg, matched)) if m.match(url): @@ -286,6 +288,10 @@ if not matched: print '-' + # apply postfiltering + for m, s, mreg in conf['statspostfilter']: + url = m.sub(s, url) + sys.exit(0) Modified: trunk/tools/ooo.conf URL: http://svn.mirrorbrain.org/viewvc/mod_stats/trunk/tools/ooo.conf?rev=27&r1=26&r2=27&view=diff ============================================================================== --- trunk/tools/ooo.conf (original) +++ trunk/tools/ooo.conf Wed Nov 25 17:45:08 2009 @@ -1,9 +1,9 @@ # pretreatment (filtering, fixups), applied in order -# syntax: suitable for inclusion into Apache config +# the syntax is meant to be suitable for inclusion 
into Apache config +# regexps must be in double quotes. Double quotes can be backslash-quoted. StatsDupWindow 200 -# ignore requests from this IP # that's osuosl.org's Bouncer host StatsIgnoreIP 140.211.167.212 @@ -54,4 +54,9 @@ # localized/es/2.4.3/OOo_2.4.3_Win32Intel_install_es.exe - +# filtering to be applied after parsing (but before counting) +#StatsPostfilter "foo" "bar" + + + # vim: ft=apache ai ts=4 sw=4 smarttab expandtab smarttab _______________________________________________ mirrorbrain-commits mailing list Archive: http://mirrorbrain.org/archive/mirrorbrain-commits/ Note: To remove yourself from this list, send a mail with the content unsubscribe to the address mirrorbrain-commits-request_at_mirrorbrain.org Received on Wed Nov 25 2009 - 16:45:13 GMT
This archive was generated by hypermail 2.3.0 : Mon Feb 20 2012 - 23:47:04 GMT