Author: poeml Date: Wed Nov 25 19:46:15 2009 New Revision: 36 URL: http://svn.mirrorbrain.org/viewvc/mod_stats?rev=36&view=rev Log: - implement StatsIgnoreMask directive silently ignore all files matching this regular expression. (all others will be counted and need to match one of the StatsCount expressions.) - fix logic of StatsIgnoreIP directive, where the breakout of the nested for loop didn't work Modified: trunk/tools/dlcount.py trunk/tools/ooo.conf Modified: trunk/tools/dlcount.py URL: http://svn.mirrorbrain.org/viewvc/mod_stats/trunk/tools/dlcount.py?rev=36&r1=35&r2=36&view=diff ============================================================================== --- trunk/tools/dlcount.py (original) +++ trunk/tools/dlcount.py Wed Nov 25 19:46:15 2009 @@ -162,8 +162,10 @@ def readconf(filename): """we'd need Apache's config parser here...""" - known_directives = ['StatsDupWindow', 'StatsIgnoreIP', 'StatsPreFilter', 'StatsCount', 'StatsPostFilter'] + known_directives = ['StatsDupWindow', 'StatsIgnoreIP', 'StatsIgnoreMask', 'StatsPreFilter', 'StatsCount', 'StatsPostFilter'] known_directives_lower = [ i.lower() for i in known_directives ] + # regular expressions to parse arguments + parse_1_in_quotes = re.compile(r'"(.*)"') parse_2_in_quotes = re.compile(r'"(.*)"\s+"(.*)"') # create a dictionary to hold the config @@ -196,6 +198,13 @@ # this is just a single integer if directive in ['statsdupwindow']: conf[directive] = int(val) + + # directives with one argument: a regexp + elif directive in ['statsignoremask']: + m = parse_1_in_quotes.match(val) + regex = m.group(1).replace('\\"', '"') + regex_compiled = re.compile(regex) + conf[directive].append((regex_compiled, regex)) # these come with two args: a regexp and a substitution rule elif directive in ['statsprefilter', 'statscount', 'statspostfilter']: @@ -205,7 +214,7 @@ subst = m.group(2).replace('\\"', '"') regex_compiled = re.compile(regex) conf[directive].append((regex_compiled, subst, regex)) - #print 
conf['statsprefilter'][i] + #print conf['statsprefilter'] elif directive in ['statsignoreip']: conf[directive].append(val) @@ -260,10 +269,22 @@ m.update(repr(req)) md = m.digest() + skip = False + for r, mreg in conf['statsignoremask']: + if r.match(url): + #print 'ignoring req %s because it matches %s' %(url, mreg) + skip = True + break + if skip: + continue + for i in conf['statsignoreip']: if ip.startswith(i): #print 'ignoring ip %s because it matches %s' %(ip, i) - continue + skip = True + break + if skip: + continue # was the requests seen recently? If yes, ignore it. # otherwise, put it into the ring buffer. Modified: trunk/tools/ooo.conf URL: http://svn.mirrorbrain.org/viewvc/mod_stats/trunk/tools/ooo.conf?rev=36&r1=35&r2=36&view=diff ============================================================================== --- trunk/tools/ooo.conf (original) +++ trunk/tools/ooo.conf Wed Nov 25 19:46:15 2009 @@ -6,6 +6,10 @@ # that's osuosl.org's Bouncer host StatsIgnoreIP 140.211.167.212 + +# silently ignore all files matching this regular expression. +# (all others will be counted and need to match one of the StatsCount expressions.) +StatsIgnoreMask "^.*\.(txt)" # strip prefixed protocol (normally only sent to proxies, but can occur in the wild) StatsPrefilter "^http://[^/]+/" "" @@ -74,7 +78,6 @@ # contrib/dictionaries/thes_es_ES.zip # contrib/dictionaries/thes_pt_PT_v2.zip -#StatsPrefilter "^contrib/dictionaries/thes_(.*)_v2.zip$" "contrib/dictionaries/thes_\1,v2.zip" StatsCount "^contrib/dictionaries/(thes)_(.*?)(_v2)?.zip$" "prod: \1 os: all version: all lang: \2" _______________________________________________ mirrorbrain-commits mailing list Archive: http://mirrorbrain.org/archive/mirrorbrain-commits/ Note: To remove yourself from this list, send a mail with the content unsubscribe to the address mirrorbrain-commits-request_at_mirrorbrain.org Received on Wed Nov 25 2009 - 18:46:20 GMT
This archive was generated by hypermail 2.2.0 : Wed Nov 25 2009 - 19:45:13 GMT