Author: poeml Date: 2009-03-09 03:16:18 -0600 (Mon, 09 Mar 2009) New Revision: 6748 Modified: trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py trunk/tools/download-redirector-v2/scanner/scanner.pl Log: scanner: - implement subdirectory scans with deletion of obsolete files. So far, this was only possible with a full scan. The new, array-based database scheme allows for efficient substring match for path names, and thus we can also do deletes now when only a subdir is scanned. - fix the application of the whitelist for top-level directory when a subdir-scan is initiated. - fix rsync scan returning no files at all, when top-level includes are in use and doing a subdir scan (--exclude=/* was added then) - the old scanner options -k and -x were superfluous and are gone now. - for rsync scans, commit after 50 found files already, so transactions are held open a little less long. Modified: trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py =================================================================== --- trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py 2009-03-08 23:43:26 UTC (rev 6747) +++ trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py 2009-03-09 09:16:18 UTC (rev 6748) @@ -621,7 +621,7 @@ if opts.enable: cmd += '-e ' if opts.directory: - cmd += '-k -x -d %s ' % opts.directory + cmd += '-d %s ' % opts.directory if opts.jobs: cmd += '-j %s ' % opts.jobs if opts.all: Modified: trunk/tools/download-redirector-v2/scanner/scanner.pl =================================================================== --- trunk/tools/download-redirector-v2/scanner/scanner.pl 2009-03-08 23:43:26 UTC (rev 6747) +++ trunk/tools/download-redirector-v2/scanner/scanner.pl 2009-03-09 09:16:18 UTC (rev 6748) @@ -133,7 +133,6 @@ my $start_dir = '/'; my $parallel = 1; my $list_only = 0; -my $extra_schedule_run = 0; my $keep_dead_files = 0; my $recursion_delay = 0; # seconds delay per *_readdir recuursion my $force_scan = 0; @@ -143,8 
+142,6 @@ # FIXME: use DBI functions transaction handling my $do_transaction = 1; -# experimental other database scheme -my $use_file_array = 1; # save prepared statements my $sth_update; @@ -195,8 +192,6 @@ elsif ($arg =~ m{^-j}) { $parallel = shift; } elsif ($arg =~ m{^-e}) { $enable_after_scan++; } elsif ($arg =~ m{^-f}) { $force_scan++; } - elsif ($arg =~ m{^-x}) { $extra_schedule_run++; } - elsif ($arg =~ m{^-k}) { $keep_dead_files++; } elsif ($arg =~ m{^-d}) { $start_dir = shift; } elsif ($arg =~ m{^-b}) { $brain_instance = shift; } elsif ($arg =~ m{^-l}) { $list_only++; @@ -291,7 +286,6 @@ exit mirror_list(\@scan_list, $list_only-1) if $list_only; ################### -# Keep in sync with "$start_dir/%" in unless ($keep_dead_files) below! $start_dir =~ s{^/+}{}; # leading slash is implicit; leads to '' per default. $start_dir =~ s{/+$}{}; # trailing slashes likewise. ################## @@ -310,8 +304,6 @@ } push @cmd, '-f' if $force_scan; push @cmd, '-e' if $enable_after_scan; - push @cmd, '-x' if $extra_schedule_run; - push @cmd, '-k' if $keep_dead_files; push @cmd, '-d', $start_dir if length $start_dir; # We must not propagate -j here. # All other options we should propagate. @@ -330,38 +322,53 @@ } +if($do_transaction) { + $dbh->{AutoCommit} = 0; + #$dbh->{RaiseError} = 1; +} + for my $row (@scan_list) { print localtime(time) . " $row->{identifier}: starting\n" if $verbose; - if($do_transaction) { - $dbh->{AutoCommit} = 0; - #$dbh->{RaiseError} = 1; + # already in a transaction? why?? 
+ #if($do_transaction) { + # $dbh->begin_work or die "$DBI::errstr"; + #} + + if(length $start_dir) { + $sql = "CREATE TEMPORARY TABLE temp1 AS + SELECT id FROM filearr + WHERE path LIKE '$start_dir%' + AND $row->{id} = ANY(mirrors)"; + } else { + $sql = "CREATE TEMPORARY TABLE temp1 AS + SELECT id FROM filearr + WHERE $row->{id} = ANY(mirrors)"; } - if ($use_file_array) { - if (!$keep_dead_files) { - $sql = "CREATE TEMPORARY TABLE temp1 AS SELECT id FROM filearr WHERE $row->{id} = ANY(mirrors); CREATE INDEX temp1_key ON temp1 (id)"; - print "$sql\n" if $sqlverbose; - $dbh->do($sql) or die "$sql: ".$DBI::errstr; + print "$sql\n" if $sqlverbose; + $dbh->do($sql) or die "$sql: ".$DBI::errstr; - $sql = "SELECT COUNT(*) FROM temp1"; - print "$sql\n" if $sqlverbose; - my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr(); - my $file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0; - print "$row->{identifier}: files before scan: $file_count\n"; - } else { - $sql = "SELECT COUNT(*) FROM filearr WHERE $row->{id} = ANY(mirrors)"; - print "$sql\n" if $sqlverbose; - my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr(); - my $file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0; - print "$row->{identifier}: files before scan: $file_count\n"; - } + $sql = "CREATE INDEX temp1_key ON temp1 (id); + ANALYZE temp1; + SELECT COUNT(*) FROM temp1"; + print "$sql\n" if $sqlverbose; + + my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr(); + my $initial_file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0; + if(length $start_dir) { + print localtime(time) . " $row->{identifier}: files in subdir $start_dir before scan: $initial_file_count\n"; + } else { + print localtime(time) . 
" $row->{identifier}: files before scan: $initial_file_count\n"; } if($do_transaction) { $dbh->commit or die "$DBI::errstr"; } + #$sql = "SELECT COUNT(*) FROM filearr WHERE $row->{id} = ANY(mirrors)"; + #print "$sql\n" if $sqlverbose; + my $start = int(gettimeofday * 1000); my $file_count = rsync_readdir($row->{identifier}, $row->{id}, $row->{baseurl_rsync}, $start_dir); if(!$file_count and $row->{baseurl_ftp}) { @@ -390,44 +397,28 @@ $start = time(); print localtime(time) . " $row->{identifier}: purging old files\n" if $verbose > 1; - if ($use_file_array) { - #$sql = "SELECT COUNT(*) FROM temp1"; - $sql = "SELECT COUNT(mirr_del_byid($row->{id}, id)) FROM temp1"; - print "$sql\n" if $sqlverbose; - $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr(); - my $purge_file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0; - print localtime(time) . " $row->{identifier}: files to be purged: $purge_file_count\n"; + #$sql = "SELECT COUNT(*) FROM temp1"; + $sql = "SELECT COUNT(mirr_del_byid($row->{id}, id)) FROM temp1"; + print "$sql\n" if $sqlverbose; + $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr(); + my $purge_file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0; + print localtime(time) . " $row->{identifier}: files to be purged: $purge_file_count\n"; - $sql = "SELECT COUNT(*) FROM filearr WHERE $row->{id} = ANY(mirrors);"; - print "$sql\n" if $sqlverbose; - my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr(); - $file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0; - print localtime(time) . " $row->{identifier}: number of files: $file_count\n"; + $sql = "SELECT COUNT(*) FROM filearr WHERE $row->{id} = ANY(mirrors);"; + print "$sql\n" if $sqlverbose; + my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr(); + $file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0; + print localtime(time) . 
" $row->{identifier}: number of files: $file_count\n"; - } else { - my $sql = "DELETE FROM file_server WHERE serverid = $row->{id} - AND timestamp_scanner <= (SELECT extract(epoch from last_scan) FROM server - WHERE id = $row->{id} limit 1)"; - - if(length $start_dir) { - ## let us hope subselects with paramaters work in mysql. - $sql .= " AND fileid IN (SELECT id FROM file WHERE path LIKE ?)"; - } - - # Keep in sync with $start_dir setup above! - my $sth = $dbh->prepare( $sql ); - print "$row->{identifier}: $sql\n" if $sqlverbose; - $sth->execute(length($start_dir) ? "$start_dir/%" : ()) or die "$row->{identifier}: $DBI::errstr"; - } - $duration = time() - $start; print localtime(time) . " $row->{identifier}: purged old files in " . $duration . "s.\n" if $verbose > 0; } - unless ($extra_schedule_run) { + # update the last_scan timestamp; but only if we did a complete scan. + unless ($start_dir) { $sql = "UPDATE server SET last_scan = NOW(), scan_fpm = $fpm WHERE id = $row->{id};"; print "$sql\n" if $sqlverbose; my $sth = $dbh->prepare( $sql ); @@ -442,6 +433,10 @@ print "$row->{identifier}: now enabled.\n" if $verbose > 0; } + $sql = "DROP TABLE temp1"; + print "$sql\n" if $sqlverbose; + $dbh->do($sql) or die "$sql: ".$DBI::errstr; + if($do_transaction) { $dbh->commit or die "$DBI::errstr"; } @@ -573,23 +568,30 @@ { my ($identifier, $id, $url, $name) = @_; + my $item; + my $urlraw = $url; my $re = ''; $re = $1 if $url =~ s{#(.*?)$}{}; print "$identifier: http_readdir: url=$url re=$re\n" if $verbose > 2; $url =~ s{/+$}{}; # we add our own trailing slashes... $name =~ s{/+$}{}; - my $item; - my $included = 0; - foreach my $item(@top_include_list) { - if ($name =~ $item) { - $included = 1; + # are we looking at a top-level directory name? 
+ # (we recognize it by not containing slashes) + my $attop = 0; + $attop = 1 if (length $name) && !($name =~ "/"); + if ($attop && scalar(@top_include_list)) { + my $included = 0; + foreach my $item(@top_include_list) { + if ($name =~ $item) { + $included = 1; + } } + if (!$included) { + print "$identifier: not in top_include_list: $name\n";# if $verbose > 1; + return; + } } - if (scalar(@top_include_list) && ("$name/" ne "/") && !$included) { - print "$identifier: not in top_include_list: $name\n";# if $verbose > 1; - return; - } foreach $item(@norecurse_list) { $item =~ s/([^.])(\*)/$1.$2/g; @@ -634,7 +636,7 @@ ## we must be really sure it is a directory, when we come here. ## otherwise, we'll retrieve the contents of a file! sleep($recursion_delay) if $recursion_delay; - push @r, http_readdir($identifier, $id, $urlraw, $t); + push @r, http_readdir($identifier, $id, $urlraw, $t, 0); } else { ## it is a file. @@ -696,16 +698,6 @@ $ftp_timer = time; my $item; - my $included = 0; - foreach my $item(@top_include_list) { - if ($name =~ $item) { - $included = 1; - } - } - if (scalar(@top_include_list) && ("$name/" ne "/") && !$included) { - print "$identifier: not in top_include_list: $name\n";# if $verbose > 1; - return; - } # ignore paths matching those in @norecurse-list: for $item(@norecurse_list) { @@ -721,6 +713,25 @@ my $re = ''; $re = $1 if $url =~ s{#(.*?)$}{}; $url =~ s{/+$}{}; # we add our own trailing slashes... + + # are we looking at a top-level directory name? + # (we recognize it by not containing slashes) + my $attop = 0; + $attop = 1 if (length $name) && !($name =~ "/"); + if ($attop && scalar(@top_include_list)) { + my $included = 0; + foreach my $item(@top_include_list) { + if ($name =~ $item) { + $included = 1; + } + } + if (!$included) { + print "$identifier: not in top_include_list: $name\n";# if $verbose > 1; + return; + } + } + + my $toplevel = ($ftp) ? 
0 : 1; $ftp = ftp_connect($identifier, "$url/$name", "anonymous", $scanner_email) unless defined $ftp; return unless defined $ftp; @@ -821,53 +832,26 @@ $path =~ s{//+}{/}g; # avoid double slashes. - if ($use_file_array) { - my $sql = "SELECT mirr_add_bypath(?, ?);"; - if (!defined $sth_mirr_addbypath) { - printf "\nPreparing add statement\n\n" if $sqlverbose; - $sth_mirr_addbypath = $dbh->prepare( $sql ) or die "$identifier: $DBI::errstr"; + my $sql = "SELECT mirr_add_bypath(?, ?);"; + if (!defined $sth_mirr_addbypath) { + printf "\nPreparing add statement\n\n" if $sqlverbose; + $sth_mirr_addbypath = $dbh->prepare( $sql ) or die "$identifier: $DBI::errstr"; - } + } - printf "$sql <-- $serverid, $path \n" if $sqlverbose; - $sth_mirr_addbypath->execute( $serverid, $path ) or die "$identifier: $DBI::errstr"; + printf "$sql <-- $serverid, $path \n" if $sqlverbose; + $sth_mirr_addbypath->execute( $serverid, $path ) or die "$identifier: $DBI::errstr"; - my @data = $sth_mirr_addbypath->fetchrow_array(); - #if ($sth_mirr_addbypath->rows > 0) { - my $fileid = $data[0]; - #print "fileid: $fileid\n"; - #} - $sth_mirr_addbypath->finish; - if (!$keep_dead_files) { - $sql = "DELETE FROM temp1 WHERE id = $fileid"; - print "$sql\n" if $sqlverbose; - $dbh->do($sql) or die "$sql: ".$DBI::errstr; - } - - } else { - - my $fileid = getfileid($path); - - if(checkfileserver_fileid($serverid, $fileid)) { - my $sql = "UPDATE file_server SET timestamp_scanner = ".time." WHERE fileid = ? 
AND serverid = ?;"; - if (!defined $sth_update) { - printf "\nPreparing update statement\n\n" if $sqlverbose; - $sth_update = $dbh->prepare( $sql ) or die $DBI::errstr; - } - - printf "$sql <-- $fileid, $serverid \n" if $sqlverbose; - $sth_update->execute( $fileid, $serverid ) or die $DBI::errstr; - } - else { - my $sql = "INSERT INTO file_server (fileid, serverid, timestamp_scanner) VALUES (?, ?, ".time.");"; - if (!defined $sth_insert_rel) { - printf "\nPreparing insert statement\n\n" if $sqlverbose; - $sth_insert_rel = $dbh->prepare( $sql ); - } - - printf "$sql <-- $fileid, $serverid \n" if $sqlverbose; - $sth_insert_rel->execute( $fileid, $serverid ) or die "$identifier: $DBI::errstr"; - } + my @data = $sth_mirr_addbypath->fetchrow_array(); + #if ($sth_mirr_addbypath->rows > 0) { + my $fileid = $data[0]; + #print "fileid: $fileid\n"; + #} + $sth_mirr_addbypath->finish; + if (!$keep_dead_files) { + $sql = "DELETE FROM temp1 WHERE id = $fileid"; + print "$sql\n" if $sqlverbose; + $dbh->do($sql) or die "$sql: ".$DBI::errstr; } return $path; @@ -958,19 +942,7 @@ -sub checkfileserver_fileid -{ - my ($serverid, $fileid) = @_; - - my $sql = "SELECT 1 FROM file_server WHERE fileid = $fileid AND serverid = $serverid;"; - printf "$sql\n" if $sqlverbose; - my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr(); - - return defined($ary_ref->[0]) ? 1 : 0; -} - - - +# callback function sub rsync_cb { my ($priv, $name, $len, $mode, $mtime, @info) = @_; @@ -993,7 +965,7 @@ else { $name = save_file($name, $priv->{identifier}, $priv->{serverid}, $mtime, $priv->{re}); $priv->{counter}++; - if (($priv->{counter} % 500) == 0) { + if (($priv->{counter} % 50) == 0) { print "$priv->{identifier}: commit after 500 files\n" if $verbose > 1; if($do_transaction) { $dbh->commit or die "$DBI::errstr"; @@ -1040,6 +1012,7 @@ $peer->{pass} = $1 if $cred and $cred =~ s{:(.*)}{}; $peer->{user} = $cred if $cred; $peer->{subdir} = $d if length $d; + $peer->{counter} = 0; $path .= "/". 
$d if length $d; rsync_get_filelist($identifier, $peer, $path, 0, \&rsync_cb, $peer); return $peer->{counter}; @@ -1172,7 +1145,7 @@ my @args = ('--server', '--sender', '-rl'); push @args, '--exclude=/*/*' if $norecurse; - if(@top_include_list) { + if(@top_include_list && !defined($peer->{subdir})) { foreach my $item (@top_include_list) { push @args, "--include=/$item"; } _______________________________________________ Opensuse-svn mailing list Opensuse-svn_at_forge.novell.com http://forge.novell.com/mailman/listinfo/opensuse-svn _______________________________________________ mirrorbrain-commits mailing list Note: To remove yourself from this list, send a mail with the content unsubscribe to the address mirrorbrain-commits-request_at_mirrorbrain.org Received on 2009-03-09Z09:17:02
This archive was generated by hypermail 2.2.0 : 2009-07-10Z19:18:12 GMT