[mirrorbrain-commits] [opensuse-svn] r6748 - in trunk/tools/download-redirector-v2: mirrordoctor scanner

From: Novell Forge SVN <noreply_at_novell.com>
Date: Mon, 9 Mar 2009 03:16:22 -0600 (MDT)
Author: poeml
Date: 2009-03-09 03:16:18 -0600 (Mon, 09 Mar 2009)
New Revision: 6748

Modified:
   trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py
   trunk/tools/download-redirector-v2/scanner/scanner.pl
Log:
scanner:
- implement subdirectory scans with deletion of obsolete files. So far, this
  was only possible with a full scan. The new, array-based database scheme
  allows for efficient substring match for path names, and thus we can also do
  deletes now when only a subdir is scanned.
- fix the application of the whitelist for top-level directory when a
  subdir-scan is initiated.
- fix rsync scan returning no files at all, when top-level includes are in use
  and doing a subdir scan (--exclude=/* was added then)
- the old scanner options -k and -x were superfluous and are gone now.
- for rsync scans, commit after 50 found files already, so transactions are
  held open a little less long.


Modified: trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py
===================================================================
--- trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py	2009-03-08 23:43:26 UTC (rev 6747)
+++ trunk/tools/download-redirector-v2/mirrordoctor/mirrordoctor.py	2009-03-09 09:16:18 UTC (rev 6748)
@@ -621,7 +621,7 @@
         if opts.enable:
             cmd += '-e '
         if opts.directory:
-            cmd += '-k -x -d %s ' % opts.directory
+            cmd += '-d %s ' % opts.directory
         if opts.jobs:
             cmd += '-j %s ' % opts.jobs
         if opts.all:

Modified: trunk/tools/download-redirector-v2/scanner/scanner.pl
===================================================================
--- trunk/tools/download-redirector-v2/scanner/scanner.pl	2009-03-08 23:43:26 UTC (rev 6747)
+++ trunk/tools/download-redirector-v2/scanner/scanner.pl	2009-03-09 09:16:18 UTC (rev 6748)
@@ -133,7 +133,6 @@
 my $start_dir = '/';
 my $parallel = 1;
 my $list_only = 0;
-my $extra_schedule_run = 0;
 my $keep_dead_files = 0;
 my $recursion_delay = 0;	# seconds delay per *_readdir recuursion
 my $force_scan = 0;
@@ -143,8 +142,6 @@
 
 # FIXME: use DBI functions transaction handling
 my $do_transaction = 1;
-# experimental other database scheme
-my $use_file_array = 1;
 
 # save prepared statements
 my $sth_update;
@@ -195,8 +192,6 @@
 	elsif ($arg =~ m{^-j})                 { $parallel = shift; }
 	elsif ($arg =~ m{^-e})                 { $enable_after_scan++; }
 	elsif ($arg =~ m{^-f})                 { $force_scan++; }
-	elsif ($arg =~ m{^-x})                 { $extra_schedule_run++; }
-	elsif ($arg =~ m{^-k})                 { $keep_dead_files++; }
 	elsif ($arg =~ m{^-d})                 { $start_dir = shift; }
 	elsif ($arg =~ m{^-b})                 { $brain_instance = shift; }
 	elsif ($arg =~ m{^-l})                 { $list_only++; 
@@ -291,7 +286,6 @@
 exit mirror_list(\@scan_list, $list_only-1) if $list_only;
 
 ###################
-# Keep in sync with "$start_dir/%" in unless ($keep_dead_files) below!
 $start_dir =~ s{^/+}{};	# leading slash is implicit; leads to '' per default.
 $start_dir =~ s{/+$}{};	# trailing slashes likewise. 
 ##################
@@ -310,8 +304,6 @@
   }
   push @cmd, '-f' if $force_scan;
   push @cmd, '-e' if $enable_after_scan;
-  push @cmd, '-x' if $extra_schedule_run;
-  push @cmd, '-k' if $keep_dead_files;
   push @cmd, '-d', $start_dir if length $start_dir;
   # We must not propagate -j here.
   # All other options we should propagate.
@@ -330,38 +322,53 @@
 }
 
 
+if($do_transaction) {
+  $dbh->{AutoCommit} = 0;
+  #$dbh->{RaiseError} = 1;
+}
+
 for my $row (@scan_list) {
   print localtime(time) . " $row->{identifier}: starting\n" if $verbose;
 
-  if($do_transaction) {
-    $dbh->{AutoCommit} = 0;
-    #$dbh->{RaiseError} = 1;
+  # already in a transaction? why??
+  #if($do_transaction) {
+  #  $dbh->begin_work or die "$DBI::errstr";
+  #}
+
+  if(length $start_dir) {
+    $sql = "CREATE TEMPORARY TABLE temp1 AS 
+            SELECT id FROM filearr 
+            WHERE path LIKE '$start_dir%' 
+                  AND $row->{id} = ANY(mirrors)";
+  } else {
+    $sql = "CREATE TEMPORARY TABLE temp1 AS 
+            SELECT id FROM filearr 
+            WHERE $row->{id} = ANY(mirrors)";
   }
-  if ($use_file_array) {
-    if (!$keep_dead_files) {
-      $sql = "CREATE TEMPORARY TABLE temp1 AS SELECT id FROM filearr WHERE $row->{id} = ANY(mirrors); CREATE INDEX temp1_key ON temp1 (id)";
-      print "$sql\n" if $sqlverbose;
-      $dbh->do($sql) or die "$sql: ".$DBI::errstr;
+  print "$sql\n" if $sqlverbose;
+  $dbh->do($sql) or die "$sql: ".$DBI::errstr;
 
-      $sql = "SELECT COUNT(*) FROM temp1";
-      print "$sql\n" if $sqlverbose;
-      my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr();
-      my $file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0;
-      print "$row->{identifier}: files before scan: $file_count\n";
-    } else {
-      $sql = "SELECT COUNT(*) FROM filearr WHERE $row->{id} = ANY(mirrors)";
-      print "$sql\n" if $sqlverbose;
-      my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr();
-      my $file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0;
-      print "$row->{identifier}: files before scan: $file_count\n";
-    }
+  $sql = "CREATE INDEX temp1_key ON temp1 (id);
+          ANALYZE temp1;
+          SELECT COUNT(*) FROM temp1";
+  print "$sql\n" if $sqlverbose;
+    
+  my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr();
+  my $initial_file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0;
+  if(length $start_dir) {
+    print localtime(time) . " $row->{identifier}: files in subdir $start_dir before scan: $initial_file_count\n";
+  } else {
+    print localtime(time) . " $row->{identifier}: files before scan: $initial_file_count\n";
   }
 
   if($do_transaction) {
     $dbh->commit or die "$DBI::errstr";
   }
 
+  #$sql = "SELECT COUNT(*) FROM filearr WHERE $row->{id} = ANY(mirrors)";
+  #print "$sql\n" if $sqlverbose;
 
+
   my $start = int(gettimeofday * 1000);
   my $file_count = rsync_readdir($row->{identifier}, $row->{id}, $row->{baseurl_rsync}, $start_dir);
   if(!$file_count and $row->{baseurl_ftp}) {
@@ -390,44 +397,28 @@
     $start = time();
     print localtime(time) . " $row->{identifier}: purging old files\n" if $verbose > 1;
 
-    if ($use_file_array) {
 
-      #$sql = "SELECT COUNT(*) FROM temp1";
-      $sql = "SELECT COUNT(mirr_del_byid($row->{id}, id)) FROM temp1";
-      print "$sql\n" if $sqlverbose;
-      $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr();
-      my $purge_file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0;
-      print localtime(time) . " $row->{identifier}: files to be purged: $purge_file_count\n";
+    #$sql = "SELECT COUNT(*) FROM temp1";
+    $sql = "SELECT COUNT(mirr_del_byid($row->{id}, id)) FROM temp1";
+    print "$sql\n" if $sqlverbose;
+    $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr();
+    my $purge_file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0;
+    print localtime(time) . " $row->{identifier}: files to be purged: $purge_file_count\n";
 
 
-      $sql = "SELECT COUNT(*) FROM filearr WHERE $row->{id} = ANY(mirrors);";
-      print "$sql\n" if $sqlverbose;
-      my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr();
-      $file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0;
-      print localtime(time) . " $row->{identifier}: number of files: $file_count\n";
+    $sql = "SELECT COUNT(*) FROM filearr WHERE $row->{id} = ANY(mirrors);";
+    print "$sql\n" if $sqlverbose;
+    my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr();
+    $file_count = defined($ary_ref->[0]) ? $ary_ref->[0][0] : 0;
+    print localtime(time) . " $row->{identifier}: number of files: $file_count\n";
 
 
-    } else {
-      my $sql = "DELETE FROM file_server WHERE serverid = $row->{id} 
-        AND timestamp_scanner <= (SELECT extract(epoch from last_scan) FROM server 
-            WHERE id = $row->{id} limit 1)";
-
-      if(length $start_dir) {
-      ## let us hope subselects with paramaters work in mysql.
-        $sql .= " AND fileid IN (SELECT id FROM file WHERE path LIKE ?)";
-      }
-
-      # Keep in sync with $start_dir setup above!
-      my $sth = $dbh->prepare( $sql );
-      print "$row->{identifier}: $sql\n" if $sqlverbose;
-      $sth->execute(length($start_dir) ? "$start_dir/%" : ()) or die "$row->{identifier}: $DBI::errstr";
-    }
-
     $duration = time() - $start;
     print localtime(time) . " $row->{identifier}: purged old files in " . $duration . "s.\n" if $verbose > 0;
   }
 
-  unless ($extra_schedule_run) {
+  # update the last_scan timestamp; but only if we did a complete scan.
+  unless ($start_dir) {
     $sql = "UPDATE server SET last_scan = NOW(), scan_fpm = $fpm WHERE id = $row->{id};";
     print "$sql\n" if $sqlverbose;
     my $sth = $dbh->prepare( $sql );
@@ -442,6 +433,10 @@
     print "$row->{identifier}: now enabled.\n" if $verbose > 0;
   }
 
+  $sql = "DROP TABLE temp1";
+  print "$sql\n" if $sqlverbose;
+  $dbh->do($sql) or die "$sql: ".$DBI::errstr;
+
   if($do_transaction) {
     $dbh->commit or die "$DBI::errstr";
   }
@@ -573,23 +568,30 @@
 {
   my ($identifier, $id, $url, $name) = @_;
 
+  my $item;
+
   my $urlraw = $url;
   my $re = ''; $re = $1 if $url =~ s{#(.*?)$}{};
   print "$identifier: http_readdir: url=$url re=$re\n" if $verbose > 2;
   $url =~ s{/+$}{};	# we add our own trailing slashes...
   $name =~ s{/+$}{};
 
-  my $item;
-  my $included = 0;
-  foreach my $item(@top_include_list) {
-    if ($name =~ $item) {
-      $included = 1;
+  # are we looking at a top-level directory name?
+  # (we recognize it by not containing slashes)
+  my $attop = 0;
+  $attop = 1 if (length $name) && !($name =~ "/");
+  if ($attop && scalar(@top_include_list)) {
+    my $included = 0;
+    foreach my $item(@top_include_list) {
+      if ($name =~ $item) {
+        $included = 1;
+      }
     }
+    if (!$included) {
+      print "$identifier: not in top_include_list: $name\n";# if $verbose > 1;
+      return;
+    }
   }
-  if (scalar(@top_include_list) && ("$name/" ne "/") && !$included) {
-    print "$identifier: not in top_include_list: $name\n";# if $verbose > 1;
-    return;
-  }
 
   foreach $item(@norecurse_list) {
     $item =~ s/([^.])(\*)/$1.$2/g;
@@ -634,7 +636,7 @@
           ## we must be really sure it is a directory, when we come here.
           ## otherwise, we'll retrieve the contents of a file!
           sleep($recursion_delay) if $recursion_delay;
-          push @r, http_readdir($identifier, $id, $urlraw, $t);
+          push @r, http_readdir($identifier, $id, $urlraw, $t, 0);
         }
         else {
           ## it is a file.
@@ -696,16 +698,6 @@
   $ftp_timer = time;
 
   my $item;
-  my $included = 0;
-  foreach my $item(@top_include_list) {
-    if ($name =~ $item) {
-      $included = 1;
-    }
-  }
-  if (scalar(@top_include_list) && ("$name/" ne "/") && !$included) {
-    print "$identifier: not in top_include_list: $name\n";# if $verbose > 1;
-    return;
-  }
 
   # ignore paths matching those in @norecurse-list:
   for $item(@norecurse_list) {
@@ -721,6 +713,25 @@
   my $re = ''; $re = $1 if $url =~ s{#(.*?)$}{};
   $url =~ s{/+$}{};	# we add our own trailing slashes...
 
+
+  # are we looking at a top-level directory name?
+  # (we recognize it by not containing slashes)
+  my $attop = 0;
+  $attop = 1 if (length $name) && !($name =~ "/");
+  if ($attop && scalar(@top_include_list)) {
+    my $included = 0;
+    foreach my $item(@top_include_list) {
+      if ($name =~ $item) {
+        $included = 1;
+      }
+    }
+    if (!$included) {
+      print "$identifier: not in top_include_list: $name\n";# if $verbose > 1;
+      return;
+    }
+  }
+
+
   my $toplevel = ($ftp) ? 0 : 1;
   $ftp = ftp_connect($identifier, "$url/$name", "anonymous", $scanner_email) unless defined $ftp;
   return unless defined $ftp;
@@ -821,53 +832,26 @@
   $path =~ s{//+}{/}g;  # avoid double slashes.
 
 
-  if ($use_file_array) {
-    my $sql = "SELECT mirr_add_bypath(?, ?);";
-    if (!defined $sth_mirr_addbypath) {
-      printf "\nPreparing add statement\n\n" if $sqlverbose;
-      $sth_mirr_addbypath = $dbh->prepare( $sql ) or die "$identifier: $DBI::errstr";
+  my $sql = "SELECT mirr_add_bypath(?, ?);";
+  if (!defined $sth_mirr_addbypath) {
+    printf "\nPreparing add statement\n\n" if $sqlverbose;
+    $sth_mirr_addbypath = $dbh->prepare( $sql ) or die "$identifier: $DBI::errstr";
 
-    }
+  }
 
-    printf "$sql  <-- $serverid, $path \n" if $sqlverbose;
-    $sth_mirr_addbypath->execute( $serverid, $path ) or die "$identifier: $DBI::errstr"; 
+  printf "$sql  <-- $serverid, $path \n" if $sqlverbose;
+  $sth_mirr_addbypath->execute( $serverid, $path ) or die "$identifier: $DBI::errstr"; 
 
-    my @data = $sth_mirr_addbypath->fetchrow_array();
-    #if ($sth_mirr_addbypath->rows > 0) {
-      my $fileid = $data[0];
-      #print "fileid: $fileid\n";
-      #}
-    $sth_mirr_addbypath->finish;
-      if (!$keep_dead_files) {
-      $sql = "DELETE FROM temp1 WHERE id = $fileid";
-      print "$sql\n" if $sqlverbose;
-      $dbh->do($sql) or die "$sql: ".$DBI::errstr;
-    }
-
-  } else {
-
-    my $fileid = getfileid($path);
-
-    if(checkfileserver_fileid($serverid, $fileid)) {
-      my $sql = "UPDATE file_server SET timestamp_scanner = ".time." WHERE fileid = ? AND serverid = ?;";
-      if (!defined $sth_update) {
-        printf "\nPreparing update statement\n\n" if $sqlverbose;
-        $sth_update = $dbh->prepare( $sql ) or die $DBI::errstr;
-      }
-
-      printf "$sql  <-- $fileid, $serverid \n" if $sqlverbose;
-      $sth_update->execute( $fileid, $serverid ) or die $DBI::errstr; 
-    }
-    else {
-      my $sql = "INSERT INTO file_server (fileid, serverid, timestamp_scanner) VALUES (?, ?, ".time.");";
-      if (!defined $sth_insert_rel) {
-        printf "\nPreparing insert statement\n\n" if $sqlverbose;
-        $sth_insert_rel = $dbh->prepare( $sql );
-      }
-
-      printf "$sql  <-- $fileid, $serverid \n" if $sqlverbose;
-      $sth_insert_rel->execute( $fileid, $serverid ) or die "$identifier: $DBI::errstr";
-    }
+  my @data = $sth_mirr_addbypath->fetchrow_array();
+  #if ($sth_mirr_addbypath->rows > 0) {
+    my $fileid = $data[0];
+    #print "fileid: $fileid\n";
+    #}
+  $sth_mirr_addbypath->finish;
+    if (!$keep_dead_files) {
+    $sql = "DELETE FROM temp1 WHERE id = $fileid";
+    print "$sql\n" if $sqlverbose;
+    $dbh->do($sql) or die "$sql: ".$DBI::errstr;
   }
 
   return $path;
@@ -958,19 +942,7 @@
 
 
 
-sub checkfileserver_fileid
-{
-  my ($serverid, $fileid) = @_;
-
-  my $sql = "SELECT 1 FROM file_server WHERE fileid = $fileid AND serverid = $serverid;";
-  printf "$sql\n" if $sqlverbose;
-  my $ary_ref = $dbh->selectall_arrayref($sql) or die $dbh->errstr();
-
-  return defined($ary_ref->[0]) ? 1 : 0;
-}  
-
-
-
+# callback function
 sub rsync_cb
 {
   my ($priv, $name, $len, $mode, $mtime, @info) = @_;
@@ -993,7 +965,7 @@
       else {
         $name = save_file($name, $priv->{identifier}, $priv->{serverid}, $mtime, $priv->{re});
         $priv->{counter}++;
-        if (($priv->{counter} % 500) == 0) {
+        if (($priv->{counter} % 50) == 0) {
           print "$priv->{identifier}: commit after 500 files\n" if $verbose > 1;
           if($do_transaction) {
             $dbh->commit or die "$DBI::errstr";
@@ -1040,6 +1012,7 @@
   $peer->{pass} = $1 if $cred and $cred =~ s{:(.*)}{};
   $peer->{user} = $cred if $cred;
   $peer->{subdir} = $d if length $d;
+  $peer->{counter} = 0;
   $path .= "/". $d if length $d;
   rsync_get_filelist($identifier, $peer, $path, 0, \&rsync_cb, $peer);
   return $peer->{counter};
@@ -1172,7 +1145,7 @@
   my @args = ('--server', '--sender', '-rl');
   push @args, '--exclude=/*/*' if $norecurse;
 
-  if(@top_include_list) {
+  if(@top_include_list && !defined($peer->{subdir})) {
     foreach my $item (@top_include_list) {
       push @args, "--include=/$item";
     }

_______________________________________________
Opensuse-svn mailing list
Opensuse-svn_at_forge.novell.com
http://forge.novell.com/mailman/listinfo/opensuse-svn


_______________________________________________
mirrorbrain-commits mailing list

Note: To remove yourself from this list, send a mail with the content
 	unsubscribe
to the address mirrorbrain-commits-request_at_mirrorbrain.org
Received on 2009-03-09Z09:17:02

This archive was generated by hypermail 2.2.0 : 2009-07-10Z19:18:12 GMT