[mirrorbrain-commits] [opensuse-svn] r6615 - trunk/tools/download-redirector-v2/sql

From: Novell Forge SVN <noreply_at_novell.com> Date: Sun, 1 Mar 2009 12:03:02 -0700 (MST) · This archive was generated by hypermail 2.2.0 : 2009-07-10Z19:18:11 GMT

Author: poeml
Date: 2009-03-01 12:02:59 -0700 (Sun, 01 Mar 2009)
New Revision: 6615

Modified:
   trunk/tools/download-redirector-v2/sql/schema-postgresql.sql
Log:
New database scheme for PostgreSQL. Instead of a potentially huge relational
table including timestamps (48 bytes per row), files and associations are now
in a single table, using smallint arrays for the mirror ids. This makes the
table 5x faster and 1/3 the size. In addition, we need only a single index on
the path, which is a small and very fast b-tree. This also gives us a good
search, and the chance to do partial deletions (e.g. for a subtree), which
would always have required a table join before, and was next to impossible.
Partial deletion is important for subdirectory scans, and integration of rsync
and scanning.

The timestamps are not needed anymore; the scanner will make a snapshot of the
filelist into a temporary table and use that to delete unseen files later.

A number of stored procedures written in PL/pgSQL are provided for common tasks
(mirr_add_byid, mirr_del_byid, mirr_hasfile_byid, mirr_hasfile_byname,
mirr_add_bypath, mirr_get_name). This much simplifies application programming.


Modified: trunk/tools/download-redirector-v2/sql/schema-postgresql.sql
===================================================================

--- trunk/tools/download-redirector-v2/sql/schema-postgresql.sql	2009-02-28 15:56:32 UTC (rev 6614)
+++ trunk/tools/download-redirector-v2/sql/schema-postgresql.sql	2009-03-01 19:02:59 UTC (rev 6615)
@@ -3,18 +3,22 @@
 -- MirrorBrain Database scheme for PostgreSQL
 -- 
 
+-- before PL/pgSQL functions can be used, the languages needs to be "installed"
+-- in the database. This is done with:
+-- 
+-- createlang plpgsql <dbname>
+
 -- --------------------------------------------------------
 BEGIN;
 -- --------------------------------------------------------
 
 
-CREATE TABLE "file" (
+CREATE TABLE "filearr" (
         "id" serial NOT NULL PRIMARY KEY,
-        "path" varchar(512) NOT NULL
+        "path" varchar(512) UNIQUE NOT NULL,
+	"mirrors" smallint[]
 );
 
-CREATE INDEX "file_path_key" ON "file" ("path");
-
 -- --------------------------------------------------------
 
 CREATE TABLE "server" (
@@ -54,33 +58,7 @@
 
 -- --------------------------------------------------------
 
-CREATE TABLE "file_server" (
-        "serverid" integer NOT NULL REFERENCES "server" ("id") DEFERRABLE INITIALLY DEFERRED,
-        "fileid" integer NOT NULL REFERENCES "file" ("id") DEFERRABLE INITIALLY DEFERRED,
-	-- we actually never used the timestamp_file column.
-        -- "timestamp_file" timestamp with time zone NULL,
-        -- and the next one should be a unix epoch, which needs only 4 bytes instead of 8:
-        "timestamp_scanner" timestamp with time zone NULL,
-        UNIQUE ("fileid", "serverid")
-);
--- NOTICE:  CREATE TABLE / UNIQUE will create implicit index "file_server_fileid_key" for table "file_server"
--- CREATE TABLE
 
-CREATE INDEX "file_server_serverid_fileid_key" ON "file_server" ("serverid", "fileid");
-
-
--- For ORM's that require a primary key named 'id'; this way we don't need to
--- actually store it and have an index for it.
--- the index alone needs 800MB for 1.000.000 files
-CREATE VIEW file_server_withpk AS 
-        SELECT '1' 
-	|| LPAD(CAST(fileid AS TEXT), 12, '0') 
-	|| LPAD(CAST(serverid AS TEXT), 12, '0') 
-	AS id, serverid, fileid, timestamp_scanner 
-	FROM file_server;
-
--- --------------------------------------------------------
-
 CREATE TABLE "marker" (
         "id" serial NOT NULL PRIMARY KEY,
         "subtree_name" varchar(128) NOT NULL,
@@ -103,24 +81,136 @@
 
 
 
--- --------------------------------------------------------
 
---
--- from_unixtime
---
--- Takes a seconds-since-the-epoch integer and returns a timestamp
---
--- => select from_unixtime('1233609530');
---     from_unixtime    
--- ---------------------
---  2009-02-02 22:18:50
--- (1 row)
 
-CREATE OR REPLACE FUNCTION from_unixtime(integer) RETURNS timestamp AS '
-        SELECT $1::abstime::timestamp without time zone AS result
+-- add a mirror to the list of mirrors where a file was seen
+CREATE OR REPLACE FUNCTION mirr_add_byid(arg_serverid integer, arg_fileid integer) RETURNS integer AS $$
+DECLARE
+    arr smallint[];
+BEGIN
+    SELECT INTO arr mirrors FROM filearr WHERE id = arg_fileid;
+    IF arg_serverid = ANY(arr) THEN
+        RAISE DEBUG 'already there -- nothing to do';
+        RETURN 0;
+    ELSE
+        arr := array_append(arr, arg_serverid::smallint);
+        RAISE DEBUG 'arr: %', arr;
+        update filearr set mirrors = arr where id = arg_fileid;
+        return 1;
+    END IF;
+END;
+$$ LANGUAGE 'plpgsql';
+
+
+-- remove a mirror from the list of mirrors where a file was seen
+CREATE OR REPLACE FUNCTION mirr_del_byid(arg_serverid integer, arg_fileid integer) RETURNS integer AS $$
+DECLARE
+    arr smallint[];
+BEGIN
+    SELECT INTO arr mirrors FROM filearr WHERE id = arg_fileid;
+
+    IF NOT arg_serverid = ANY(arr) THEN
+        -- it's not there - nothing to do
+        RAISE DEBUG 'not there -- nothing to do';
+        RETURN 0;
+    ELSE
+        arr := ARRAY(
+                    SELECT arr[i] 
+                    FROM generate_series(array_lower(arr, 1), array_upper(arr, 1)) 
+                    AS i 
+                    WHERE arr[i] <> arg_serverid
+                );
+        RAISE DEBUG 'arr: %', arr;
+        -- update the array in the table
+        -- if arr is empty, we could actually remove the row instead, thus deleting the file
+        UPDATE filearr 
+            SET mirrors = arr WHERE id = arg_fileid;
+        RETURN 1;
+    END IF;
+END;
+$$ LANGUAGE 'plpgsql';
+
+
+-- check whether a given mirror is known to have a file (id)
+CREATE OR REPLACE FUNCTION mirr_hasfile_byid(arg_serverid integer, arg_fileid integer) RETURNS boolean AS $$
+DECLARE
+    result integer;
+BEGIN
+    SELECT INTO result 1 FROM filearr WHERE id = arg_fileid AND arg_serverid = ANY(mirrors);
+    IF result > 0 THEN
+        RETURN true;
+    END IF;
+    RETURN false;
+END;
+$$ LANGUAGE 'plpgsql';
+
+-- check whether a given mirror is known to have a file (name)
+CREATE OR REPLACE FUNCTION mirr_hasfile_byname(arg_serverid integer, arg_path text) RETURNS boolean AS $$
+DECLARE
+    result integer;
+BEGIN
+    SELECT INTO result 1 FROM filearr WHERE path = arg_path AND arg_serverid = ANY(mirrors);
+    IF result > 0 THEN
+        RETURN true;
+    END IF;
+    RETURN false;
+END;
+$$ LANGUAGE 'plpgsql';
+
+
+CREATE OR REPLACE FUNCTION mirr_add_bypath(arg_serverid integer, arg_path text) RETURNS integer AS $$
+DECLARE
+    fileid integer;
+    arr smallint[];
+    rc boolean;
+BEGIN
+    SELECT INTO fileid, arr
+        id, mirrors FROM filearr WHERE path = arg_path;
+
+    -- There are three cases to handle, and we want to handle each of them
+    -- with the minimal effort.
+    -- In any case, we return a file id in the end.
+    IF arg_serverid = ANY(arr) THEN
+        RAISE DEBUG 'nothing to do';
+    ELSIF fileid IS NULL THEN
+        RAISE DEBUG 'creating entry for new file.';
+        INSERT INTO filearr (path, mirrors) VALUES (arg_path, ARRAY[arg_serverid]);
+        fileid := currval('filearr_id_seq');
+    ELSE
+        RAISE DEBUG 'update existing file entry (id: %)', fileid;
+        arr := array_append(arr, arg_serverid::smallint);
+        update filearr set mirrors = arr where id = fileid;
+    END IF;
+
+    RETURN fileid;
+END;
+$$ LANGUAGE 'plpgsql';
+
+
+
+CREATE OR REPLACE FUNCTION mirr_get_name(integer) RETURNS text AS '
+    SELECT identifier FROM server WHERE id=$1
 ' LANGUAGE 'SQL';
 
+
+CREATE OR REPLACE FUNCTION mirr_get_name(ids smallint[]) RETURNS text[] AS $$
+DECLARE
+    names text[];
+    -- i integer;
+BEGIN
+    names := ARRAY(
+                  select mirr_get_name(cast(ids[i] AS integer)) from generate_series(array_lower(ids, 1), array_upper(ids, 1)) as i
+                  );
+    RETURN names;
+END;
+$$ LANGUAGE 'plpgsql';
+
+
+
+
+
 -- --------------------------------------------------------
 COMMIT;
 -- --------------------------------------------------------
 
+-- vim: ft=sql ai ts=4 sw=4 smarttab expandtab

_______________________________________________
Opensuse-svn mailing list
Opensuse-svn_at_forge.novell.com
http://forge.novell.com/mailman/listinfo/opensuse-svn


_______________________________________________
mirrorbrain-commits mailing list

Note: To remove yourself from this list, send a mail with the content
 	unsubscribe
to the address mirrorbrain-commits-request_at_mirrorbrain.org