Author: poeml Date: Wed Mar 10 02:38:05 2010 New Revision: 7971 URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain?rev=7971&view=rev Log: mod_mirrorbrain: - Roughly support RFC Metalinks, in addition to v3 Metalinks. ".meta4" is understood as URL suffix in addition to ".metalink". A metalink4+xml Accept header also elicits a meta4 response. - For the possible requested representations, an enum type was added - also for non-Metalink types like mirrorlists, and in the future, checksums. (A clean up of the much-used meta4_forced && metalink_forced flags is pending). Modified: trunk/mod_mirrorbrain/mod_mirrorbrain.c Modified: trunk/mod_mirrorbrain/mod_mirrorbrain.c URL: http://svn.mirrorbrain.org/viewvc/mirrorbrain/trunk/mod_mirrorbrain/mod_mirrorbrain.c?rev=7971&r1=7970&r2=7971&view=diff ============================================================================== --- trunk/mod_mirrorbrain/mod_mirrorbrain.c (original) +++ trunk/mod_mirrorbrain/mod_mirrorbrain.c Wed Mar 10 02:38:05 2010 _at_@ -85,6 +85,8 @@ #define MOD_MIRRORBRAIN_VER "2.12.0" #define VERSION_COMPONENT "mod_mirrorbrain/"MOD_MIRRORBRAIN_VER +#define RFC3339_DATE_LEN (20) + #ifdef NO_MOD_GEOIP #define DEFAULT_GEOIPFILE "/var/lib/GeoIP/GeoIP.dat" #endif _at_@ -107,6 +109,19 @@ module AP_MODULE_DECLARE_DATA mirrorbrain_module; + +/* (meta) representations of a requested file */ +enum { META4, METALINK, MIRRORLIST, UNKNOWN }; +static struct { + int id; + char *ext; +} reps [] = { + { META4, "meta4" }, + { METALINK, "metalink" }, + { MIRRORLIST, "mirrorlist" }, + { UNKNOWN, NULL } +}; + #ifdef NO_MOD_GEOIP /* could also be put into the server config */ _at_@ -674,9 +689,13 @@ char *query_asn = NULL; char fakefile = 0, newmirror = 0; char mirrorlist = 0; - char metalink_forced = 0; /* metalink was explicitely requested */ - char metalink = 0; /* metalink was negotiated */ + char meta4_forced = 0; /* IETF metalink was explicitely requested */ + char meta4 = 0; /* IETF metalink was negotiated */ /* for 
negotiated metalinks, the exceptions are observed. */ + char metalink_forced = 0; /* v3 metalink was explicitely requested */ + char metalink = 0; /* v3 metalink was negotiated */ + int rep = UNKNOWN; /* type of a requested representation */ + char *rep_ext = NULL; /* extension string of a requested representation */ const char* continent_code; #ifdef NO_MOD_GEOIP short int country_id; _at_@ -763,8 +782,21 @@ query_country = form_lookup(r, "country"); query_asn = (char *) form_lookup(r, "as"); if (form_lookup(r, "newmirror")) newmirror = 1; - if (form_lookup(r, "mirrorlist")) mirrorlist =1; - if (form_lookup(r, "metalink")) metalink_forced = 1; + if (form_lookup(r, "mirrorlist")) { + rep = MIRRORLIST; + rep_ext = reps[MIRRORLIST].ext; + mirrorlist =1; + } + if (form_lookup(r, "meta4")) { + rep = META4; + rep_ext = reps[META4].ext; + meta4_forced = 1; + }; + if (form_lookup(r, "metalink")) { + rep = METALINK; + rep_ext = reps[METALINK].ext; + metalink_forced = 1; + }; } if (!query_country _at_@ -780,13 +812,19 @@ query_asn[i] = '\0'; } - if (!metalink_forced && !mirrorlist) { + if (!meta4_forced && !metalink_forced && !mirrorlist) { const char *accepts; accepts = apr_table_get(r->headers_in, "Accept"); if (accepts != NULL) { - if (ap_strstr_c(accepts, "metalink+xml")) { + if (ap_strstr_c(accepts, "metalink4+xml")) { + rep = META4; + rep_ext = reps[META4].ext; + meta4 = 1; + } else if (ap_strstr_c(accepts, "metalink+xml")) { + rep = METALINK; + rep_ext = reps[METALINK].ext; metalink = 1; - } + } } } _at_@ -832,39 +870,69 @@ return DECLINED; } - /* check if the file exists. Strip off optional .metalink extension. */ + /* if the file doesn't exist, maybe a representation of it is requested */ if (r->finfo.filetype != APR_REG) { - debugLog(r, cfg, "File does not exist acc. 
to r->finfo"); + debugLog(r, cfg, "File does not exist according to r->finfo"); + + if (r->filename[strlen(r->filename) - 1] == '.') { + debugLog(r, cfg, "invalid file extension '.'"); + return DECLINED; + } + + /* Try if we find a valid .metalink/.meta4/... extension. */ char *ext; if ((ext = ap_strrchr(r->filename, '.')) == NULL) { return DECLINED; - } else { - if (strcmp(ext, ".metalink") == 0) { - debugLog(r, cfg, "Metalink requested by .metalink extension"); + } + + for (i = 0; reps[i].ext; i++) { + if (strcmp(ext + 1, reps[i].ext) == 0) { + rep = i; + rep_ext = reps[i].ext; + debugLog(r, cfg, "File ending .%s found", rep_ext); + break; + } + } + + switch (rep) { + case UNKNOWN: + return DECLINED; + + case META4: + debugLog(r, cfg, "Metalink requested by .meta4 extension"); + meta4_forced = 1; + + case METALINK: + debugLog(r, cfg, "Metalink v3 requested by .metalink extension"); metalink_forced = 1; - /* we modify r->filename here. */ + + case MIRRORLIST: + debugLog(r, cfg, "Mirrorlist requested by .mirrorlist extension"); + mirrorlist = 1; + + /* note this actually modifies r->filename. */ ext[0] = '\0'; /* strip the extension from r->uri as well */ + debugLog(r, cfg, "r->uri: '%s'", r->uri); if ((ext = ap_strrchr(r->uri, '.')) != NULL) { - if (strcmp(ext, ".metalink") == 0) { + if (strcmp(ext + 1, rep_ext) == 0) { ext[0] = '\0'; } } - - /* fill in finfo */ - if ( apr_stat(&r->finfo, r->filename, APR_FINFO_SIZE, r->pool) - != APR_SUCCESS ) { - return HTTP_NOT_FOUND; - } - } else { - return DECLINED; - } - } + debugLog(r, cfg, "r->uri: '%s'", r->uri); + } + + + /* fill in finfo */ + if ( apr_stat(&r->finfo, r->filename, APR_FINFO_SIZE, r->pool) + != APR_SUCCESS ) { + return HTTP_NOT_FOUND; + } } /* is the requested file too small to be worth a redirect? 
*/ - if (!mirrorlist && !metalink_forced && (r->finfo.size < cfg->min_size)) { + if (!mirrorlist && !meta4_forced && !metalink_forced && (r->finfo.size < cfg->min_size)) { debugLog(r, cfg, "File '%s' too small (%d bytes, less than %d)", r->filename, (int) r->finfo.size, (int) cfg->min_size); return DECLINED; _at_@ -873,6 +941,7 @@ /* is this file excluded from mirroring? */ if (!mirrorlist + && !meta4_forced && !metalink_forced && cfg->exclude_filemask && !ap_regexec(cfg->exclude_filemask, r->uri, 0, NULL, 0) ) { _at_@ -881,7 +950,7 @@ } /* is the request originating from an ip address excluded from redirecting? */ - if (!mirrorlist && !metalink_forced && cfg->exclude_ips->nelts) { + if (!mirrorlist && !meta4_forced && !metalink_forced && cfg->exclude_ips->nelts) { for (i = 0; i < cfg->exclude_ips->nelts; i++) { _at_@ -899,7 +968,7 @@ /* is the request originating from a network excluded from redirecting? */ - if (!mirrorlist && !metalink_forced && cfg->exclude_networks->nelts) { + if (!mirrorlist && !meta4_forced && !metalink_forced && cfg->exclude_networks->nelts) { for (i = 0; i < cfg->exclude_networks->nelts; i++) { _at_@ -917,7 +986,7 @@ /* is the file in the list of mimetypes to never mirror? */ - if (!mirrorlist && !metalink_forced && (r->content_type) && (cfg->exclude_mime->nelts)) { + if (!mirrorlist && !meta4_forced && !metalink_forced && (r->content_type) && (cfg->exclude_mime->nelts)) { for (i = 0; i < cfg->exclude_mime->nelts; i++) { _at_@ -934,7 +1003,7 @@ /* is this User-Agent excluded from redirecting? */ user_agent = (const char *) apr_table_get(r->headers_in, "User-Agent"); - if (!mirrorlist && !metalink_forced && (user_agent) && (cfg->exclude_agents->nelts)) { + if (!mirrorlist && !meta4_forced && !metalink_forced && (user_agent) && (cfg->exclude_agents->nelts)) { for (i = 0; i < cfg->exclude_agents->nelts; i++) { _at_@ -1474,7 +1543,7 @@ * * => best to sort the mirrors_same_country et al. individually, right? 
*/ - if (metalink || metalink_forced || mirrorlist) { + if (meta4 || meta4_forced || metalink || metalink_forced || mirrorlist) { qsort(mirrors_same_prefix->elts, mirrors_same_prefix->nelts, mirrors_same_prefix->elt_size, cmp_mirror_rank); qsort(mirrors_same_as->elts, mirrors_same_as->nelts, _at_@ -1545,7 +1614,11 @@ /* return a metalink instead of doing a redirect? */ - if (metalink || metalink_forced) { + switch (rep) { + + case META4: + case METALINK: + debugLog(r, cfg, "Sending metalink"); /* tell caches that this is negotiated response and that not every client will take it */ _at_@ -1566,44 +1639,77 @@ "Content-Disposition", apr_pstrcat(r->pool, "attachment; filename=\"", - basename, ".metalink\"", NULL)); - - /* the current time in rfc 822 format */ - char *time_str = apr_palloc(r->pool, APR_RFC822_DATE_LEN); - apr_rfc822_date(time_str, apr_time_now()); - - ap_set_content_type(r, "application/metalink+xml; charset=UTF-8"); - ap_rputs( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" - "<metalink version=\"3.0\" xmlns=\"http://www.metalinker.org/\"\n", r); - - - /* The origin URL is meant to specify the location for revalidation of this metalink - * - * Unfortunately, r->parsed_uri.scheme and r->parsed_uri.hostname don't - * seem to be filled out (why?). But we can put it together from - * r->hostname and r->uri. Actually we should add the port. - * - * We could use r->server->server_hostname instead, which would be the configured server name. - * - * We use r->uri, not r->unparsed_uri, so we don't need to escape query strings for xml. 
- */ - ap_rprintf(r, " origin=\"http://%s%s.metalink\"\n", r->hostname, r->uri); - ap_rputs( " generator=\"MirrorBrain "MOD_MIRRORBRAIN_VER" (see http://mirrorbrain.org/)\"\n", r); - ap_rputs( " type=\"dynamic\"", r); - ap_rprintf(r, " pubdate=\"%s\"", time_str); - ap_rprintf(r, " refreshdate=\"%s\">\n\n", time_str); - - if (scfg->metalink_publisher_name && scfg->metalink_publisher_url) { - ap_rputs( " <publisher>\n", r); - ap_rprintf(r, " <name>%s</name>\n", scfg->metalink_publisher_name); - ap_rprintf(r, " <url>%s</url>\n", scfg->metalink_publisher_url); - ap_rputs( " </publisher>\n\n", r); - } - - ap_rputs( " <files>\n", r); + basename, ".", rep_ext, "\"", NULL)); + + char *time_str = NULL; + + switch (rep) { + case META4: + ap_set_content_type(r, "application/metalink4+xml; charset=UTF-8"); + ap_rputs( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<metalink version=\"3.0\" xmlns=\"http://www.metalinker.org/\"\n", r); + + /* put the current time into rfc 3339 date format */ + time_str = apr_palloc(r->pool, RFC3339_DATE_LEN); + apr_time_exp_t tm; + /* r->request_time should be filled out already, and save us the syscall to time() + * through apr_time_now() */ + apr_time_exp_gmt(&tm, r->request_time); + apr_strftime(time_str, &len, RFC3339_DATE_LEN, "%Y-%m-%dT%H:%M:%SZ", &tm); + + ap_rputs( " <generator>MirrorBrain/"MOD_MIRRORBRAIN_VER"</generator>\n", r); + /* The origin URL is meant to specify the location for revalidation of this metalink + * + * Unfortunately, r->parsed_uri.scheme and r->parsed_uri.hostname don't + * seem to be filled out (why?). But we can put it together from + * r->hostname and r->uri. Actually we should add the port. + * + * We could use r->server->server_hostname instead, which would be the configured server name. + * + * We use r->uri, not r->unparsed_uri, so we don't need to escape query strings for xml. 
+ */ + ap_rprintf(r, " <origin dynamic=\"true\">http://%s%s.%s</origin>\n", r->hostname, r->uri, rep_ext); + ap_rprintf(r, " <published>%s</published>\n", time_str); + + if (scfg->metalink_publisher_name && scfg->metalink_publisher_url) { + ap_rputs( " <publisher>\n", r); + ap_rprintf(r, " <name>%s</name>\n", scfg->metalink_publisher_name); + ap_rprintf(r, " <url>%s</url>\n", scfg->metalink_publisher_url); + ap_rputs( " </publisher>\n\n", r); + } + + break; + + case METALINK: + ap_set_content_type(r, "application/metalink+xml; charset=UTF-8"); + ap_rputs( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<metalink xmlns=\"urn:ietf:params:xml:ns:metalink\">\n", r); + + /* the current time in rfc 822 format */ + time_str = apr_palloc(r->pool, APR_RFC822_DATE_LEN); + apr_rfc822_date(time_str, apr_time_now()); + + ap_rprintf(r, " origin=\"http://%s%s.%s\"\n", r->hostname, r->uri, rep_ext); + ap_rputs( " generator=\"MirrorBrain "MOD_MIRRORBRAIN_VER" (see http://mirrorbrain.org/)\"\n", r); + ap_rputs( " type=\"dynamic\"", r); + ap_rprintf(r, " pubdate=\"%s\"", time_str); + ap_rprintf(r, " refreshdate=\"%s\">\n\n", time_str); + + if (scfg->metalink_publisher_name && scfg->metalink_publisher_url) { + ap_rputs( " <publisher>\n", r); + ap_rprintf(r, " <name>%s</name>\n", scfg->metalink_publisher_name); + ap_rprintf(r, " <url>%s</url>\n", scfg->metalink_publisher_url); + ap_rputs( " </publisher>\n\n", r); + } + + ap_rputs( " <files>\n", r); + + break; + } + + ap_rprintf(r, " <file name=\"%s\">\n", basename); ap_rprintf(r, " <size>%s</size>\n\n", apr_off_t_toa(r->pool, r->finfo.size)); - /* inject hashes, if they are prepared on-disk */ apr_finfo_t sb; _at_@ -1776,10 +1882,11 @@ " </files>\n" "</metalink>\n", r); return OK; - } /* end metafile */ + /* send an HTML list instead of doing a redirect? 
*/ - if (mirrorlist) { + case MIRRORLIST: + debugLog(r, cfg, "Sending mirrorlist"); ap_set_content_type(r, "text/html; charset=ISO-8859-1"); @@ -1910,7 +2017,7 @@ ap_rputs("</body>\n", r); ap_rputs("</html>\n", r); return OK; - } /* end mirrorlist */ + } /* end switch representation */ const char *found_in; _______________________________________________ mirrorbrain-commits mailing list Archive: http://mirrorbrain.org/archive/mirrorbrain-commits/ Note: To remove yourself from this list, send a mail with the content unsubscribe to the address mirrorbrain-commits-request_at_mirrorbrain.org
Received on Wed Mar 10 2010 - 01:38:10 GMT
This archive was generated by hypermail 2.3.0 : Mon Feb 20 2012 - 23:47:04 GMT