/* * Copyright (c) 1991 by the University of Washington * Copyright (c) 1993 by the University of Southern California * * For copying and distribution information, please see the files * and . */ #include #include #define ABSOLUTE_MAX_HITS 2500 #define ABSOLUTE_MAX_GIF 100 #include #include #include #include #include #include #include /* Archie defines */ #include #include #include #include #include "prarch.h" #include #include #include #include #include #include #include #include int archie_supported_version = 2; extern char hostname[]; extern char hostwport[]; char archie_prefix[] = "ARCHIE"; static int num_slashes(char *s); static int tkllength(TOKEN tkl); /* * dsdb - Make a database query as if it were a directory lookup * */ arch_dsdb(RREQ req, /* Request pointer */ char *name, /* Name of the directory */ char **componentsp, /* Next component of name */ TOKEN *rcompp, /* Additional components */ VDIR dir, /* Directory to be filled in */ int options, /* Options to list command */ const char *rattrib, /* Requested attributes */ FILTER filters) /* Filters to be applied */ { /* Note that componentspp and rcompp are pointers to */ /* pointers. This is necessary because */ /* this routine must be able to update these values */ /* if more than one component of the name is */ /* resolved. */ char *components = NULL; int num_unresolvedcomps = 0; VLINK cur_link = NULL; char newdirname[MAXPATHLEN]; static int dbopen = 0; char fullquery[MAXPATHLEN]; char *dbpart; char dbquery[MAXPATHLEN]; char dbargs[MAXPATHLEN]; char dbarg1[MAXPATHLEN]; char dbarg2[MAXPATHLEN]; char dbarg3[MAXPATHLEN]; char dirlinkname[MAXPATHLEN]; char sep; char *firstsep; int tmp; VLINK dirlink = NULL; TOKEN tkl_tmp; /* Make sure NAME, COMPONENTSP, and RCOMPP arguments are correct. */ /* Name components with slashes in them are malformed inputs to the ARCHIE database. */ if(componentsp && (components = *componentsp)) { if(index(components, '/')) RETURNPFAILURE; for (tkl_tmp = *rcompp; tkl_tmp; tkl_tmp = tkl_tmp->next) if (index(tkl_tmp->token, '/')) RETURNPFAILURE; } else { if (*rcompp) RETURNPFAILURE; /* ridiculous to specify additional comps and no initial comps.*/ } /* Directory already initialized, but note that this */ /* is not a real directory */ dir->version = -1; dir->inc_native = 3; /* Not really a directory */ /* Note that if we are resolving multiple components */ /* (rcomp!=NULL) the directory will already be empty */ /* since had anything been in it dirsrv would have */ /* already cleared it and moved on to the next comp */ /* Do only once */ if(!dbopen++) { set_default_dir(DEFAULT_DBDIR); if((tmp = open_db_files(DB_RDONLY)) != A_OK) { dbopen = 0; plog(L_DB_ERROR,NOREQ,"Can't open archie database",0); RETURNPFAILURE; } } /* For now, if only verifying, indicate success */ /* We don't want to do a DB search. Eventually */ /* we might actually check that the directory */ /* is valid. */ if(options&DSDB_VERIFY) return(PSUCCESS); /* Construct the full query from the pieces passed to us */ tmp = -1 + qsprintf(fullquery,sizeof fullquery, "%s%s%s",name, ((components && *components) ? "/" : ""), ((components && *components) ? components : "")); for (tkl_tmp = *rcompp; tkl_tmp; tkl_tmp = tkl_tmp->next) tmp += -1 + qsprintf(fullquery + tmp, sizeof fullquery - tmp, "/%s", (*rcompp)->token); if (tmp + 1 > sizeof fullquery) return DSRDIR_NOT_A_DIRECTORY; /* The format for the queries is */ /* DATABASE_PREFIX/COMMAND(PARAMETERS)/ARGS */ /* Strip off the database prefix */ dbpart = fullquery + strlen(archie_prefix); /* And we want to skip the next slash */ dbpart++; /* Find the query (up to the next /), determine if the */ /* / exists and then read the args */ tmp = sscanf(dbpart,"%[^/]%c%s",dbquery,&sep,dbargs); /* If no separator, for now return nothing */ /* Eventually, we might return a list of the query */ /* types supported */ if(tmp < 2) return(PSUCCESS); /* Check query type */ if(strncmp(dbquery,"MATCH",5)==0) { char stype = 'R'; /* search type */ int maxthit = 100; /* max entries to return */ int maxmatch = 100; /* max strings to match */ int maxhitpm = 100; /* max hits per match */ int offset = 0; /* entries to skip */ search_sel method; /* Search method */ int onlystr = 0; /* Just return strings */ /* In the MATCH querytype, the directory part of the query (the argument named NAME) may have no more than 3 components. There are 3 possible formats: 1) DATABASE_PREFIX (one component) 2) (1)/MATCH(...) 3) (2)/query-term (3 total components) */ if (num_slashes(name) > 2) return DSRDIR_NOT_A_DIRECTORY; /* if no strings to match, return nothing */ if(tmp < 3) return(PSUCCESS); /* Get arguments */ tmp = sscanf(dbquery,"MATCH(%d,%d,%d,%d,%c",&maxthit, &maxmatch,&maxhitpm,&offset,&stype); if(tmp < 3) { sscanf(dbquery,"MATCH(%d,%d,%c",&maxthit,&offset,&stype); maxmatch = maxthit; maxhitpm = maxthit; } /* Note: in maxhits, 0 means use default, -1 means use max */ /* Don't let the user request more than ABSOLUTE_MAX_HITS */ if((maxthit > ABSOLUTE_MAX_HITS) || (maxthit < 1)) { p_err_string = qsprintf_stcopyr(p_err_string, "Legal values for max hits are between 1 and %d ", ABSOLUTE_MAX_HITS); return(DIRSRV_NOT_AUTHORIZED); } if(maxthit == 0) maxthit = ABSOLUTE_MAX_HITS; switch(stype) { case '=': onlystr = 0; method = S_EXACT ; break; case 'C': onlystr = 0; method = S_SUB_CASE_STR ; break; case 'c': onlystr = 0; method = S_E_SUB_CASE_STR ; break; case 'K': onlystr = 1; method = S_SUB_CASE_STR ; break; case 'k': onlystr = 1; method = S_E_SUB_CASE_STR ; break; case 'R': onlystr = 0; method = S_FULL_REGEX ; break; case 'r': onlystr = 0; method = S_E_FULL_REGEX ; break; case 'X': onlystr = 1; method = S_FULL_REGEX ; break; case 'x': onlystr = 1; method = S_E_FULL_REGEX ; break; case 'z': onlystr = 1; method = S_E_SUB_NCASE_STR ; break; case 'Z': onlystr = 1; method = S_SUB_NCASE_STR ; break; case 's': onlystr = 0; method = S_E_SUB_NCASE_STR ; break; case 'S': default: onlystr = 0; method = S_SUB_NCASE_STR ; break; } *dbarg1 = *dbarg2 = *dbarg3 = '\0'; tmp = sscanf(dbargs,"%[^/]%c%[^/]%c%s",dbarg1,&sep,dbarg2, &sep,dbarg3); if(tmp < 2) { /* This specifies a directory, but not a link within it */ /* create a pseudo directory and return a pointer */ /* In other words, listing a MATCH directory by itself yields an empty directory. */ if(*dbarg1 && (strcmp(dbarg1,"*")!= 0)) { dirlink = vlalloc(); dirlink->target = stcopyr("DIRECTORY",dirlink->target); dirlink->name = stcopyr(dbarg1,dirlink->name); dirlink->host = stcopyr(hostwport,dirlink->host); sprintf(dirlinkname,"%s/%s/%s",archie_prefix,dbquery,dbarg1); dirlink->hsoname = stcopyr(dirlinkname,dirlink->hsoname); vl_insert(dirlink,dir,VLI_ALLOW_CONF); } } else { if(tmp > 4) { /* There are remaining components */ num_unresolvedcomps = num_slashes(dbarg3); } #ifdef ABSOLUTE_MAX_GIF /* If looking for GIF files (arrgh) don't allow them */ /* to set an unreasonable number of hits, this is */ /* promted by someone who set max hits to 10,000 */ if((maxthit+offset > ABSOLUTE_MAX_GIF)&&(((strlen(dbarg1) >= 4)&& (strcasecmp(dbarg1+strlen(dbarg1)-4,".gif") == 0)) || (strcasecmp(dbarg1,"gif") == 0))) { p_err_string = qsprintf_stcopyr(p_err_string, "Max hits for GIF searches is %d - See archie/doc/giflist.Z on \ archie.mcgill.ca for full gif list",ABSOLUTE_MAX_GIF); return(DIRSRV_NOT_AUTHORIZED); } #endif ABSOLUTE_MAX_GIF tmp = prarch_match(dbarg1,maxthit,maxmatch,maxhitpm, offset,method,dir,FALSE,onlystr); if(tmp) RETURNPFAILURE; } } else if (strncmp(dbquery,"HOST",4)==0) { /* First component of args is the site name */ /* remaining components are the directory name */ *dbarg1 = *dbarg2 = '\0'; tmp = sscanf(dbargs,"%[^/]%c%s",dbarg1,&sep,dbarg2); /* If first component is null, return an empty directory */ if(tmp < 1) return(PSUCCESS); /* if first component exists, but is last component, */ /* then it is the name of the subdirectory for the */ /* host, create a pseudo directory and return a */ /* pointer, If first component is a wildcard, and no */ /* additional components, then return matching list */ /* of sites. */ if(tmp == 1) { tmp = prarch_host(dbarg1,NULL,dir,A2PL_ARDIR); if(tmp == PRARCH_TOO_MANY) return(DIRSRV_TOO_MANY); if(tmp) return(tmp); } /* More than one component, Look up the requested directory */ /* Note that the since the full query is passed to us, it */ /* includes the component name, thus the directory name is */ /* what you get when you strip off the last component of the */ /* name */ else { char *lastsep = rindex(dbarg2,'/'); if(lastsep) *lastsep++ = '\0'; else *dbarg2 = '\0'; tmp = prarch_host(dbarg1,dbarg2,dir,A2PL_ARDIR); if(tmp == PRARCH_DONT_HAVE_SITE) return(DSRDIR_NOT_A_DIRECTORY); if(tmp) RETURNPFAILURE; } } else { /* Query type not supported */ return(DSRDIR_NOT_A_DIRECTORY); } /* We are done, but we need to figure out if we resolved multiple components and reset *componentsp and *rcompp appropriately. */ if (num_unresolvedcomps) { int skip = tkllength(*rcompp) - num_unresolvedcomps; if (skip < 0) return DSRDIR_NOT_A_DIRECTORY; /* shouldn't happen. */ while(skip-- > 0) { assert(*rcompp); *componentsp = (*rcompp)->token; *rcompp = (*rcompp)->next; } } else { while (*rcompp) { *componentsp = (*rcompp)->token; *rcompp = (*rcompp)->next; } } return(PSUCCESS); } static int tkllength(TOKEN tkl) { int retval = 0; for (;tkl; tkl = tkl->next) ++retval; return retval; } static int num_slashes(char *s) { int retval = 0; for (; *s; ++s) { if (*s == '/') ++retval; } return retval; }