Intial commit

This commit is contained in:
Mario Fetka
2024-05-27 16:13:40 +02:00
parent f8dc12b10a
commit d71d446104
2495 changed files with 539746 additions and 0 deletions

1
prospero/lib/psrv/archie2/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
Makefile

View File

@@ -0,0 +1,10 @@
FILES
Makefile
README
arch_dsdb.c
arch_prioritize.c
atopdate.c
atoplink.c
prarch.h
prarch_host.c
prarch_match.c

View File

@@ -0,0 +1,95 @@
A_INC = -Iarchie_src
INCS += ${A_INC}
# libraries should be relative to this dir, not SRC
DBS_LIB = libpsarchie.a
CFLAGS += -O2 -DMMAP -DSTRFIND -DCLEANUP
CFILES = \
arch_dsdb.c \
arch_prioritize.c \
prarch_match.c \
prarch_host.c \
atopdate.c \
atoplink.c
OBJECTS = \
arch_dsdb.o \
arch_prioritize.o \
prarch_match.o \
prarch_host.o \
atopdate.o \
atoplink.o
ARC_LIB = \
oper.o \
database.o \
misc.o \
error.o \
net.o
ARC_LIBSRC = \
oper.c \
database.c \
misc.c \
error.c \
net.c
CODE = ${CFILES} Makefile
all: ${DBS_LIB}
# install: copy the built archive into ${INSTDIR} and rebuild its symbol table.
# NOTE(review): the copy destination is named ${SRV_LIB}, but ranlib is run on
# ${INSTDIR}/${DBS_LIB}.  SRV_LIB is not defined in this Makefile; confirm that
# both names refer to the same installed file.
install:
cp ${DBS_LIB} ${INSTDIR}/${SRV_LIB}
ranlib ${INSTDIR}/${DBS_LIB}
cleandb:
\rm -f db/* /usr/tmp/archie.lock
\cp /dev/null db/file-list
\cp /dev/null db/strings-list
${ARC_LIB}: Makefile
${CC} ${F_CC} ${F_CPP} $*.c
${ARC_LIBSRC}:
ln -s archie_src/$@
${DBS_LIB}: ${OBJECTS} ${ARC_LIB}
rm -f ${DBS_LIB}
ar rv ${DBS_LIB} ${OBJECTS} ${ARC_LIB}
ranlib ${DBS_LIB}
# These dependencies cannot be generated automatically by the SWA Prospero
# scripts unless you are running on a machine that has the ARCHIE sources.
# Therefore, we treat them specially.
# These dependencies should be updated the next time someone reading this is
# in a position to do so.
# Actually, to set the dependencies, create a dummy archie_src directory
# containing archie_defs.h, database.h, structs.h, defines.h, and error.h.
# This will work unless any archie include files in turn include other
# include files, which we of course have no way of knowing.
atoplink.o: prarch.h
atoplink.o: archie_src/database.h
atoplink.o: archie_src/defines.h
atoplink.o: ../../../include/pfs.h
atoplink.o: ../../../include/psite.h
atoplink.o: archie_src/structs.h
prarch_host.o: prarch.h
prarch_host.o: archie_src/archie_defs.h
prarch_host.o: archie_src/database.h
prarch_host.o: archie_src/defines.h
prarch_host.o: ../../../include/perrno.h
prarch_host.o: ../../../include/pfs.h
prarch_host.o: archie_src/structs.h
# Dependencies
arch_prioritize.o : \
../../../include/ardp.h \
../../../include/pfs_threads.h ../../../include/pfs_utils.h \
../../../include/list_macros.h \
../../../include/../lib/ardp/flocks.h ../../../include/pfs.h ../../../include/pmachine.h \
../../../include/implicit_fixes.h \
../../../include/perrno.h

View File

@@ -0,0 +1,45 @@
This directory contains code needed to integrate the archie 2 release
with a Prospero server.
You should start from the most recent prospero release. You can
obtain information about the release from info-prospero@isi.edu. The
release should be available on prospero.isi.edu in the file
/pub/prospero/prospero.tar.Z.
Here are specific instructions on how to tie Prospero in to Archie:
Retrieve the release, and untar it. Installation instructions are
included, but they are not tailored to Archie. The changes to the
installation instructions needed for use with Archie follow:
1) There is probably no need to set up separate user and group IDs
for Prospero, just use those for Archie.
2) In pserver.h, define PSRV_ARCHIE.
3) In server/Makefile uncomment the appropriate DB_LIBS line
for your configuration (archie2 or archie3)
4) In the directory lib/psrv/archie2 or lib/psrv/archie3, create
a symbolic link named archie_src that points to the directory
containing the archie sources. Note,
you must obtain archie sources from the archie group. For archie3,
these sources must have been obtained after 2/22/93. For archie2,
there has been a change to oper.c in the archie sources that should
have been obtained since 2/22/93. Note also that for archie2, the
archie sources must be set up to use the full path of the database
directory, or you will also have to create a db symbolic link from the
directory within which the Prospero server will run.
5) Run make in lib/psrv/archie2 or lib/psrv/archie3 depending
on your configuration. If using archie3, make a link from
libparchie.a to the file libparchie.a in the archie3
distribution. If you want, you can add lib/psrv/archie2
or lib/psrv/archie3 to the list of subdirectories in the
top level Prospero makefile (SUBDIR).
6) Make the other necessary customizations (as per the installation
instructions for prospero) by editing include/pserver.h,
include/pmachine.h, and the top level make file.
7) As per the installation instructions, do a make, make install,
then run pstart (you will probably want to add pstart to your
system startup files).

View File

@@ -0,0 +1,376 @@
/*
* Copyright (c) 1991 by the University of Washington
* Copyright (c) 1993 by the University of Southern California
*
* For copying and distribution information, please see the files
* <uw-copyright.h> and <usc-copyr.h>.
*/
#include <uw-copyright.h>
#include <usc-copyr.h>
#define ABSOLUTE_MAX_HITS 2500
#define ABSOLUTE_MAX_GIF 100
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/dir.h>
#include <stdio.h>
#include <sgtty.h>
#include <string_with_strcasecmp.h>
/* Archie defines */
#include <defines.h>
#include <structs.h>
#include <error.h>
#include <database.h>
#include "prarch.h"
#include <pserver.h>
#include <pfs.h>
#include <ardp.h>
#include <psrv.h>
#include <plog.h>
#include <pprot.h>
#include <perrno.h>
#include <pmachine.h>
int archie_supported_version = 2;
extern char hostname[];
extern char hostwport[];
char archie_prefix[] = "ARCHIE";
static int num_slashes(char *s);
static int tkllength(TOKEN tkl);
/*
 * arch_dsdb - Make an archie database query as if it were a directory lookup.
 *
 * The full query is assembled from NAME, *COMPONENTSP and the token list
 * *RCOMPP and has the form
 *
 *      DATABASE_PREFIX/COMMAND(PARAMETERS)/ARGS
 *
 * where COMMAND is MATCH or HOST.  Resulting links are inserted into DIR.
 * REQ, RATTRIB and FILTERS are accepted but not used by this routine.
 *
 * Returns PSUCCESS when the query was handled (DIR may be left empty),
 * DSRDIR_NOT_A_DIRECTORY for oversized, malformed or unsupported queries,
 * DIRSRV_NOT_AUTHORIZED when a hit limit is exceeded, DIRSRV_TOO_MANY when
 * a host wildcard matches too many sites, the prarch_host() code for other
 * host-list failures, and otherwise fails through RETURNPFAILURE.
 *
 * On the normal exit path *COMPONENTSP and *RCOMPP are advanced past the
 * components that were consumed by the query.
 */
int
arch_dsdb(RREQ req,             /* Request pointer              */
          char *name,           /* Name of the directory        */
          char **componentsp,   /* Next component of name       */
          TOKEN *rcompp,        /* Additional components        */
          VDIR dir,             /* Directory to be filled in    */
          int options,          /* Options to list command      */
          const char *rattrib,  /* Requested attributes         */
          FILTER filters)       /* Filters to be applied        */
{
    /* Note that componentsp and rcompp are pointers to   */
    /* pointers.  This is necessary because               */
    /* this routine must be able to update these values   */
    /* if more than one component of the name is          */
    /* resolved.                                          */
    char        *components = NULL;
    int         num_unresolvedcomps = 0;
    static int  dbopen = 0;
    char        fullquery[MAXPATHLEN];
    char        *dbpart;
    char        dbquery[MAXPATHLEN];
    char        dbargs[MAXPATHLEN];
    char        dbarg1[MAXPATHLEN];
    char        dbarg2[MAXPATHLEN];
    char        dbarg3[MAXPATHLEN];
    char        dirlinkname[MAXPATHLEN];
    char        sep = '\0';
    int         tmp;
    VLINK       dirlink = NULL;
    TOKEN       tkl_tmp;

    /* Make sure NAME, COMPONENTSP, and RCOMPP arguments are correct. */

    /* Name components with slashes in them are malformed inputs to the
       ARCHIE database. */
    if(componentsp && (components = *componentsp)) {
        if(index(components, '/'))
            RETURNPFAILURE;
        for (tkl_tmp = *rcompp; tkl_tmp; tkl_tmp = tkl_tmp->next)
            if (index(tkl_tmp->token, '/'))
                RETURNPFAILURE;
    } else {
        if (*rcompp) RETURNPFAILURE; /* ridiculous to specify additional comps
                                        and no initial comps.*/
    }

    /* Directory already initialized, but note that this */
    /* is not a real directory                           */
    dir->version = -1;
    dir->inc_native = 3;           /* Not really a directory */

    /* Note that if we are resolving multiple components */
    /* (rcomp!=NULL) the directory will already be empty */
    /* since had anything been in it dirsrv would have   */
    /* already cleared it and moved on to the next comp  */

    /* Do only once */
    if(!dbopen++) {
        set_default_dir(DEFAULT_DBDIR);
        if((tmp = open_db_files(DB_RDONLY)) != A_OK) {
            dbopen = 0;
            plog(L_DB_ERROR,NOREQ,"Can't open archie database",0);
            RETURNPFAILURE;
        }
    }

    /* For now, if only verifying, indicate success */
    /* We don't want to do a DB search.  Eventually */
    /* we might actually check that the directory   */
    /* is valid.                                    */
    if(options&DSDB_VERIFY) return(PSUCCESS);

    /* Construct the full query from the pieces passed to us.          */
    /* The arithmetic below assumes qsprintf() returns the number of   */
    /* bytes it needed including the trailing NUL (hence the -1) --    */
    /* TODO confirm against the qsprintf() implementation.             */
    tmp = -1 + qsprintf(fullquery,sizeof fullquery, "%s%s%s",name,
                        ((components && *components) ? "/" : ""),
                        ((components && *components) ? components : ""));
    for (tkl_tmp = *rcompp; tkl_tmp; tkl_tmp = tkl_tmp->next) {
        /* Stop as soon as the buffer is full: otherwise the next      */
        /* destination pointer and remaining-size argument would both  */
        /* be out of range.                                            */
        if (tmp + 1 > sizeof fullquery) return DSRDIR_NOT_A_DIRECTORY;
        /* Append the component being visited (not always the first    */
        /* one in the list).                                           */
        tmp += -1 + qsprintf(fullquery + tmp, sizeof fullquery - tmp,
                             "/%s", tkl_tmp->token);
    }
    if (tmp + 1 > sizeof fullquery) return DSRDIR_NOT_A_DIRECTORY;

    /* The format for the queries is            */
    /* DATABASE_PREFIX/COMMAND(PARAMETERS)/ARGS */

    /* Nothing follows the database prefix: there is no command to     */
    /* run, and skipping the prefix would step past the terminator.    */
    if (strlen(fullquery) <= strlen(archie_prefix)) return(PSUCCESS);

    /* Strip off the database prefix */
    dbpart = fullquery + strlen(archie_prefix);

    /* And we want to skip the next slash */
    dbpart++;

    /* Find the query (up to the next /), determine if the */
    /* / exists and then read the args                     */
    *dbquery = *dbargs = '\0';     /* sscanf may leave these untouched */
    tmp = sscanf(dbpart,"%[^/]%c%s",dbquery,&sep,dbargs);

    /* If no separator, for now return nothing         */
    /* Eventually, we might return a list of the query */
    /* types supported                                 */
    if(tmp < 2) return(PSUCCESS);

    /* Check query type */
    if(strncmp(dbquery,"MATCH",5)==0) {
        char        stype = 'R';     /* search type           */
        int         maxthit = 100;   /* max entries to return */
        int         maxmatch = 100;  /* max strings to match  */
        int         maxhitpm = 100;  /* max hits per match    */
        int         offset = 0;      /* entries to skip       */
        search_sel  method;          /* Search method         */
        int         onlystr = 0;     /* Just return strings   */

        /* In the MATCH querytype, the directory part of the query (the
           argument named NAME) may have no more than 3 components.
           There are 3 possible formats:
           1) DATABASE_PREFIX (one component)
           2) (1)/MATCH(...)
           3) (2)/query-term (3 total components)
           */
        if (num_slashes(name) > 2) return DSRDIR_NOT_A_DIRECTORY;

        /* if no strings to match, return nothing */
        if(tmp < 3) return(PSUCCESS);

        /* Get arguments; fall back to the older three-field form */
        tmp = sscanf(dbquery,"MATCH(%d,%d,%d,%d,%c",&maxthit,
                     &maxmatch,&maxhitpm,&offset,&stype);
        if(tmp < 3) {
            sscanf(dbquery,"MATCH(%d,%d,%c",&maxthit,&offset,&stype);
            maxmatch = maxthit;
            maxhitpm = maxthit;
        }

        /* Note: in maxhits, 0 means use default, -1 means use max */
        /* NOTE(review): the range check below rejects every value */
        /* below 1, so the "== 0" default that follows can never   */
        /* trigger.  Left as is; confirm the intended policy.      */

        /* Don't let the user request more than ABSOLUTE_MAX_HITS */
        if((maxthit > ABSOLUTE_MAX_HITS) || (maxthit < 1)) {
            p_err_string = qsprintf_stcopyr(p_err_string,
                 "Legal values for max hits are between 1 and %d ",
                 ABSOLUTE_MAX_HITS);
            return(DIRSRV_NOT_AUTHORIZED);
        }
        if(maxthit == 0) maxthit = ABSOLUTE_MAX_HITS;

        /* Map the search-type letter onto a search method and the */
        /* strings-only flag.                                      */
        switch(stype) {
        case '=':
            onlystr = 0;
            method = S_EXACT ;
            break;
        case 'C':
            onlystr = 0;
            method = S_SUB_CASE_STR ;
            break;
        case 'c':
            onlystr = 0;
            method = S_E_SUB_CASE_STR ;
            break;
        case 'K':
            onlystr = 1;
            method = S_SUB_CASE_STR ;
            break;
        case 'k':
            onlystr = 1;
            method = S_E_SUB_CASE_STR ;
            break;
        case 'R':
            onlystr = 0;
            method = S_FULL_REGEX ;
            break;
        case 'r':
            onlystr = 0;
            method = S_E_FULL_REGEX ;
            break;
        case 'X':
            onlystr = 1;
            method = S_FULL_REGEX ;
            break;
        case 'x':
            onlystr = 1;
            method = S_E_FULL_REGEX ;
            break;
        case 'z':
            onlystr = 1;
            method = S_E_SUB_NCASE_STR ;
            break;
        case 'Z':
            onlystr = 1;
            method = S_SUB_NCASE_STR ;
            break;
        case 's':
            onlystr = 0;
            method = S_E_SUB_NCASE_STR ;
            break;
        case 'S':
        default:
            onlystr = 0;
            method = S_SUB_NCASE_STR ;
            break;
        }

        *dbarg1 = *dbarg2 = *dbarg3 = '\0';
        tmp = sscanf(dbargs,"%[^/]%c%[^/]%c%s",dbarg1,&sep,dbarg2,
                     &sep,dbarg3);

        if(tmp < 2) {
            /* This specifies a directory, but not a link within it  */
            /* create a pseudo directory and return a pointer        */
            /* In other words, listing a MATCH directory by itself yields
               an empty directory. */
            if(*dbarg1 && (strcmp(dbarg1,"*")!= 0)) {
                dirlink = vlalloc();
                dirlink->target = stcopyr("DIRECTORY",dirlink->target);
                dirlink->name = stcopyr(dbarg1,dirlink->name);
                dirlink->host = stcopyr(hostwport,dirlink->host);
                qsprintf(dirlinkname,sizeof dirlinkname,"%s/%s/%s",
                         archie_prefix,dbquery,dbarg1);
                dirlink->hsoname = stcopyr(dirlinkname,dirlink->hsoname);
                vl_insert(dirlink,dir,VLI_ALLOW_CONF);
            }
        }
        else {
            if(tmp > 4) {
                /* There are remaining components */
                num_unresolvedcomps = num_slashes(dbarg3);
            }
#ifdef ABSOLUTE_MAX_GIF
            /* If looking for GIF files (arrgh) don't allow them  */
            /* to set an unreasonable number of hits, this is     */
            /* prompted by someone who set max hits to 10,000     */
            if((maxthit+offset > ABSOLUTE_MAX_GIF)&&(((strlen(dbarg1) >= 4)&&
                (strcasecmp(dbarg1+strlen(dbarg1)-4,".gif") == 0)) ||
                (strcasecmp(dbarg1,"gif") == 0))) {
                p_err_string = qsprintf_stcopyr(p_err_string,
                    "Max hits for GIF searches is %d - See archie/doc/giflist.Z on "
                    "archie.mcgill.ca for full gif list",ABSOLUTE_MAX_GIF);
                return(DIRSRV_NOT_AUTHORIZED);
            }
#endif /* ABSOLUTE_MAX_GIF */
            tmp = prarch_match(dbarg1,maxthit,maxmatch,maxhitpm,
                               offset,method,dir,FALSE,onlystr);
            if(tmp) RETURNPFAILURE;
        }
    }
    else if (strncmp(dbquery,"HOST",4)==0) {
        /* First component of args is the site name    */
        /* remaining components are the directory name */

        *dbarg1 = *dbarg2 = '\0';
        tmp = sscanf(dbargs,"%[^/]%c%s",dbarg1,&sep,dbarg2);

        /* If first component is null, return an empty directory */
        if(tmp < 1) return(PSUCCESS);

        /* if first component exists, but is last component, */
        /* then it is the name of the subdirectory for the   */
        /* host, create a pseudo directory and return a      */
        /* pointer, If first component is a wildcard, and no */
        /* additional components, then return matching list  */
        /* of sites.                                         */
        if(tmp == 1) {
            tmp = prarch_host(dbarg1,NULL,dir,A2PL_ARDIR);
            if(tmp == PRARCH_TOO_MANY) return(DIRSRV_TOO_MANY);
            if(tmp) return(tmp);
        }
        /* More than one component, Look up the requested directory  */
        /* Note that the since the full query is passed to us, it    */
        /* includes the component name, thus the directory name is   */
        /* what you get when you strip off the last component of the */
        /* name                                                      */
        else {
            char *lastsep = rindex(dbarg2,'/');
            if(lastsep) *lastsep++ = '\0';
            else *dbarg2 = '\0';
            tmp = prarch_host(dbarg1,dbarg2,dir,A2PL_ARDIR);
            if(tmp == PRARCH_DONT_HAVE_SITE)
                return(DSRDIR_NOT_A_DIRECTORY);
            if(tmp) RETURNPFAILURE;
        }
    }
    else {
        /* Query type not supported */
        return(DSRDIR_NOT_A_DIRECTORY);
    }

    /* We are done, but we need to figure out if we resolved multiple
       components and reset *componentsp and *rcompp appropriately. */
    if (num_unresolvedcomps) {
        int skip = tkllength(*rcompp) - num_unresolvedcomps;
        if (skip < 0) return DSRDIR_NOT_A_DIRECTORY; /* shouldn't happen. */
        while(skip-- > 0) {
            assert(*rcompp);
            *componentsp = (*rcompp)->token;
            *rcompp = (*rcompp)->next;
        }
    } else {
        while (*rcompp) {
            *componentsp = (*rcompp)->token;
            *rcompp = (*rcompp)->next;
        }
    }
    return(PSUCCESS);
}
/* Count the entries in the token list TKL by following the next pointers. */
static int
tkllength(TOKEN tkl)
{
    int count = 0;
    TOKEN cur = tkl;

    while (cur) {
        count++;
        cur = cur->next;
    }
    return count;
}
/* Return how many '/' characters occur in the NUL-terminated string S. */
static int
num_slashes(char *s)
{
    int count = 0;
    char *p = s;

    while (*p != '\0') {
        if (*p++ == '/')
            count++;
    }
    return count;
}

View File

@@ -0,0 +1,77 @@
/*
* Copyright (c) 1991 by the University of Washington
* Copyright (c) 1993 by the University of Southern California
*
* For copying and distribution information, please see the files
* <uw-copyright.h> and <usc-copyr.h>.
*/
#include <uw-copyright.h>
#include <usc-copyr.h>
#include <ardp.h>
#include <pfs.h>
#include <perrno.h>
static assign_priority();
/*
 * Comparison routine for two requests: fills in pf_priority for either
 * request that does not have one yet, then returns -1, 0 or 1 according to
 * whether r1's priority value is lower than, equal to, or higher than r2's.
 */
arch_prioritize_request(r1,r2)
    RREQ r1,r2;
{
    /* Assign a priority only when the field is still zero */
    if(r1->pf_priority == 0)
        r1->pf_priority = assign_priority(r1);
    if(r2->pf_priority == 0)
        r2->pf_priority = assign_priority(r2);

    if(r1->pf_priority < r2->pf_priority)
        return(-1);
    if(r1->pf_priority == r2->pf_priority)
        return(0);
    return(1);
}
/*
 * Compute a priority value for request r1 from the text of the request.
 *
 * Small fixed values are returned early: 2 when prcvd_thru is positive,
 * 1 when the text does not mention ARCHIE, 3 when it has no MATCH, and 4
 * when the MATCH arguments cannot be parsed.  Otherwise the value is built
 * from the search type, the request format, the number of hits asked for
 * (max hits plus any positive offset) and a surcharge for "gif" searches.
 */
static assign_priority(r1)
    RREQ r1;
{
    char *query;
    int hits = 0;
    int matches = 0;
    int hits_per_match = 0;
    int skip;
    char method;
    int nfields;
    int wanted;
    int priority;

    /* Result is probably cached, use it or lose it */
    if(r1->prcvd_thru > 0)
        return(2);

    query = sindex(r1->rcvd->start,"ARCHIE");
    if(!query)
        return(1);

    query = sindex(query,"MATCH");
    if(!query)
        return(3);

    /* Try the five-field form first, then the older three-field form */
    nfields = sscanf(query,"MATCH(%d,%d,%d,%d,%c",&hits,&matches,
                     &hits_per_match,&skip,&method);
    if(nfields != 5)
        nfields = sscanf(query,"MATCH(%d,%d,%c",&hits,&skip,&method);
    if(nfields < 3)
        return(4);

    /* Base value by search type */
    switch(method) {
    case '=':
        priority = 0;
        break;
    case 'r':
    case 'x':
        priority = 700;
        break;
    case 'R':
    case 'X':
        priority = 800;
        break;
    default:
        priority = 100;
        break;
    }

    /* If old format request, then add penalty */
    if(nfields != 5)
        priority += 100;

    /* Add for the number of entries requested */
    wanted = hits;
    if(skip > 0)
        wanted += skip;

    if(wanted > 10000)
        priority += 10000;
    else if(wanted > 100)
        priority += wanted;
    else
        priority += 100;

    if(sindex(query,"gif") || sindex(query,"GIF"))
        priority += 20000;

    return(priority);
}

View File

@@ -0,0 +1,31 @@
#include <stdio.h>
#include <sys/types.h>
#include <string.h>
#include <defines.h>
#include <structs.h>
#include <database.h>
extern datestruct unpack_date();
/*
 * Format the packed archie date ENTRY_DB as a "YYYYMMDDhhmm00Z" string.
 * An hour equal to MAX_HOUR is printed as 00:00.  The month field is
 * printed as month+1.  The result lives in a static buffer that is
 * overwritten by the next call.
 */
char *atopdate(entry_db)
    db_date entry_db;
{
    static char stamp[20];
    datestruct when;

    when = unpack_date(entry_db);

    if(when.hour == MAX_HOUR) {
        when.min = 0;
        when.hour = 0;
    }

    (void) sprintf(stamp,"%04d%02d%02d%02d%02d00Z",
                   when.year, when.month+1, when.day,
                   when.hour, when.min);
    return(stamp);
}

View File

@@ -0,0 +1,147 @@
#include <stdio.h>
#include <sys/types.h>
#include <database.h>
#include <sys/mman.h>
#include <defines.h>
#include <structs.h>
#include <pfs.h>
#include <psite.h>
#include "prarch.h"
extern FILE *strings_table;
extern char *strings_begin;
extern char hostname[];
extern char hostwport[];
extern char archie_prefix[];
char *perms_itoa();
char *print_date();
char *atopdate();
char *strstr();
/*
 * atoplink - build a Prospero link from an archie site record.
 *
 * SOP supplies the site name, the directory path and the site record;
 * FLAGS is a mask of the A2PL_* bits.  With A2PL_ROOT the link names the
 * site itself; otherwise it names the entry described by sop->site_ent.
 * Entries whose dir_or_f is 'T' (and all root links) become DIRECTORY
 * links on this server; everything else becomes an EXTERNAL link on the
 * site's own host.
 *
 * Returns the new link, or NULL if no link could be allocated.
 */
VLINK atoplink(site_out *sop,   /* Site output pointer      */
               int flags)       /* Flags: A2PL_* bit mask   */
{
    VLINK       vl = vlalloc();           /* New link                    */
    char        fullpath[MAX_STRING_LEN];
    char        namebuf[MAX_STRING_LEN];
    char        *endname = NULL;          /* Start of " -> " if present  */
    char        modestring[20];
    char        atval[256];
    char        *nameptr = NULL;          /* Last component of file name */
    char        *ptr;
    const char  *pathsep = "/";           /* Joins site_path and name    */
    site_rec    *srp = &(sop->site_ent);  /* Site record pointer         */

    /* Callers already test the result, so report failure that way */
    if(!vl) return(NULL);

    /* For now, all directory pointers are to pseudo-directories */
    flags |= A2PL_ARDIR;

    if((flags & A2PL_ROOT) || (srp->dir_or_f == 'T')) {
        /* It's a directory - we should check to see if the site is    */
        /* running prospero, and if so return a pointer to the actual  */
        /* directory.  If it isn't then we return a real pointer to    */
        /* a pseudo-directory maintained by this archie server.        */
        vl->target = stcopyr("DIRECTORY",vl->target);
    }
    else {
        /* It's a file - we should check to see if the site is         */
        /* running prospero, and if so return a pointer to the real    */
        /* file.  If it isn't, then we generate an external link       */
        vl->target = stcopyr("EXTERNAL",vl->target);
        ad2l_am_atr(vl,"AFTP","BINARY",NULL);
        flags &= (~A2PL_ARDIR);
    }

    if(flags & A2PL_ARDIR) vl->host = stcopyr(hostwport,vl->host);
    else vl->host = stcopyr(sop->site_name,vl->host);

    /* Get the last component of name */
    if(flags & A2PL_ROOT) vl->name = stcopyr(sop->site_name,vl->name);
    else {
        size_t pathlen = strlen(sop->site_path);

        strncpy(namebuf,strings_begin + srp->in_or_addr.strings_ind +
                sizeof(strings_header),sizeof(namebuf));
        namebuf[sizeof(namebuf)-1] = '\0';
        /* Cut the name at " -> " and remember that it was there */
        if((endname = strstr(namebuf," -> ")) != NULL) *endname = '\0';
        nameptr = namebuf;
        vl->name = stcopyr(nameptr,vl->name);

        /* Only add a separator when the path does not already end in */
        /* one; an empty path must not be indexed at [-1].            */
        if((pathlen > 0) && (sop->site_path[pathlen - 1] == '/'))
            pathsep = "";
    }

    /* Build the object name.  The pieces together can be longer than */
    /* fullpath, so the formatting is bounded and truncates.          */
    if(flags & A2PL_ARDIR) {
        if(flags & A2PL_ROOT)
            snprintf(fullpath,sizeof(fullpath),"%s/HOST/%s",
                     archie_prefix, sop->site_name);
        else
            snprintf(fullpath,sizeof(fullpath),"%s/HOST/%s%s%s%s",
                     archie_prefix, sop->site_name, sop->site_path,
                     pathsep, (nameptr ? nameptr : ""));
    }
    else {
        if(flags & A2PL_ROOT)
            snprintf(fullpath,sizeof(fullpath),"/");
        else
            snprintf(fullpath,sizeof(fullpath),"%s%s%s",sop->site_path,
                     pathsep, (nameptr ? nameptr : ""));
    }
    vl->hsoname = stcopyr(fullpath,vl->hsoname);

    if(!(flags & A2PL_ROOT)) {
        /* Here we can add cached attribute values from the archie   */
        /* database such as size, protection, and last modified time */
        sprintf(atval,"%d bytes",srp->size);
        ad2l_seq_atr(vl,ATR_PREC_CACHED,ATR_NATURE_INTRINSIC,
                     "SIZE",atval,NULL);

        /* Directory modes in unix string format; the first character */
        /* is 'l' when the name contained " -> ", else 'd' or '-'.    */
        if((ptr = perms_itoa(srp->perms)) != NULL) {
            if(endname)
                snprintf(modestring,sizeof(modestring),"%c%s",'l',ptr);
            else
                snprintf(modestring,sizeof(modestring),"%c%s",
                         ((srp->dir_or_f=='T')?'d':'-'),ptr);
            ad2l_seq_atr(vl,ATR_PREC_CACHED,ATR_NATURE_INTRINSIC,
                         "UNIX-MODES", modestring, NULL);
        }

        /* Modified date - in prospero format */
        if((ptr = atopdate(srp->mod_time)) != NULL) {
            ad2l_seq_atr(vl,ATR_PREC_CACHED,ATR_NATURE_INTRINSIC,
                         "LAST-MODIFIED", ptr, NULL);
        }
    }

    if((flags & A2PL_ROOT) || (flags & A2PL_H_LAST_MOD)) {
        /* Modified date - in prospero format */
        if((ptr = atopdate(sop->site_mod_time)) != NULL)
            ad2l_seq_atr(vl,ATR_PREC_CACHED,ATR_NATURE_APPLICATION,
                         "AR_H_LAST_MOD", ptr, NULL);
    }

    if((flags & A2PL_ROOT) || (flags & A2PL_H_IP_ADDR)) {
        /* Host IP Address */
        /* NOTE(review): no header visible in this file declares      */
        /* inet_ntoa(); confirm a prototype is in scope, since an     */
        /* implicit int return would truncate the pointer on LP64.    */
        if(sop->site_ipaddr.s_addr)
            ad2l_seq_atr(vl,ATR_PREC_CACHED,ATR_NATURE_APPLICATION,
                         "AR_H_IP_ADDR", inet_ntoa(sop->site_ipaddr),
                         NULL);
    }

    return(vl);
}
/*
 * atoqlink - build a DIRECTORY link on this server whose object name is
 * an exact-match query, "<prefix>/MATCH(maxhit,maxmatch,maxhitpm,0,=)/str".
 * The link is named STR.  Returns the new link, or NULL if no link could
 * be allocated.
 */
VLINK atoqlink(char *str,int maxhit,int maxmatch,int maxhitpm)
{
    VLINK vl = vlalloc();
    char fullpath[MAX_STRING_LEN];

    if(!vl) return(NULL);

    /* Bounded: str comes from outside this routine and is not length-checked */
    snprintf(fullpath,sizeof(fullpath),"%s/MATCH(%d,%d,%d,0,=)/%s",
             archie_prefix, maxhit, maxmatch, maxhitpm, str);

    /* Copy into the name field itself; the host field is set separately */
    vl->name = stcopyr(str,vl->name);
    vl->target = stcopyr("DIRECTORY",vl->target);
    vl->hsoname = stcopyr(fullpath,vl->hsoname);
    vl->host = stcopyr(hostwport,vl->host);
    return(vl);
}

View File

@@ -0,0 +1,42 @@
/* Error codes returned by prarch routines */
#define PRARCH_SUCCESS 0 /* Successful completion */
#define PRARCH_BAD_ARG 1 /* Bad argument */
#define PRARCH_OUT_OF_MEMORY 2 /* Can't allocate enough space */
#define PRARCH_BAD_REGEX 3 /* Bad regular expression */
#define PRARCH_DONT_HAVE_SITE 4 /* Can't find site file */
#define PRARCH_CANT_OPEN_FILE 5 /* Can't open DB file */
#define PRARCH_DB_ERROR 6 /* Database Error */
#define PRARCH_CLEANUP 7 /* Cleanup failed */
#define PRARCH_TOO_MANY 8 /* Too many matches */
/* For constructing link attributes */
#define A2PL_H_IP_ADDR 0x001
#define A2PL_HOSTIP 0x001
#define A2PL_H_OS_TYPE 0x002
#define A2PL_H_TIMEZ 0x004
#define A2PL_LK_LAST_MOD 0x020
#define A2PL_LINK_COUNT 0x040
#define A2PL_LINK_SZ 0x080
#define A2PL_NATIVE_MODES 0x100
#define A2PL_H_LAST_MOD 0x200
#define A2PL_SITEDATE 0x200
#define A2PL_UNIX_MODES 0x800
#define A2PL_ROOT 0x10000
#define A2PL_ARDIR 0x40000
/* Structure definitions */
/*
 * site_out_t - one archie site record together with the per-site values
 * needed to turn it into a link.  prarch_host() fills one of these in and
 * passes it to atoplink().
 */
struct site_out_t{
struct in_addr site_ipaddr;       /* Copied from the root record's in_or_addr.ipaddress */
db_date site_mod_time;            /* Copied from the root record's mod_time */
char site_name[MAX_HOST_LEN];     /* Passed to print_sinfo() as an output buffer; used as the site's name */
char site_update[SMALL_STR_LEN];  /* Presumably the site's update date; not referenced by the code in view */
char site_path[MAX_FILE_NAME];    /* Directory path of the entry, as produced by find_ancestors() */
site_rec site_ent;                /* The site record being converted */
};
typedef struct site_out_t site_out;
char *get_host_file_name();
struct vlink *atoplink();

View File

@@ -0,0 +1,293 @@
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#include <sys/param.h>
#include <sys/mman.h>
/* Archie definitions */
#include <ndbm.h>
#include <defines.h>
#include <archie_defs.h>
#include <structs.h>
#include <database.h>
#include <error.h>
#include "prarch.h"
#include <pfs.h>
#include <perrno.h>
#include <plog.h>
#include <pmachine.h> /* For bzero */
#define TOO_MANY_HOSTS 200
/*
 * prarch_host - Search host for contents of directory
 *
 *      ARGS:   site_name - name of host for which search is to be made;
 *                          when dirname is NULL it may also be a wildcard
 *                          pattern or a parenthesised regular expression
 *              dirname   - name of directory to return (NULL if root)
 *              vd        - pointer to directory to be filled in
 *              flags     - A2PL_* bits selecting link attributes
 *
 *   With dirname == NULL one link per matching site is inserted into vd.
 *   Otherwise the site file is scanned and a link is inserted for every
 *   entry in the directory whose path equals dirname.
 *
 *   Returns PRARCH_SUCCESS or one of the other PRARCH_* codes.
 */
int prarch_host(char *site_name,  /* Name of host to be searched      */
                char *dirname,    /* Name of directory to be listed   */
                VDIR vd,          /* Directory to be filled in        */
                int flags)        /* Flags: Which attributes to use   */
{
    site_out    so;
    char        *host_name;
    char        result[MAX_STRING_LEN];
    char        date_str[SMALL_STR_LEN];
    char        hostip_str[SMALL_STR_LEN];
    site_rec    curr_site_rec;
    site_rec    rootrec;
    int         recno;
    int         last_parent = -1;
    site_rec    *site_ptr;
    int         correct_dir = 0;  /* Scanning the requested directory  */
    int         loopcount = 0;    /* To decide when to call ardp_accept */
    VLINK       clink;            /* Current link                      */
    FILE        *fp;
    caddr_t     site_begin;
    site_rec    *site_end;
    struct stat statbuf;

    if(!dirname) {      /* Find host directory */
        char    hosttemp[200];
        char    *p = hosttemp;
        char    *htemp = site_name;
        char    tmp1[MAX_STRING_LEN];
        char    tmp2[MAX_STRING_LEN];
        char    **test;
        int     i;

        /* If a single wildcard, then return nothing */
        if(strcmp(site_name,"*") == 0) return(PRARCH_SUCCESS);

        /* If regular expressions or wildcards */
        if((index(site_name,'(') || index(site_name,'?') ||
            index(site_name,'*'))) {
            if((*htemp == '(') && (*(htemp + strlen(htemp)-1) == ')')) {
                /* "(regex)": use the text between the parentheses */
                strncpy(hosttemp,htemp+1,sizeof(hosttemp));
                hosttemp[sizeof(hosttemp)-1] = '\0';
                hosttemp[strlen(hosttemp)-1] = '\0';
            }
            else {
                /* Translate the wildcard pattern into an anchored     */
                /* regular expression.  Each input character produces  */
                /* at most two output bytes and "$\0" is appended at   */
                /* the end, so stop while four bytes are still free.   */
                char *limit = hosttemp + sizeof(hosttemp) - 4;

                *p++ = '^';
                while(*htemp) {
                    if(p > limit) {
                        p_err_string = qsprintf_stcopyr(p_err_string,
                              "archie host pattern too long");
                        return(PRARCH_BAD_REGEX);
                    }
                    if(*htemp == '*') {*(p++)='.'; *(p++) = *(htemp++);}
                    else if(*htemp == '?') {*(p++)='.';htemp++;}
                    else if(*htemp == '.') {*(p++)='\\';*(p++)='.';htemp++;}
                    else if(*htemp == '[') {*(p++)='\\';*(p++)='[';htemp++;}
                    else if(*htemp == '$') {*(p++)='\\';*(p++)='$';htemp++;}
                    else if(*htemp == '^') {*(p++)='\\';*(p++)='^';htemp++;}
                    else if(*htemp == '\\') {*(p++)='\\';*(p++)='\\';htemp++;}
                    else *(p++) = *(htemp++);
                }
                *p++ = '$';
                *p++ = '\0';
            }

            /* find_sites() reports errors as small integers cast to a */
            /* pointer, hence the casts back to int below.             */
            test = (char **) find_sites(hosttemp,&i,tmp1);
            if((int) test == BAD_REGEX) {
                p_err_string = qsprintf_stcopyr(p_err_string,
                      "archie find_sites(): bad regular expression");
                return(PRARCH_BAD_REGEX);
            }
            if((int) test == DB_HBYADDR_ERROR) {
                p_err_string = qsprintf_stcopyr(p_err_string,
                      "archie find_sites() hostbyaddr error");
                return(PRARCH_DB_ERROR);
            }
            if((int) test == BAD_MALLOC) {
                p_err_string = qsprintf_stcopyr(p_err_string,
                      "archie find_sites() out of memory");
                return(PRARCH_OUT_OF_MEMORY);
            }

            if(i > TOO_MANY_HOSTS) {
                /* NOTE(review): only test[i] is released here; the   */
                /* entries below index i and the array itself are     */
                /* not.  Confirm find_sites()'s ownership rules.      */
                free(test[i]);
                return(PRARCH_TOO_MANY);
            }

            /* One root link per matching site.  A site that cannot be */
            /* read is logged and skipped.                             */
            while( i-- ) {
                get_site_file(test[i],tmp2);
                if((fp = fopen(db_file(tmp2),"r")) == (FILE *) NULL) {
                    plog(L_DB_ERROR,NOREQ,"fopen failed for %s",db_file(tmp2));
                    free(test[i]);
                    continue;
                }
                if(fstat(fileno(fp),&statbuf) == -1) {
                    plog(L_DB_ERROR,NOREQ,"can't stat site file %s",db_file(tmp2));
                    fclose(fp);
                    free(test[i]);
                    continue;
                }
                site_begin = mmap(0,statbuf.st_size,PROT_READ,MAP_SHARED,
                                  fileno(fp),0);
                if((site_begin == (caddr_t)-1) || (site_begin == (caddr_t)NULL)){
                    plog(L_DB_ERROR,NOREQ,"can't map site file %s",db_file(tmp2));
                    fclose(fp);
                    free(test[i]);
                    continue;
                }

                bzero(&so,sizeof(so));
                if(print_sinfo(site_begin,so.site_name,hostip_str,date_str) != 0) {
                    plog(L_DB_ERROR,NOREQ,"can't obtain site info from %s",
                         db_file(tmp2));
                    munmap(site_begin,statbuf.st_size);
                    fclose(fp);
                    free(test[i]);
                    continue;
                }

                /* The root is the first record in the site after site info */
                rootrec = *(((site_rec *) site_begin));
                bcopy(&rootrec,&(so.site_ent),sizeof(rootrec));
                bcopy(&(rootrec.in_or_addr.ipaddress),&(so.site_ipaddr),
                      sizeof(so.site_ipaddr));
                bcopy(&(rootrec.mod_time),&(so.site_mod_time),
                      sizeof(so.site_mod_time));

                clink = atoplink(&so,flags|A2PL_ARDIR|A2PL_ROOT);
                if(clink) vl_insert(clink,vd,VLI_NOSORT);

                if(munmap(site_begin,statbuf.st_size) == -1) {
                    plog(L_DB_ERROR,NOREQ,"archie munmap() failed on %s",db_file(tmp2));
                    fclose(fp);
                    free(test[i]);
                    return(PRARCH_CLEANUP);
                }
                fclose(fp);
                free(test[i]);
            }
            return(PRARCH_SUCCESS);
        }
        /* No regular expression or wildcards */
        else {
            if(( host_name = get_host_file_name( site_name )) == (char *)NULL )
                return(PRARCH_SUCCESS); /* No match */

            if((fp = fopen(host_name,"r")) == (FILE *) NULL)
                return(PRARCH_CANT_OPEN_FILE);

            /* Log messages name host_name, the file actually opened in */
            /* this branch.                                             */
            if(fstat(fileno(fp),&statbuf) == -1) {
                plog(L_DB_ERROR,NOREQ,"can't stat site file %s",host_name);
                fclose(fp);
                return(PRARCH_CANT_OPEN_FILE);
            }
            site_begin = mmap(0,statbuf.st_size,PROT_READ,MAP_SHARED,
                              fileno(fp),0);
            if((site_begin == (caddr_t)-1) || (site_begin == (caddr_t)NULL)){
                plog(L_DB_ERROR,NOREQ,"can't map site file %s",host_name);
                fclose(fp);
                return(PRARCH_CANT_OPEN_FILE);
            }

            bzero(&so,sizeof(so));
            if(print_sinfo(site_begin,so.site_name,hostip_str,date_str) != 0) {
                plog(L_DB_ERROR,NOREQ,"can't obtain site info from %s",
                     host_name);
                munmap(site_begin,statbuf.st_size);
                fclose(fp);
                return(PRARCH_DB_ERROR);
            }

            /* The root is the first record in the site after site info */
            rootrec = *(((site_rec *) site_begin));
            bcopy(&rootrec,&(so.site_ent),sizeof(rootrec));
            bcopy(&(rootrec.in_or_addr.ipaddress),&(so.site_ipaddr),
                  sizeof(so.site_ipaddr));
            bcopy(&(rootrec.mod_time),&(so.site_mod_time),
                  sizeof(so.site_mod_time));

            clink = atoplink(&so,flags|A2PL_ARDIR|A2PL_ROOT);
            if(clink) {
                /* Name the link exactly as the caller spelled the site */
                clink->name = stcopyr(site_name,clink->name);
                vl_insert(clink,vd,VLI_NOSORT);
            }

            if(munmap(site_begin,statbuf.st_size) == -1) {
                plog(L_DB_ERROR,NOREQ,"archie munmap() failed on %s",host_name);
                fclose(fp);
                return(PRARCH_CLEANUP);
            }
            fclose(fp);
            return(PRARCH_SUCCESS);
        }
    }

    /* A directory was named: list its entries from the site file */
    bzero(&so,sizeof(so));

    if(( host_name = get_host_file_name( site_name )) == (char *)NULL )
        return(PRARCH_DONT_HAVE_SITE);

    if((fp = fopen(host_name, "r")) == NULL)
        return(PRARCH_CANT_OPEN_FILE);

    if(fstat(fileno(fp),&statbuf) == -1) {
        fclose(fp);
        return(PRARCH_CANT_OPEN_FILE);
    }

    site_begin = mmap(0,statbuf.st_size,PROT_READ,MAP_SHARED,
                      fileno(fp),0);
    if((site_begin == (caddr_t) -1) || (site_begin == (caddr_t) NULL)) {
        fclose(fp);
        return(PRARCH_CANT_OPEN_FILE);
    }

    if(print_sinfo(site_begin,so.site_name,hostip_str,date_str) != 0) {
        munmap(site_begin,statbuf.st_size);
        fclose(fp);
        return(PRARCH_DB_ERROR);
    }

    site_end = (site_rec *)site_begin + statbuf.st_size / sizeof(site_rec);

    rootrec = *(((site_rec *) site_begin));
    bcopy(&(rootrec.in_or_addr.ipaddress),&(so.site_ipaddr),
          sizeof(so.site_ipaddr));
    bcopy(&(rootrec.mod_time),&(so.site_mod_time),
          sizeof(so.site_mod_time));

    /* Record 0 is the root; walk the remaining records */
    for(recno = 1;(site_ptr = (site_rec *)site_begin + recno) < site_end;
        recno++){
        if((loopcount++ & 0x3ff) == 0) ardp_accept();
        curr_site_rec = *site_ptr;

        /* The parent changed, so work out which directory we are in */
        if(last_parent != curr_site_rec.parent_ind){
            if(find_ancestors(site_begin, recno, result) != 0) {
                munmap(site_begin,statbuf.st_size);
                fclose(fp);
                return(PRARCH_DB_ERROR);
            }
            last_parent = curr_site_rec.parent_ind;

            /* Don't want to check the leading / */
            if(strcmp(dirname,result+1) == 0) {
                correct_dir++;
                /* Bounded copy: result may be longer than site_path */
                strncpy(so.site_path,result,sizeof(so.site_path)-1);
                so.site_path[sizeof(so.site_path)-1] = '\0';
            }
            /* Left the requested directory: nothing more to list */
            else if(correct_dir) break;
        }

        bcopy(&curr_site_rec,&(so.site_ent),sizeof(curr_site_rec));

        if(correct_dir) {
            if((loopcount & 0x7f) == 0) ardp_accept();
            clink = atoplink(&so,flags);
            if(clink) vl_insert(clink,vd,VLI_NOSORT);
        }
    }

    munmap(site_begin,statbuf.st_size);
    fclose(fp);
    return(PRARCH_SUCCESS);
}

View File

@@ -0,0 +1,637 @@
/*XXX Note to make this thread safe, need to mutex re_comp and re_exec */
#include <stdio.h>
#include <sys/types.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h> /* For malloc and free */
#define _toupper(c) ((c)-'a'+'A')
#ifdef MMAP
#include <sys/mman.h>
#endif
/* Archie definitions */
#include <ndbm.h>
#include <defines.h>
#include <structs.h>
#include <database.h>
#include <error.h>
#include "prarch.h"
#include <ardp.h>
#include <pfs.h>
#include <perrno.h>
#include <plog.h>
VLINK atoplink();
VLINK atoqlink();
char *re_comp();
char *make_lcase();
int get_match_list();
extern char *strings_begin;
extern long strings_table_size;
extern DBM *fast_strings;
/* So we can adjust our cache policy based on queue length */
extern int pQlen;
/*
 * Lookup table indexed by byte value (0-255).  The entries for 'A'-'Z'
 * hold the corresponding lower-case letter; every other entry holds the
 * byte value itself.
 */
static char lowertable[256] = {
'\000','\001','\002','\003','\004','\005','\006','\007',
'\010','\011','\012','\013','\014','\015','\016','\017',
'\020','\021','\022','\023','\024','\025','\026','\027',
'\030','\031','\032','\033','\034','\035','\036','\037',
' ','!','"','#','$','%','&','\'',
'(',')','*','+',',','-','.','/',
'0','1','2','3','4','5','6','7',
'8','9',':',';','<','=','>','?',
'@','a','b','c','d','e','f','g',
'h','i','j','k','l','m','n','o',
'p','q','r','s','t','u','v','w',
'x','y','z','[','\\',']','^','_',
'`','a','b','c','d','e','f','g',
'h','i','j','k','l','m','n','o',
'p','q','r','s','t','u','v','w',
'x','y','z','{','|','}','~','\177',
'\200','\201','\202','\203','\204','\205','\206','\207',
'\210','\211','\212','\213','\214','\215','\216','\217',
'\220','\221','\222','\223','\224','\225','\226','\227',
'\230','\231','\232','\233','\234','\235','\236','\237',
'\240','\241','\242','\243','\244','\245','\246','\247',
'\250','\251','\252','\253','\254','\255','\256','\257',
'\260','\261','\262','\263','\264','\265','\266','\267',
'\270','\271','\272','\273','\274','\275','\276','\277',
'\300','\301','\302','\303','\304','\305','\306','\307',
'\310','\311','\312','\313','\314','\315','\316','\317',
'\320','\321','\322','\323','\324','\325','\326','\327',
'\330','\331','\332','\333','\334','\335','\336','\337',
'\340','\341','\342','\343','\344','\345','\346','\347',
'\350','\351','\352','\353','\354','\355','\356','\357',
'\360','\361','\362','\363','\364','\365','\366','\367',
'\370','\371','\372','\373','\374','\375','\376','\377'};
/* Maximum number of query results kept in the cache (see add_to_cache) */
#define MATCH_CACHE_SIZE 15
/*
 * One cached query result.  Entries form a singly linked list headed by
 * mcache; check_cache() moves an entry to the front when it is hit, and
 * add_to_cache() reuses the last entry once the cache is full.
 */
struct match_cache {
char *arg; /* Search string the results belong to */
int max_hits; /* Maximum matches, <0 = found all */
int offset; /* Offset */
search_sel search_type; /* Search method (the one used) */
search_sel req_type; /* Requested method */
VLINK matches; /* Matches */
VLINK more; /* Additional matches (cut off matches by check_cache) */
int flags; /* Flags: for link attributes */
struct match_cache *next; /* Next entry in cache */
};
/* Head of the cache list */
static struct match_cache *mcache = NULL;
/* Number of entries allocated so far; never exceeds MATCH_CACHE_SIZE */
static int cachecount = 0;
/*
 * prarch_match - Search archie database for specified file
 *
 *   PRARCH_MATCH searches the archie strings database for entries
 *   matching the supplied pattern and fills the directory vd with
 *   one link per hit (or one link per matching string if onlystrings
 *   is set).
 *
 *   ARGS:  program_name - pattern (string or regular expression) to match
 *          max_hits     - maximum number of entries to return
 *          max_match    - maximum number of unique strings (only
 *                         consulted when onlystrings is set)
 *          max_hitspm   - maximum hits per matching string
 *          offset       - skip this many hits before returning any
 *          search_type  - search method (see below)
 *          vd           - directory to be filled in; must be empty
 *          flags        - link attribute flags, passed to atoplink and
 *                         used as part of the cache key
 *          onlystrings  - flag - only return strings, not matches
 *                         (the result cache is bypassed in this mode)
 *
 *   Search method is one of:  S_FULL_REGEX
 *                             S_EXACT
 *                             S_SUB_NCASE_STR
 *                             S_SUB_CASE_STR
 *   or one of S_E_FULL_REGEX, S_E_SUB_NCASE_STR, S_E_SUB_CASE_STR, which
 *   try an exact lookup first and fall back to the named method only if
 *   the exact lookup finds nothing.
 *
 *   RETURNS:  PSUCCESS / PRARCH_SUCCESS on success,
 *             PRARCH_BAD_ARG       - empty or over-long pattern, non-empty
 *                                    directory, or unknown search method
 *             PRARCH_BAD_REGEX     - re_comp rejected the pattern
 *             PRARCH_OUT_OF_MEMORY - could not allocate the hit table
 *             PRARCH_DB_ERROR      - get_match_list failed
 */
int prarch_match(char *program_name,     /* Regular expression to be matched */
                 int max_hits,           /* Maximum number of entries to rtrn */
                 int max_match,          /* Maximum number of unique strings  */
                 int max_hitspm,         /* Maximum hits per match            */
                 int offset,             /* Skip # matches before starting    */
                 search_sel search_type, /* Search method                     */
                 VDIR vd,                /* Directory to be filled in         */
                 int flags,              /* Flag for link attributes          */
                 int onlystrings)        /* Only return matching strings      */
{
    /*
     * Search the database for the string specified by 'program_name'.  Use
     * the fast dbm strings database for exact lookups, otherwise search
     * through the strings table.  Stop searching after all matches have been
     * found, or 'max_hits' matches have been found, whichever comes first.
     */
    char s_string[MAX_STRING_LEN];
    char *strings_ptr;
    char *strings_curr_off;
    strings_header str_head;
    datum search_key, key_value;
    search_sel new_search_type = S_EXACT;    /* Alternate search method */
    search_sel or_search_type = search_type; /* Original search method  */
    int nocase = 0;
    int hits_exceeded = FALSE;               /* should be boolean? */
    char *strings_end;
    int match_number;
    site_out **site_outptr;
    site_out site_outrec;
    int i;
    VLINK cur_link;
    int loopcount = 0;
    int retval;
    int match_rem = max_match;

    if(!program_name || !(*program_name)) return(PRARCH_BAD_ARG);

    /* The pattern is copied into fixed MAX_STRING_LEN buffers below; */
    /* reject anything that would not fit instead of overrunning them */
    if(strlen(program_name) >= MAX_STRING_LEN) return(PRARCH_BAD_ARG);

    if((0 < max_hits) && (max_hits < match_rem)) match_rem = max_hits;
    if((0 < max_hits) && (max_hits < max_hitspm)) max_hitspm = max_hits;

    strcpy(s_string, program_name);

    /* See if we can use a less expensive search method */
    if((search_type == S_FULL_REGEX) || (search_type == S_E_FULL_REGEX)) {
        /* Regex search assumes wildcards on both ends, so remove from string */
        if(strncmp(program_name,".*",2) == 0)
            strcpy(s_string, program_name+2);
        if((i = strlen(s_string)) >= 2) {
            if(strcmp(s_string+i-2,".*") == 0)
                *(s_string+i-2) = '\0';
        }
        /* If no special characters, then fall back to substring search */
        if((search_type == S_FULL_REGEX) &&
           (strpbrk(s_string,"\\^$.,[]<>*+?|(){}/") == NULL))
            or_search_type = search_type = S_SUB_CASE_STR;
        else if((search_type == S_E_FULL_REGEX) &&
                (strpbrk(s_string,"\\^$.,[]<>*+?|(){}/") == NULL))
            or_search_type = search_type = S_E_SUB_CASE_STR;
    }

    /* The caching code assumes we are handed an empty directory */
    /* if not, return an error for now.  Eventually we will get  */
    /* rid of that assumption                                    */
    if(vd->links) {
        plog(L_DIR_ERR, NOREQ, "Prarch_match handed non empty dir",0);
        return(PRARCH_BAD_ARG);
    }

    if(!onlystrings && (check_cache(s_string,max_hits,offset,search_type,
                                    flags,&(vd->links)) == TRUE)) {
        plog(L_DB_INFO, NOREQ, "Responding with cached data",0);
        return(PSUCCESS);
    }

    /* NOTE(review): this is sized in units of site_out although it holds */
    /* site_out pointers; left as is because get_match_list may add up to */
    /* max_hitspm entries before the limit is checked - confirm.          */
    site_outptr = (site_out **) malloc((unsigned)(sizeof(site_out) *
                                                  (max_hits + offset)));
    if(!site_outptr) return(PRARCH_OUT_OF_MEMORY);

 startsearch:

    strings_ptr = strings_begin;
    strings_end = strings_begin + (int) strings_table_size;
    match_number = 0;

    switch(search_type){

    case S_E_SUB_CASE_STR:
        new_search_type = S_SUB_CASE_STR;
        goto exact_match;
    case S_E_SUB_NCASE_STR:
        new_search_type = S_SUB_NCASE_STR;
        goto exact_match;
    case S_E_FULL_REGEX:
        new_search_type = S_FULL_REGEX;
    exact_match:
    case S_EXACT:
        search_key.dptr = s_string;
        search_key.dsize = strlen(s_string) + 1;

        ardp_accept();
        key_value = dbm_fetch(fast_strings, search_key) ;

        if(key_value.dptr != (char *)NULL){ /* string in table */
            int string_pos;

            bcopy(key_value.dptr,(char *)&string_pos, key_value.dsize);
            strings_ptr += string_pos;
            bcopy(strings_ptr,(char *)&str_head,sizeof(strings_header));

            ardp_accept();
            if(onlystrings) {
                cur_link = atoqlink(strings_ptr,max_hits,max_match,max_hitspm);
                if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
                if(--match_rem <= 0) {
                    hits_exceeded = TRUE;
                    break;
                }
            }
            else if(str_head.filet_index != -1) {
                retval = get_match_list((int) str_head.filet_index, max_hitspm,
                                        &match_number, site_outptr, FALSE);

                if((retval != A_OK) && (retval != HITS_EXCEEDED)) {
                    plog(L_DB_ERROR, NOREQ,"get_match_list failed (%d)",retval,0);
                    goto cleanup;
                }

                if( match_number >= max_hits + offset ){
                    hits_exceeded = TRUE;
                    break;
                }
            }
        }
        else if (search_type != S_EXACT) { /* Not found - but try other method */
            search_type = new_search_type;
            goto startsearch;
        }
        break;

    case S_FULL_REGEX:

        if(re_comp(s_string) != (char *)NULL){
            /* Nothing has been stored in the hit table yet */
            /* (match_number is 0), so only the table itself */
            /* has to be released before bailing out        */
            free((char *)site_outptr);
            return (PRARCH_BAD_REGEX);
        }

        str_head.str_len = -1;

        ardp_accept();

        while((strings_curr_off = strings_ptr + str_head.str_len + 1) < strings_end){

            if((loopcount++ & 0x7ff) == 0) ardp_accept();

            strings_ptr = strings_curr_off;
            bcopy(strings_ptr,(char *)&str_head,sizeof(strings_header));
            strings_ptr += sizeof(strings_header);

            if(re_exec( strings_ptr ) == 1 ){ /* TRUE */
                strings_curr_off = strings_ptr;

                ardp_accept();
                if(onlystrings) {
                    if(strstr(strings_ptr," -> ") == NULL) { /* No broken strings */
                        cur_link = atoqlink(strings_ptr,max_hits,max_match,max_hitspm);
                        if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
                        if(--match_rem <= 0) {
                            hits_exceeded = TRUE;
                            break;
                        }
                    }
                }
                else if(str_head.filet_index != -1){
                    retval = get_match_list((int) str_head.filet_index, max_hitspm,
                                            &match_number, site_outptr, FALSE);

                    if((retval != A_OK) && (retval != HITS_EXCEEDED)) {
                        plog(L_DB_ERROR, NOREQ,"get_match_list failed (%d)",retval,0);
                        goto cleanup;
                    }

                    if( match_number >= max_hits + offset ){
                        hits_exceeded = TRUE;
                        break;
                    }
                }
            }
        }
        break;

#define TABLESIZE 256

    case S_SUB_NCASE_STR:
        nocase++;
        /* FALLTHROUGH */
    case S_SUB_CASE_STR: {
        char pattern[MAX_STRING_LEN];
        int skiptab[TABLESIZE];
        register int pc, tc;
        register int local_loopcount = 0xfff;
        char *bp1;
        int skip;
        int plen;
        int plen_1;
        int tlen;
        unsigned char tchar;

        plen = strlen(s_string);
        plen_1 = plen -1;

        /* Old code (replaced by inline code taken from initskip) */
        /* patlen = strlen(s_string ) ; */
        /* initskip(s_string, patlen, search_type == S_SUB_NCASE_STR) ; */

        if(nocase) {
            /* Index lowertable with an unsigned value: a plain char */
            /* may be negative for 8 bit characters                  */
            for(pc = 0; s_string[pc]; pc++)
                pattern[pc] = lowertable[(unsigned char) s_string[pc]];
            pattern[pc] = '\0';
        }
        else strcpy(pattern,s_string);

        for( i = 0 ; i < TABLESIZE ; i++ )
            skiptab[ i ] = plen;

        /* Note that we want both ucase and lcase in this table if nocase */
        for( i = 0, tchar = *pattern; i < plen ; i++, tchar = *(pattern + i)) {
            skiptab[tchar] = plen - 1 - i;
            if(nocase && islower(tchar))
                skiptab[_toupper(tchar)] = plen - 1 - i;
        }

        /* Begin heavily optimized and non portable code */

        /* Note that we are depending on str_head being 8 bytes */
        tlen = -9; /* str_head.str_len */

        strings_curr_off = strings_ptr;

        while((strings_curr_off += tlen + 9) < strings_end) {
            if(--local_loopcount == 0) {
                ardp_accept();
                local_loopcount = 0xfff;
            }

            strings_ptr = strings_curr_off;

            /* This is a kludge, non-portable, but it eliminates a pr call */
            /* Note that the size is 8 on suns. Is there a better way?     */
            /* bcopy(strings_ptr,(char *)&str_head,sizeof(strings_header)); */
            bp1 = (char *) &str_head;
            /* The copying of the file index is done only on a match */
            bp1[4] = strings_ptr[4]; bp1[5] = strings_ptr[5];
            /* bp1[6] = strings_ptr[6]; bp1[7] = strings_ptr[7]; */

            tlen = (unsigned short) str_head.str_len;

            /* To catch database corruption, this is a sanity check */
            if((tlen < 0) || (tlen > MAX_STRING_LEN)) {
                plog(L_DB_ERROR, NOREQ,"Database corrupt: string length out of bounds",0);
                break;
            }

            /* Old code (replaced by inline code taken from strfind) */
            /* if(strfind(strings_ptr,str_head.str_len)) */

            /* Text shorter than the pattern cannot match */
            if( tlen <= plen_1 ) continue;
            pc = tc = plen_1;

            strings_ptr += 8;

            /* Moved the nocase test outside the inner loop for performace */
            /* Clauses are identical except for the first if */
            if(nocase) {
                do {
                    tchar = strings_ptr[tc];
                    /* improve efficiency of this test */
                    if(lowertable[tchar] == pattern[pc]) {--pc; --tc;}
                    else {
                        skip = skiptab[tchar] ;
                        tc += (skip < plen_1 - pc) ? plen : skip ;
                        pc = plen_1 ;
                    }
                } while( pc >= 0 && tc < tlen ) ;
            }
            else { /* (!nocase) */
                do {
                    tchar = strings_ptr[tc];
                    /* Compare as unsigned on both sides; otherwise a   */
                    /* pattern byte >= 0200 held in a signed char could */
                    /* never equal the unsigned text byte               */
                    if(tchar == (unsigned char) pattern[pc]) {--pc; --tc;}
                    else {
                        skip = skiptab[tchar] ;
                        tc += (skip < plen_1 - pc) ? plen : skip ;
                        pc = plen_1 ;
                    }
                } while( pc >= 0 && tc < tlen ) ;
            }

            if(pc >= 0) continue;

            /* We have a match */

            /* Finish copying str_head - strings_curr_off */
            /* is old strings_ptr.                        */
            bp1[0] = strings_curr_off[0]; bp1[1] = strings_curr_off[1];
            bp1[2] = strings_curr_off[2]; bp1[3] = strings_curr_off[3];

            /* End heavily optimized and non portable code */

            ardp_accept();
            if(onlystrings) {
                if(strstr(strings_ptr," -> ") == NULL) { /* No broken strings */
                    cur_link = atoqlink(strings_ptr,max_hits,max_match,max_hitspm);
                    if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
                    if(--match_rem <= 0) {
                        hits_exceeded = TRUE;
                        break;
                    }
                }
            }
            else if(str_head.filet_index != -1){
                retval = get_match_list((int) str_head.filet_index, max_hitspm,
                                        &match_number, site_outptr, FALSE);

                if((retval != A_OK) && (retval != HITS_EXCEEDED)) {
                    plog(L_DB_ERROR,NOREQ,"get_match_list failed (%d)",retval,0);
                    goto cleanup;
                }

                if( match_number >= max_hits + offset ) {
                    hits_exceeded = TRUE;
                    break;
                }
            }
        }
    }
        break;

    default:
        /* Unknown search method: release the (still empty) hit table */
        free((char *)site_outptr);
        return(PRARCH_BAD_ARG);

    cleanup:
        /* Reached by goto when get_match_list fails: drop every hit */
        /* collected so far together with the table itself           */
        for(i = 0;i < match_number; i++) free((char *)site_outptr[i]);
        free((char *)site_outptr);
        return(PRARCH_DB_ERROR);
    }

    /* Turn the collected hits into links, skipping the first 'offset' */
    for(i = 0;i < match_number; i++){
        if((i & 0x7f) == 0) ardp_accept();
        site_outrec = *site_outptr[i];
        if(i >= offset) {
            cur_link = atoplink(site_outrec,flags);
            if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
        }
        free((char *)site_outptr[i]);
    }
    free((char *)site_outptr);

    if(hits_exceeded) {
        /* Insert a continuation entry */
    }

    /* Exact lookups are not cached while the request queue is long */
    if((search_type == S_EXACT) && (pQlen > (MATCH_CACHE_SIZE - 5)))
        return(PRARCH_SUCCESS);

    /* A positive max_hits in the cache means "there may be more", */
    /* a negative one means every match was found                  */
    if(!onlystrings)
        add_to_cache(vd->links,s_string, (hits_exceeded ? max_hits : -max_hits),
                     offset,search_type,or_search_type,flags);

    return(PRARCH_SUCCESS);
}
/*
 * check_cache - Check for cached results
 *
 *   Looks through the match cache for an entry with the same search
 *   string, offset and flags whose search method equals qtype (either
 *   the method actually used or the one originally requested) and which
 *   holds either all results or at least max_hits of them.
 *
 *   On a hit the entry is moved to the front of the cache, the expanded
 *   bit of every returned link is cleared, and the list is cut down to
 *   max_hits links; links cut off are parked on the entry's 'more' list
 *   and merged back in when a later query asks for more.
 *
 *   The link lists follow the convention that the head's 'previous'
 *   field points at the last link of the list.
 *
 *   ARGS:  arg      - search string
 *          max_hits - maximum number of links wanted
 *          offset   - offset of the query
 *          qtype    - search method
 *          flags    - link attribute flags
 *          linkpp   - set to the cached links on a hit, NULL otherwise
 *
 *   RETURNS: TRUE if the query was answered from the cache, else FALSE
 */
int check_cache(char *arg, int max_hits, int offset, search_sel qtype,
                int flags, VLINK *linkpp)
{
    struct match_cache *cachep = mcache;
    struct match_cache *pcachep = NULL;
    VLINK cur_link;
    VLINK rest;
    int count = max_hits;

    while(cachep) {
        if(((qtype == cachep->search_type)||(qtype == cachep->req_type))&&
           (cachep->offset == offset) &&
           /* All results are in cache - or enough to satisfy request */
           ((cachep->max_hits < 0) || (max_hits <= cachep->max_hits)) &&
           (strcmp(cachep->arg,arg) == 0) &&
           (cachep->flags == flags)) {
            /* We have a match.  Move to front of list */
            if(pcachep) {
                pcachep->next = cachep->next;
                cachep->next = mcache;
                mcache = cachep;
            }

            /* We now have to clear the expanded bits or the links  */
            /* returned in previous queries will not be returned    */
            /* We also need to truncate the list if there are more  */
            /* matches than requested                               */
            cur_link = cachep->matches;
            /* IMPORTANT: This code assumes the list is one        */
            /* dimensional, which is the case because we called    */
            /* vl_insert with the VLI_NOSORT option                */
            while(cur_link) {
                cur_link->expanded = FALSE;
                if((--count == 0) && cur_link->next) {
                    /* truncate list: everything after cur_link */
                    /* goes to the front of the 'more' list     */
                    rest = cur_link->next;
                    if(cachep->more) {
                        /* new head of 'more' points back at its tail */
                        rest->previous = cachep->more->previous;
                        /* old head of 'more' follows the last cut-off link */
                        cachep->more->previous = cachep->matches->previous;
                        cachep->matches->previous->next = cachep->more;
                    }
                    else {
                        rest->previous = cachep->matches->previous;
                    }
                    /* The cut-off links are now the start of 'more'. */
                    /* Without this, they would be unreachable when a */
                    /* 'more' list already existed.                   */
                    cachep->more = rest;
                    cur_link->next = NULL;
                    cachep->matches->previous = cur_link;
                }
                else if ((cur_link->next == NULL) && (count != 0) &&
                         cachep->more) {
                    /* Merge lists */
                    cachep->matches->previous = cachep->more->previous;
                    cur_link->next = cachep->more;
                    cachep->more->previous = cur_link;
                    cachep->more = NULL;
                }
                cur_link = cur_link->next;
            }
            *linkpp = cachep->matches;
            return(TRUE);
        }
        pcachep = cachep;
        cachep = cachep->next;
    }
    *linkpp = NULL;
    return(FALSE);
}
/*
 * add_to_cache - Cache the response for later use
 *
 *   Stores the links of a completed query at the front of the match
 *   cache.  While fewer than MATCH_CACHE_SIZE entries exist a new entry
 *   is allocated; afterwards the last entry in the list is recycled and
 *   its old links are freed.
 *
 *   The links are marked dontfree because the cache keeps pointing at
 *   them after they have been sent back.
 *
 *   ARGS:  vl          - links to cache (may be NULL)
 *          arg         - search string
 *          max_hits    - hit limit of the query; callers pass a negative
 *                        value when all matches were found
 *          offset      - offset of the query
 *          search_type - search method actually used
 *          req_type    - search method originally requested
 *          flags       - link attribute flags
 *
 *   RETURNS: TRUE if the results were cached, FALSE if no cache entry
 *            could be allocated (vl is then left untouched)
 */
int add_to_cache(VLINK vl, char *arg, int max_hits, int offset,
                 search_sel search_type, search_sel req_type, int flags)
{
    struct match_cache *newresults = NULL;
    struct match_cache *pcachep = NULL;

    if(cachecount < MATCH_CACHE_SIZE) { /* Create a new entry */
        newresults = (struct match_cache *) malloc(sizeof(struct match_cache));
        /* Out of memory: simply do not cache this response */
        if(!newresults) return(FALSE);
        cachecount++;
        newresults->next = mcache;
        mcache = newresults;
        newresults->arg = stcopy(arg);
        newresults->max_hits = max_hits;
        newresults->offset = offset;
        newresults->search_type = search_type;
        newresults->req_type = req_type;
        newresults->flags = flags;
        newresults->matches = NULL;
        newresults->more = NULL;
    }
    else { /* Use last entry */
        /* Find the last entry and the one in front of it */
        newresults = mcache;
        while(newresults->next) {
            pcachep = newresults;
            newresults = newresults->next;
        }

        /* Unlink it and move it to the front of the list; if it is */
        /* the only entry it already is the front                   */
        if(pcachep) {
            pcachep->next = NULL;
            newresults->next = mcache;
            mcache = newresults;
        }

        /* Free the old results */
        if(newresults->matches) {
            newresults->matches->dontfree = FALSE;
            vllfree(newresults->matches);
            newresults->matches = NULL;
        }
        if(newresults->more) {
            newresults->more->dontfree = FALSE;
            vllfree(newresults->more);
            newresults->more = NULL;
        }
        newresults->arg = stcopyr(arg,newresults->arg);
        newresults->max_hits = max_hits;
        newresults->offset = offset;
        newresults->search_type = search_type;
        newresults->req_type = req_type;
        newresults->flags = flags;
    }

    /* Since we are caching the data.  If there are any links, */
    /* note that they should not be freed when sent back       */
    if(vl) vl->dontfree = TRUE;

    newresults->matches = vl;
    return(TRUE);
}