Initial commit

2024-05-27 16:13:40 +02:00
parent f8dc12b10a
commit d71d446104
2495 changed files with 539746 additions and 0 deletions
--- a/prospero/lib/psrv/archie2/.gitignore
+++ b/prospero/lib/psrv/archie2/.gitignore
@@ -0,0 +1 @@
+Makefile
--- a/prospero/lib/psrv/archie2/FILES
+++ b/prospero/lib/psrv/archie2/FILES
@@ -0,0 +1,10 @@
+FILES
+Makefile
+README
+arch_dsdb.c
+arch_prioritize.c
+atopdate.c
+atoplink.c
+prarch.h
+prarch_host.c
+prarch_match.c
--- a/prospero/lib/psrv/archie2/Makefile.backup
+++ b/prospero/lib/psrv/archie2/Makefile.backup
@@ -0,0 +1,95 @@
+A_INC	= -Iarchie_src
+
+INCS	+= ${A_INC}
+
+# libraries should be relative to this dir, not SRC
+DBS_LIB	= libpsarchie.a
+
+CFLAGS += -O2 -DMMAP -DSTRFIND -DCLEANUP
+
+CFILES = \
+	arch_dsdb.c \
+	arch_prioritize.c \
+	prarch_match.c \
+	prarch_host.c \
+	atopdate.c \
+	atoplink.c 
+
+OBJECTS	= \
+	arch_dsdb.o \
+	arch_prioritize.o \
+	prarch_match.o \
+	prarch_host.o \
+	atopdate.o \
+	atoplink.o 
+
+ARC_LIB = \
+	oper.o \
+	database.o \
+	misc.o \
+	error.o \
+	net.o
+
+ARC_LIBSRC = \
+	oper.c \
+	database.c \
+	misc.c \
+	error.c \
+	net.c
+
+
+CODE	= ${CFILES} Makefile
+
+all:	${DBS_LIB}
+
+# Copy the archive built by this Makefile into ${INSTDIR} and rebuild its
+# symbol index there.
+# NOTE(review): the cp destination is named by ${SRV_LIB} but ranlib is run
+# on ${INSTDIR}/${DBS_LIB}; SRV_LIB is not defined in this file.  Confirm
+# whether both lines are meant to refer to the same installed file.
+install:
+	cp ${DBS_LIB} ${INSTDIR}/${SRV_LIB}
+	ranlib ${INSTDIR}/${DBS_LIB}
+
+cleandb:
+	\rm -f db/* /usr/tmp/archie.lock
+	\cp /dev/null db/file-list
+	\cp /dev/null db/strings-list
+
+${ARC_LIB}:	Makefile 
+	${CC} ${F_CC} ${F_CPP} $*.c
+
+${ARC_LIBSRC}:
+	ln -s archie_src/$@
+
+${DBS_LIB}: ${OBJECTS} ${ARC_LIB}
+	rm -f ${DBS_LIB}
+	ar rv ${DBS_LIB} ${OBJECTS} ${ARC_LIB}
+	ranlib ${DBS_LIB}
+
+# These dependencies cannot be generated automatically by the SWA Prospero
+# scripts unless you are running on a machine that has the ARCHIE sources.
+# Therefore, we treat them specially.
+# These dependencies should be updated the next time someone reading this is
+# in a position to do so.
+# To regenerate the dependencies, create a dummy archie_src directory
+# containing archie_defs.h, database.h, structs.h, defines.h, and error.h.
+# This will work unless any archie include files in turn include other
+# include files, which we of course cannot know.
+atoplink.o: prarch.h
+atoplink.o: archie_src/database.h
+atoplink.o: archie_src/defines.h
+atoplink.o: ../../../include/pfs.h
+atoplink.o: ../../../include/psite.h
+atoplink.o: archie_src/structs.h
+prarch_host.o: prarch.h
+prarch_host.o: archie_src/archie_defs.h
+prarch_host.o: archie_src/database.h
+prarch_host.o: archie_src/defines.h
+prarch_host.o: ../../../include/perrno.h
+prarch_host.o: ../../../include/pfs.h
+prarch_host.o: archie_src/structs.h
+
+# Dependencies
+arch_prioritize.o : \
+  ../../../include/ardp.h \
+  ../../../include/pfs_threads.h ../../../include/pfs_utils.h \
+  ../../../include/list_macros.h \
+  ../../../include/../lib/ardp/flocks.h ../../../include/pfs.h ../../../include/pmachine.h \
+  ../../../include/implicit_fixes.h \
+  ../../../include/perrno.h 
--- a/prospero/lib/psrv/archie2/README
+++ b/prospero/lib/psrv/archie2/README
@@ -0,0 +1,45 @@
+This directory contains code needed to integrate the archie 2 release
+with a Prospero server.  
+
+You should start from the most recent prospero release.  You can
+obtain information about the release from info-prospero@isi.edu.  The
+release should be available on prospero.isi.edu in the file
+/pub/prospero/prospero.tar.Z.
+
+Here are specific instructions on how to tie Prospero in to Archie:
+Retrieve the release and untar it.  Installation instructions are
+included, but they are not tailored to Archie.  The changes to the
+installation instructions for using it with Archie follow:
+
+  1) There is probably no need to set up separate user and group IDs
+     for Prospero, just use those for Archie.
+
+  2) In pserver.h, define PSRV_ARCHIE.
+
+  3) In server/Makefile uncomment the appropriate DB_LIBS line
+     for your configuration (archie2 or archie3)
+
+  4) In the directory lib/psrv/archie2 or lib/psrv/archie3, create
+     a symbolic link named archie_src that points to the directory
+     containing the archie sources.  Note that you must obtain the archie
+     sources from the archie group.  For archie3, these sources must have
+     been obtained after 2/22/93.  For archie2, there has been a change to
+     oper.c in the archie sources, so they too should have been obtained
+     since 2/22/93.  Note also that for archie2, the archie sources must
+     be set up to use the full path of the database directory, or you
+     will also have to create a db symbolic link from the directory
+     within which the Prospero server will run.
+
+  5) Run make in lib/psrv/archie2 or lib/psrv/archie3 depending
+     on your configuration.  If using archie3, make a link from 
+     libparchie.a to the file libparchie.a in the archie3 
+     distribution.  If you want, you can add lib/psrv/archie2
+     or lib/psrv/archie3 to the list of subdirectories in the
+     top level Prospero makefile (SUBDIR).
+
+  6) Make the other necessary customizations (as per the installation
+     instructions for prospero) by editing include/pserver.h,
+     include/pmachine.h, and the top level make file.
+
+  7) As per the installation instructions, do a make, make install,
+     then run pstart (you will probably want to add pstart to your
+     system startup files).
--- a/prospero/lib/psrv/archie2/arch_dsdb.c
+++ b/prospero/lib/psrv/archie2/arch_dsdb.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 1991 by the University of Washington
+ * Copyright (c) 1993 by the University of Southern California
+ *
+ * For copying and distribution information, please see the files
+ * <uw-copyright.h> and <usc-copyr.h>.
+ */
+
+#include <uw-copyright.h>
+#include <usc-copyr.h>
+
+#define ABSOLUTE_MAX_HITS 2500
+#define ABSOLUTE_MAX_GIF  100
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/dir.h>
+#include <stdio.h>
+#include <sgtty.h>
+#include <string_with_strcasecmp.h>
+
+/* Archie defines */
+#include <defines.h>
+#include <structs.h>
+#include <error.h>
+#include <database.h>
+
+#include "prarch.h"
+
+#include <pserver.h>
+#include <pfs.h>
+#include <ardp.h>
+#include <psrv.h>
+#include <plog.h>
+#include <pprot.h>
+#include <perrno.h>
+#include <pmachine.h>
+
+int		archie_supported_version = 2;
+
+extern char	hostname[];
+extern char	hostwport[];
+char		archie_prefix[] = "ARCHIE";
+static int num_slashes(char *s);
+static int tkllength(TOKEN tkl);
+/*
+ * arch_dsdb - Make an archie database query as if it were a directory lookup
+ *
+ * The full query is built from NAME, *COMPONENTSP and the tokens of *RCOMPP
+ * and has the form
+ *
+ *     DATABASE_PREFIX/COMMAND(PARAMETERS)/ARGS
+ *
+ * where COMMAND is MATCH or HOST.  Links found by the query are inserted
+ * into DIR.  REQ, RATTRIB and FILTERS are accepted but not examined here.
+ *
+ * On success PSUCCESS is returned (DIR may be left empty) and *COMPONENTSP
+ * and *RCOMPP are advanced past the components that were resolved.
+ * Failures are reported through RETURNPFAILURE (malformed components,
+ * database errors), DSRDIR_NOT_A_DIRECTORY (query too long, unknown query
+ * type, unknown site), DIRSRV_NOT_AUTHORIZED (hit limit out of range),
+ * DIRSRV_TOO_MANY, or a status passed through from prarch_host().
+ */
+int
+arch_dsdb(RREQ	req,           /* Request pointer                           */
+	  char	*name,         /* Name of the directory                     */
+	  char	**componentsp, /* Next component of name                    */
+	  TOKEN	*rcompp,       /* Additional components                     */
+	  VDIR	dir,           /* Directory to be filled in                 */
+	  int	options,       /* Options to list command                   */
+	  const char *rattrib, /* Requested attributes                      */
+	  FILTER filters)       /* Filters to be applied                     */
+{
+    /* Note that componentsp and rcompp are pointers to  */
+    /* pointers.  This is necessary because this routine */
+    /* must be able to update these values if more than  */
+    /* one component of the name is resolved.            */
+    char        *components = NULL;
+    int         num_unresolvedcomps = 0;
+    static int  dbopen = 0;
+    char        fullquery[MAXPATHLEN];
+    char        *dbpart;
+    char        dbquery[MAXPATHLEN];
+    char        dbargs[MAXPATHLEN];
+    char        dbarg1[MAXPATHLEN];
+    char        dbarg2[MAXPATHLEN];
+    char        dbarg3[MAXPATHLEN];
+    char        dirlinkname[MAXPATHLEN];
+    char        sep;
+    int         tmp;
+    VLINK       dirlink = NULL;
+    TOKEN       tkl_tmp;
+
+    /* Make sure NAME, COMPONENTSP, and RCOMPP arguments are correct. */
+
+    /* Name components with slashes in them are malformed inputs to the
+       ARCHIE database. */
+    if(componentsp && (components = *componentsp)) {
+        if(index(components, '/'))
+            RETURNPFAILURE;
+        for (tkl_tmp = *rcompp; tkl_tmp; tkl_tmp = tkl_tmp->next)
+            if (index(tkl_tmp->token, '/'))
+                RETURNPFAILURE;
+    } else {
+        if (*rcompp) RETURNPFAILURE; /* ridiculous to specify additional comps
+                                         and no initial comps.*/
+    }
+
+    /* Directory already initialized, but note that this */
+    /* is not a real directory                           */
+    dir->version = -1;
+    dir->inc_native = 3;           /* Not really a directory */
+
+    /* Note that if we are resolving multiple components */
+    /* (rcomp!=NULL) the directory will already be empty */
+    /* since had anything been in it dirsrv would have   */
+    /* already cleared it and moved on to the next comp  */
+
+    /* Open the database only once; dbopen is reset on   */
+    /* failure so that a later call tries again.         */
+    if(!dbopen++) {
+        set_default_dir(DEFAULT_DBDIR);
+        if((tmp = open_db_files(DB_RDONLY)) != A_OK) {
+            dbopen = 0;
+            plog(L_DB_ERROR,NOREQ,"Can't open archie database",0);
+            RETURNPFAILURE;
+        }
+    }
+
+    /* For now, if only verifying, indicate success */
+    /* We don't want to do a DB search.  Eventually */
+    /* we might actually check that the directory   */
+    /* is valid.                                    */
+    if(options&DSDB_VERIFY) return(PSUCCESS);
+
+    /* Construct the full query from the pieces passed to us.  As in   */
+    /* the rest of this routine, qsprintf's return value is taken to   */
+    /* count the terminating NUL, hence the -1.  The length is checked */
+    /* after every write so that the next write never starts beyond    */
+    /* the end of the buffer.                                          */
+    tmp = -1 + qsprintf(fullquery,sizeof fullquery, "%s%s%s",name,
+                        ((components && *components) ? "/" : ""),
+                        ((components && *components) ? components : ""));
+    if (tmp < 0 || tmp + 1 > (int) sizeof fullquery)
+        return DSRDIR_NOT_A_DIRECTORY;
+    for (tkl_tmp = *rcompp; tkl_tmp; tkl_tmp = tkl_tmp->next) {
+        /* Append THIS token, not the head of the list */
+        tmp += -1 + qsprintf(fullquery + tmp, sizeof fullquery - tmp,
+                             "/%s", tkl_tmp->token);
+        if (tmp < 0 || tmp + 1 > (int) sizeof fullquery)
+            return DSRDIR_NOT_A_DIRECTORY;
+    }
+
+    /* The format for the queries is            */
+    /* DATABASE_PREFIX/COMMAND(PARAMETERS)/ARGS */
+
+    /* If the query is no longer than the database prefix there is no */
+    /* command to parse; skipping the prefix would step past the end  */
+    /* of the string.  Treat it like the "no separator" case below.   */
+    if (strlen(fullquery) <= strlen(archie_prefix)) return(PSUCCESS);
+
+    /* Strip off the database prefix and skip the slash after it */
+    dbpart = fullquery + strlen(archie_prefix) + 1;
+
+    /* Find the query (up to the next /), determine if the */
+    /* / exists and then read the args                     */
+    tmp = sscanf(dbpart,"%[^/]%c%s",dbquery,&sep,dbargs);
+
+    /* If no separator, for now return nothing         */
+    /* Eventually, we might return a list of the query */
+    /* types supported                                 */
+    if(tmp < 2) return(PSUCCESS);
+
+    /* Check query type */
+    if(strncmp(dbquery,"MATCH",5)==0) {
+        char    stype = 'R';     /* search type           */
+        int     maxthit = 100;   /* max entries to return */
+        int     maxmatch = 100;  /* max strings to match  */
+        int     maxhitpm = 100;  /* max hits per match    */
+        int     offset = 0;      /* entries to skip       */
+        search_sel method;       /* Search method         */
+        int     onlystr = 0;     /* Just return strings   */
+
+        /* In the MATCH querytype, the directory part of the query (the
+           argument named NAME) may have no more than 3 components.
+           There are 3 possible formats:
+           1) DATABASE_PREFIX (one component)
+           2) (1)/MATCH(...)
+           3) (2)/query-term (3 total components)
+           */
+        if (num_slashes(name) > 2) return DSRDIR_NOT_A_DIRECTORY;
+        /* if no strings to match, return nothing */
+        if(tmp < 3) return(PSUCCESS);
+
+        /* Get arguments: first try the five-parameter form */
+        tmp = sscanf(dbquery,"MATCH(%d,%d,%d,%d,%c",&maxthit,
+                     &maxmatch,&maxhitpm,&offset,&stype);
+
+        /* Fall back to the three-parameter form MATCH(hits,offset,type) */
+        if(tmp < 3) {
+            sscanf(dbquery,"MATCH(%d,%d,%c",&maxthit,&offset,&stype);
+            maxmatch = maxthit;
+            maxhitpm = maxthit;
+        }
+        /* Note: in maxhits, 0 means use default, -1 means use max */
+
+        /* Don't let the user request more than ABSOLUTE_MAX_HITS */
+        if((maxthit > ABSOLUTE_MAX_HITS) || (maxthit < 1)) {
+            p_err_string = qsprintf_stcopyr(p_err_string,
+                "Legal values for max hits are between 1 and %d ",
+                ABSOLUTE_MAX_HITS);
+            return(DIRSRV_NOT_AUTHORIZED);
+        }
+        /* NOTE(review): unreachable as written, because maxthit < 1 was */
+        /* rejected just above; kept until the intended handling of 0    */
+        /* and -1 (see the note above) is confirmed.                     */
+        if(maxthit == 0) maxthit = ABSOLUTE_MAX_HITS;
+
+        /* Map the search-type letter to a method and to whether only */
+        /* the matching strings are wanted.                           */
+        switch(stype) {
+        case '=':
+            onlystr = 0;
+            method = S_EXACT ;
+            break;
+        case 'C':
+            onlystr = 0;
+            method = S_SUB_CASE_STR ;
+            break;
+        case 'c':
+            onlystr = 0;
+            method = S_E_SUB_CASE_STR ;
+            break;
+        case 'K':
+            onlystr = 1;
+            method = S_SUB_CASE_STR ;
+            break;
+        case 'k':
+            onlystr = 1;
+            method = S_E_SUB_CASE_STR ;
+            break;
+        case 'R':
+            onlystr = 0;
+            method = S_FULL_REGEX ;
+            break;
+        case 'r':
+            onlystr = 0;
+            method = S_E_FULL_REGEX ;
+            break;
+        case 'X':
+            onlystr = 1;
+            method = S_FULL_REGEX ;
+            break;
+        case 'x':
+            onlystr = 1;
+            method = S_E_FULL_REGEX ;
+            break;
+        case 'z':
+            onlystr = 1;
+            method = S_E_SUB_NCASE_STR ;
+            break;
+        case 'Z':
+            onlystr = 1;
+            method = S_SUB_NCASE_STR ;
+            break;
+        case 's':
+            onlystr = 0;
+            method = S_E_SUB_NCASE_STR ;
+            break;
+        case 'S':
+        default:
+            onlystr = 0;
+            method = S_SUB_NCASE_STR ;
+            break;
+        }
+
+        *dbarg1 = *dbarg2 = *dbarg3 = '\0';
+
+        tmp = sscanf(dbargs,"%[^/]%c%[^/]%c%s",dbarg1,&sep,dbarg2,
+                     &sep,dbarg3);
+
+        if(tmp < 2) {
+            /* This specifies a directory, but not a link within it  */
+            /* create a pseudo directory and return a pointer        */
+            /* In other words, listing a MATCH directory by itself yields
+               an empty directory. */
+            if(*dbarg1 && (strcmp(dbarg1,"*")!= 0)) {
+                dirlink = vlalloc();
+                dirlink->target = stcopyr("DIRECTORY",dirlink->target);
+                dirlink->name = stcopyr(dbarg1,dirlink->name);
+                dirlink->host = stcopyr(hostwport,dirlink->host);
+                /* dbquery and dbarg1 were both scanned out of fullquery */
+                sprintf(dirlinkname,"%s/%s/%s",archie_prefix,dbquery,dbarg1);
+                dirlink->hsoname = stcopyr(dirlinkname,dirlink->hsoname);
+                vl_insert(dirlink,dir,VLI_ALLOW_CONF);
+            }
+        }
+        else {
+            if(tmp > 4) {
+                /* There are remaining components */
+                num_unresolvedcomps = num_slashes(dbarg3);
+            }
+#ifdef ABSOLUTE_MAX_GIF
+            /* If looking for GIF files (arrgh) don't allow them */
+            /* to set an unreasonable number of hits, this is    */
+            /* prompted by someone who set max hits to 10,000    */
+            if((maxthit+offset > ABSOLUTE_MAX_GIF)&&(((strlen(dbarg1) >= 4)&&
+                      (strcasecmp(dbarg1+strlen(dbarg1)-4,".gif") == 0)) ||
+                      (strcasecmp(dbarg1,"gif") == 0))) {
+                p_err_string = qsprintf_stcopyr(p_err_string,
+                    "Max hits for GIF searches is %d - See archie/doc/giflist.Z on "
+                    "archie.mcgill.ca for full gif list",ABSOLUTE_MAX_GIF);
+                return(DIRSRV_NOT_AUTHORIZED);
+            }
+#endif /* ABSOLUTE_MAX_GIF */
+
+            tmp = prarch_match(dbarg1,maxthit,maxmatch,maxhitpm,
+                               offset,method,dir,FALSE,onlystr);
+            if(tmp) RETURNPFAILURE;
+        }
+    }
+    else if (strncmp(dbquery,"HOST",4)==0) {
+        /* First component of args is the site name    */
+        /* remaining components are the directory name */
+
+        *dbarg1 = *dbarg2 = '\0';
+
+        tmp = sscanf(dbargs,"%[^/]%c%s",dbarg1,&sep,dbarg2);
+
+        /* If first component is null, return an empty directory */
+        if(tmp < 1) return(PSUCCESS);
+
+        /* if first component exists, but is last component, */
+        /* then it is the name of the subdirectory for the   */
+        /* host, create a pseudo directory and return a      */
+        /* pointer, If first component is a wildcard, and no */
+        /* additional components, then return matching list  */
+        /* of sites.                                         */
+        if(tmp == 1) {
+            tmp = prarch_host(dbarg1,NULL,dir,A2PL_ARDIR);
+            if(tmp == PRARCH_TOO_MANY) return(DIRSRV_TOO_MANY);
+            if(tmp) return(tmp);
+        }
+        /* More than one component, Look up the requested directory  */
+        /* Note that the since the full query is passed to us, it    */
+        /* includes the component name, thus the directory name is   */
+        /* what you get when you strip off the last component of the */
+        /* name                                                      */
+        else {
+            char *lastsep = rindex(dbarg2,'/');
+            if(lastsep) *lastsep++ = '\0';
+            else *dbarg2 = '\0';
+            tmp = prarch_host(dbarg1,dbarg2,dir,A2PL_ARDIR);
+            if(tmp == PRARCH_DONT_HAVE_SITE)
+                return(DSRDIR_NOT_A_DIRECTORY);
+            if(tmp) RETURNPFAILURE;
+        }
+    }
+    else {
+        /* Query type not supported */
+        return(DSRDIR_NOT_A_DIRECTORY);
+    }
+
+    /* We are done, but we need to figure out if we resolved multiple
+       components and reset *componentsp and *rcompp appropriately. */
+
+    if (num_unresolvedcomps) {
+        int skip = tkllength(*rcompp) - num_unresolvedcomps;
+        if (skip < 0) return DSRDIR_NOT_A_DIRECTORY; /* shouldn't happen. */
+        while(skip-- > 0) {
+            assert(*rcompp);
+            *componentsp = (*rcompp)->token;
+            *rcompp = (*rcompp)->next;
+        }
+    } else {
+        /* Everything was resolved: consume the whole token list */
+        while (*rcompp) {
+            *componentsp = (*rcompp)->token;
+            *rcompp = (*rcompp)->next;
+        }
+    }
+    return(PSUCCESS);
+}
+
+/*
+ * tkllength - Count the entries in a token list
+ *
+ * Follows the next pointers starting at TKL and returns how many entries
+ * were visited; a null list has length 0.
+ */
+static int
+tkllength(TOKEN tkl)
+{
+    TOKEN cursor = tkl;
+    int count = 0;
+
+    while (cursor) {
+        count++;
+        cursor = cursor->next;
+    }
+    return count;
+}
+
+
+/*
+ * num_slashes - Count the '/' characters in the NUL-terminated string S
+ */
+static
+int
+num_slashes(char *s)
+{
+    char *p = s;
+    int count = 0;
+
+    while (*p != '\0') {
+        if (*p == '/')
+            count++;
+        p++;
+    }
+    return count;
+}
+
--- a/prospero/lib/psrv/archie2/arch_prioritize.c
+++ b/prospero/lib/psrv/archie2/arch_prioritize.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 1991 by the University of Washington
+ * Copyright (c) 1993 by the University of Southern California
+ *
+ * For copying and distribution information, please see the files
+ * <uw-copyright.h> and <usc-copyr.h>.
+ */
+
+#include <uw-copyright.h>
+#include <usc-copyr.h>
+
+#include <ardp.h>
+#include <pfs.h>
+#include <perrno.h>
+
+static assign_priority();
+
+/*
+ * arch_prioritize_request - Compare two requests by archie priority
+ *
+ * A request whose pf_priority is still zero has one computed by
+ * assign_priority() and stored in the request before the comparison.
+ * Returns 0 when the priorities are equal, -1 when r1's is smaller and
+ * 1 when r1's is larger.
+ */
+arch_prioritize_request(r1,r2)
+    RREQ r1,r2;
+    {
+	if(r1->pf_priority == 0) r1->pf_priority = assign_priority(r1);
+	if(r2->pf_priority == 0) r2->pf_priority = assign_priority(r2);
+
+	if(r1->pf_priority == r2->pf_priority) return(0);
+	return((r1->pf_priority < r2->pf_priority) ? -1 : 1);
+    }
+
+
+/*
+ * assign_priority - Compute a priority value for a request
+ *
+ * Small fixed values are returned for requests that need no parsing:
+ *   2  part of the request has already been received (prcvd_thru > 0)
+ *   1  the request text does not contain "ARCHIE"
+ *   3  it contains "ARCHIE" but no "MATCH"
+ *   4  the MATCH parameters could not be parsed
+ * Otherwise the value is a sum of: a base chosen by search type, a
+ * penalty for the old three-parameter format, a term that grows with
+ * max hits plus offset, and a large penalty when the query mentions gif.
+ */
+static assign_priority(r1)
+    RREQ r1;
+    {
+	char	*query;
+	int	maxhit = 0;
+	int	maxmatch = 0;
+	int	maxhitpm = 0;
+	int	offset;
+	char	stype;
+	int	nparsed;
+	int	span;
+	int	cost;
+
+	/* Result is probably cached, use it or lose it */
+	if(r1->prcvd_thru > 0) return(2);
+
+	query = sindex(r1->rcvd->start,"ARCHIE");
+	if(!query) return(1);
+
+	query = sindex(query,"MATCH");
+	if(!query) return(3);
+
+	/* Try the five-parameter form, then the older three-parameter one */
+	nparsed = sscanf(query,"MATCH(%d,%d,%d,%d,%c",&maxhit,&maxmatch,
+			 &maxhitpm,&offset,&stype);
+	if(nparsed != 5)
+	    nparsed = sscanf(query,"MATCH(%d,%d,%c",&maxhit,&offset,&stype);
+	if(nparsed < 3) return(4);
+
+	/* Base value by search type */
+	switch(stype) {
+	case '=':
+	    cost = 0;
+	    break;
+	case 'r':
+	case 'x':
+	    cost = 700;
+	    break;
+	case 'R':
+	case 'X':
+	    cost = 800;
+	    break;
+	default:
+	    cost = 100;
+	    break;
+	}
+
+	/* If old format request, then add penalty */
+	if(nparsed != 5) cost += 100;
+
+	/* Requested hits plus any positive offset */
+	span = maxhit;
+	if(offset > 0) span += offset;
+
+	if(span > 10000) cost += 10000;
+	else if(span > 100) cost += span;
+	else cost += 100;
+
+	if(sindex(query,"gif") || sindex(query,"GIF")) cost += 20000;
+
+	return(cost);
+    }
--- a/prospero/lib/psrv/archie2/archie_src/include/ar_attrib.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/ar_attrib.h
--- a/prospero/lib/psrv/archie2/archie_src/include/ar_search.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/ar_search.h
--- a/prospero/lib/psrv/archie2/archie_src/include/archie_defs.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/archie_defs.h
--- a/prospero/lib/psrv/archie2/archie_src/include/database.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/database.h
--- a/prospero/lib/psrv/archie2/archie_src/include/db_ops.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/db_ops.h
--- a/prospero/lib/psrv/archie2/archie_src/include/defines.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/defines.h
--- a/prospero/lib/psrv/archie2/archie_src/include/error.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/error.h
--- a/prospero/lib/psrv/archie2/archie_src/include/files.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/files.h
--- a/prospero/lib/psrv/archie2/archie_src/include/host_db.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/host_db.h
--- a/prospero/lib/psrv/archie2/archie_src/include/master.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/master.h
--- a/prospero/lib/psrv/archie2/archie_src/include/structs.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/structs.h
--- a/prospero/lib/psrv/archie2/archie_src/include/typedef.h
+++ b/prospero/lib/psrv/archie2/archie_src/include/typedef.h
--- a/prospero/lib/psrv/archie2/atopdate.c
+++ b/prospero/lib/psrv/archie2/atopdate.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <sys/types.h>
+
+#include <string.h>
+
+#include <defines.h>
+#include <structs.h>
+#include <database.h>
+
+extern datestruct unpack_date();
+
+/*
+ * atopdate - Format an archie database date as a timestamp string
+ *
+ * The packed date is expanded with unpack_date() and printed as
+ * YYYYMMDDhhmm00Z (the month field is stored zero-based, so 1 is added).
+ * When the hour equals MAX_HOUR the time of day is reported as 0000.
+ *
+ * Returns a pointer to a static buffer, which the next call overwrites.
+ */
+char *atopdate(entry_db)
+	db_date entry_db;
+{
+   static char stamp[20];
+   datestruct when;
+
+   when = unpack_date(entry_db);
+
+   if(when.hour == MAX_HOUR) {
+       when.min = 0;
+       when.hour = 0;
+   }
+
+   (void) sprintf(stamp,"%04d%02d%02d%02d%02d00Z",
+		  when.year,
+		  when.month+1,
+		  when.day,
+		  when.hour,
+		  when.min);
+
+   return(stamp);
+}
--- a/prospero/lib/psrv/archie2/atoplink.c
+++ b/prospero/lib/psrv/archie2/atoplink.c
@@ -0,0 +1,147 @@
+#include <stdio.h>
+#include <sys/types.h>
+
+#include <database.h>
+#include <sys/mman.h>
+#include <defines.h>
+#include <structs.h>
+#include <pfs.h>
+#include <psite.h>
+#include "prarch.h"
+
+extern FILE *strings_table;
+extern char *strings_begin;
+
+extern char	hostname[];
+extern char	hostwport[];
+extern char	archie_prefix[];
+
+char	*perms_itoa();
+char	*print_date();
+char	*atopdate();
+char	*strstr();
+
+/*
+ * atoplink - Build a Prospero link describing an archie site record
+ *
+ * A new link is allocated and filled in from *SOP.  Directories (and the
+ * site root, when A2PL_ROOT is set) become DIRECTORY links that point back
+ * at this server under ARCHIE/HOST/...; files become EXTERNAL links that
+ * point at the site itself with an AFTP access method.  For non-root
+ * entries the SIZE, UNIX-MODES and LAST-MODIFIED attributes are attached;
+ * AR_H_LAST_MOD and AR_H_IP_ADDR are attached for the root or when the
+ * corresponding A2PL_H_* flag is set.
+ *
+ * Returns the new link.
+ */
+VLINK atoplink(site_out	*sop,	/* Site output pointer                  */
+	       int	flags)	/* Flags: A2PL_* values from prarch.h   */
+{
+    /* inet_ntoa() returns a pointer.  No declaration for it is visible  */
+    /* among this file's includes, and an undeclared function would be   */
+    /* assumed to return int, so declare it in the same style as the     */
+    /* other externals in this file.                                     */
+    extern char		*inet_ntoa();
+    VLINK		vl = vlalloc();  /* New link			*/
+    char 		fullpath[MAX_STRING_LEN];
+    char 		namebuf[MAX_STRING_LEN];
+    char		*endname = NULL; /* Start of " -> ", if any     */
+    char 		modestring[20];
+    char 		atval[256];
+    char		*nameptr = NULL; /* Last component of file name */
+    char		*pathsep = "/";  /* Joins site_path and name    */
+    char		*ptr;
+    int			pathlen;
+    site_rec	*srp = &(sop->site_ent); /* Site record pointer */
+
+    /* For now, all directory pointers are to pseudo-directories */
+    flags |= A2PL_ARDIR;
+
+    if((flags & A2PL_ROOT) || (srp->dir_or_f == 'T')) {
+	/* It's a directory - we should check to see if the site is   */
+	/* running prospero, and if so return a pointer to the actual */
+	/* directory.  If it isn't then we return a real pointer to   */
+	/* a pseudo-directory maintained by this archie server.       */
+	vl->target = stcopyr("DIRECTORY",vl->target);
+    }
+    else {
+	/* It's a file - we should check to see if the site is        */
+	/* running prospero, and if so return a pointer to the real   */
+	/* file.  If it isn't, then we generate an external link      */
+	vl->target = stcopyr("EXTERNAL",vl->target);
+	ad2l_am_atr(vl,"AFTP","BINARY",NULL);
+	flags &= (~A2PL_ARDIR);
+    }
+
+    if(flags & A2PL_ARDIR) vl->host = stcopyr(hostwport,vl->host);
+    else vl->host = stcopyr(sop->site_name,vl->host);
+
+    /* Get the last component of name */
+
+    if(flags & A2PL_ROOT) vl->name = stcopyr(sop->site_name,vl->name);
+    else {
+	strncpy(namebuf,strings_begin + srp->in_or_addr.strings_ind +
+		sizeof(strings_header),sizeof(namebuf));
+	namebuf[sizeof(namebuf)-1] = '\0';
+	/* For a symbolic link keep only the part before " -> " */
+	if((endname = strstr(namebuf," -> ")) != NULL) *endname = '\0';
+	nameptr = namebuf;
+	vl->name = stcopyr(nameptr,vl->name);
+
+	/* A separator is needed unless site_path already ends in '/'. */
+	/* Check the length first so that an empty path is not indexed */
+	/* at position -1.                                             */
+	pathlen = strlen(sop->site_path);
+	if((pathlen > 0) && (sop->site_path[pathlen - 1] == '/'))
+	    pathsep = "";
+    }
+
+    if(flags & A2PL_ARDIR) {
+	if(flags & A2PL_ROOT)
+	    sprintf(fullpath,"%s/HOST/%s",archie_prefix, sop->site_name);
+	else
+	    sprintf(fullpath,"%s/HOST/%s%s%s%s",archie_prefix,
+		    sop->site_name, sop->site_path, pathsep, nameptr);
+    }
+    else {
+	if(flags & A2PL_ROOT)
+	    sprintf(fullpath,"/");
+	else
+	    sprintf(fullpath,"%s%s%s",sop->site_path, pathsep, nameptr);
+    }
+
+    vl->hsoname = stcopyr(fullpath,vl->hsoname);
+
+    if(!(flags & A2PL_ROOT)) {
+	/* Here we can add cached attribute values from the archie   */
+	/* database such as size, protection, and last modified time */
+	sprintf(atval,"%d bytes",srp->size);
+	ad2l_seq_atr(vl,ATR_PREC_CACHED,ATR_NATURE_INTRINSIC,
+		     "SIZE",atval,NULL);
+
+	/* Directory modes in unix string format; the leading type  */
+	/* character is 'l' when the name contained " -> "          */
+	if((ptr = perms_itoa(srp->perms)) != NULL) {
+	    if(endname) sprintf(modestring,"%c%s",'l',ptr);
+	    else sprintf(modestring,"%c%s",((srp->dir_or_f=='T')?'d':'-'),ptr);
+	    ad2l_seq_atr(vl,ATR_PREC_CACHED,ATR_NATURE_INTRINSIC,
+			 "UNIX-MODES", modestring, NULL);
+	}
+
+	/* Modified date - in prospero format */
+	if((ptr = atopdate(srp->mod_time)) != NULL) {
+	    ad2l_seq_atr(vl,ATR_PREC_CACHED,ATR_NATURE_INTRINSIC,
+			 "LAST-MODIFIED", ptr, NULL);
+	}
+    }
+
+    if((flags & A2PL_ROOT) || (flags & A2PL_H_LAST_MOD)) {
+	/* Modified date - in prospero format */
+	if((ptr = atopdate(sop->site_mod_time)) != NULL)
+	    ad2l_seq_atr(vl,ATR_PREC_CACHED,ATR_NATURE_APPLICATION,
+			 "AR_H_LAST_MOD", ptr, NULL);
+    }
+
+    if((flags & A2PL_ROOT) || (flags & A2PL_H_IP_ADDR)) {
+	/* Host IP Address */
+	if(sop->site_ipaddr.s_addr)
+	    ad2l_seq_atr(vl,ATR_PREC_CACHED,ATR_NATURE_APPLICATION,
+			 "AR_H_IP_ADDR", inet_ntoa(sop->site_ipaddr),
+			 NULL);
+    }
+    return(vl);
+}
+
+/*
+ * atoqlink - Build a directory link that names an exact-match query
+ *
+ * The new link is named STR and its hsoname is
+ * ARCHIE/MATCH(maxhit,maxmatch,maxhitpm,0,=)/STR on this server.
+ *
+ * Returns the new link.
+ */
+VLINK atoqlink(char *str,int maxhit,int maxmatch,int maxhitpm)
+{
+    VLINK		vl = vlalloc();
+    char 		fullpath[MAX_STRING_LEN];
+
+    sprintf(fullpath,"%s/MATCH(%d,%d,%d,0,=)/%s", archie_prefix,
+	    maxhit, maxmatch, maxhitpm, str);
+
+    /* Each field is passed as its own previous value, as everywhere   */
+    /* else in this file; the name was formerly passed vl->host here.  */
+    vl->name = stcopyr(str,vl->name);
+    vl->target = stcopyr("DIRECTORY",vl->target);
+    vl->hsoname = stcopyr(fullpath,vl->hsoname);
+    vl->host = stcopyr(hostwport,vl->host);
+    return(vl);
+}
--- a/prospero/lib/psrv/archie2/prarch.h
+++ b/prospero/lib/psrv/archie2/prarch.h
@@ -0,0 +1,42 @@
+/* Error codes returned by prarch routines */
+#define PRARCH_SUCCESS		0	/* Successful completion       */
+#define PRARCH_BAD_ARG		1	/* Bad argument                */
+#define PRARCH_OUT_OF_MEMORY	2	/* Can't allocate enough space */
+#define PRARCH_BAD_REGEX	3	/* Bad regular expression      */
+#define PRARCH_DONT_HAVE_SITE	4	/* Can't find site file        */
+#define PRARCH_CANT_OPEN_FILE	5	/* Can't open DB file          */
+#define PRARCH_DB_ERROR		6	/* Database Error              */
+#define PRARCH_CLEANUP		7       /* Cleanup failed              */
+#define PRARCH_TOO_MANY		8	/* Too many matches            */
+
+
+/* For constructing link attributes */
+#define A2PL_H_IP_ADDR        0x001
+#define A2PL_HOSTIP           0x001
+#define A2PL_H_OS_TYPE        0x002
+#define A2PL_H_TIMEZ          0x004
+#define A2PL_LK_LAST_MOD      0x020
+#define A2PL_LINK_COUNT       0x040
+#define A2PL_LINK_SZ          0x080
+#define A2PL_NATIVE_MODES     0x100
+#define A2PL_H_LAST_MOD       0x200
+#define A2PL_SITEDATE         0x200
+#define A2PL_UNIX_MODES       0x800
+
+#define A2PL_ROOT	    0x10000
+#define A2PL_ARDIR	    0x40000
+
+/* Structure definitions */
+/*
+ * site_out_t - One site entry as handed to atoplink()
+ *
+ * Pairs a site record with the host-level information needed to turn it
+ * into a link.
+ */
+struct site_out_t{
+	/* Host address; reported as AR_H_IP_ADDR when non-zero */
+	struct in_addr site_ipaddr;
+	/* Host-level date; formatted by atopdate() for AR_H_LAST_MOD */
+	db_date site_mod_time;
+	/* Host name; used as the link name for the site root */
+	char site_name[MAX_HOST_LEN];
+	/* NOTE(review): not referenced in the code visible here - */
+	/* presumably an update-time string; confirm with its users */
+        char site_update[SMALL_STR_LEN];
+	/* Directory path that the entry's name is appended to */
+        char site_path[MAX_FILE_NAME];
+	/* The entry itself (type, size, perms, mod_time, name index) */
+	site_rec site_ent;
+};
+
+typedef struct site_out_t site_out;
+
+char	*get_host_file_name();
+struct vlink *atoplink();
--- a/prospero/lib/psrv/archie2/prarch_host.c
+++ b/prospero/lib/psrv/archie2/prarch_host.c
@@ -0,0 +1,293 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include <sys/mman.h>
+
+/* Archie definitions */
+#include <ndbm.h>
+#include <defines.h>
+#include <archie_defs.h>
+#include <structs.h>
+#include <database.h>
+#include <error.h>
+
+#include "prarch.h"
+
+#include <pfs.h>
+#include <perrno.h>
+#include <plog.h>
+#include <pmachine.h>		/* For bzero */
+#define TOO_MANY_HOSTS  200
+
+/*
+ * prarch_host - Search host for contents of directory 
+ *
+ *  Two modes, selected by dirname:
+ *
+ *    dirname == NULL: add one link per matching host to vd.  site_name
+ *      may be a literal host name, a wildcard pattern using '*' and '?',
+ *      or a regular expression enclosed in parentheses.  A lone "*"
+ *      returns nothing.
+ *
+ *    dirname != NULL: scan the site file of site_name and add a link to
+ *      vd for each record found in the directory named dirname.
+ *
+ *  ARGS: site_name - name of host for which search is to be made
+ *          dirname - name of directory to return (NULL if root )
+ *               vd - pointer to directory to be filled in
+ *            flags - A2PL_* attribute flags, passed on to atoplink()
+ *
+ *  RETURNS: PRARCH_SUCCESS, or another PRARCH_* code on failure
+ */
+int prarch_host(char	*site_name, /* Name of host to be searched         */
+		char	*dirname,   /* Name of directory to be listed      */
+		VDIR	vd,	    /* Directory to be filled in           */
+		int	flags)      /* Flags: Which attributes to use      */
+{
+    site_out	so;
+    char	*host_name;
+    char	result[MAX_STRING_LEN];
+    char	date_str[SMALL_STR_LEN];
+    char	hostip_str[SMALL_STR_LEN];
+    site_rec	curr_site_rec;
+    site_rec	rootrec;
+    int		recno;
+    int		last_parent = -1;  
+    site_rec 	*site_ptr;
+    int		correct_dir = 0; /* Scanning the requested directory   */
+    int		loopcount = 0;   /* To decide when to call ardp_accept */
+    VLINK 	clink;           /* Current link                       */
+    FILE *fp;
+    
+    caddr_t site_begin;
+    site_rec *site_end;
+    struct stat statbuf;
+  
+    if(!dirname) { /* Find host directory */
+	char	hosttemp[200];
+	char	*p = hosttemp;
+	char	*htemp = site_name;
+	char	tmp1[MAX_STRING_LEN];
+	char	tmp2[MAX_STRING_LEN];
+	char	**test;
+	int i;
+	
+	/* If a single wildcard, then return nothing */
+	if(strcmp(site_name,"*") == 0) return(PRARCH_SUCCESS);
+
+	/* If regular expressions or wildcards */
+	if((index(site_name,'(') || index(site_name,'?') ||
+	    index(site_name,'*'))) {
+	    
+	    if((*htemp == '(') && (*(htemp + strlen(htemp)-1) == ')')) {
+		/* Already a regular expression: strip the parentheses */
+		strncpy(hosttemp,htemp+1,sizeof(hosttemp));
+		hosttemp[sizeof(hosttemp)-1] = '\0';
+		hosttemp[strlen(hosttemp)-1] = '\0';
+	    }
+	    else {
+		/* Translate the wildcard pattern to an anchored regular  */
+		/* expression.  Each input character can become two, so   */
+		/* always keep room for two more plus the closing '$' and */
+		/* the terminator.                                        */
+		char	*plimit = hosttemp + sizeof(hosttemp) - 4;
+
+		*p++ = '^';
+		while(*htemp) {
+		    if(p > plimit) {
+			p_err_string = qsprintf_stcopyr(p_err_string,
+				"archie host pattern too long");
+			return(PRARCH_BAD_ARG);
+		    }
+		    if(*htemp == '*') {*(p++)='.'; *(p++) = *(htemp++);}
+		    else if(*htemp == '?') {*(p++)='.';htemp++;}
+		    else if(*htemp == '.') {*(p++)='\\';*(p++)='.';htemp++;}
+		    else if(*htemp == '[') {*(p++)='\\';*(p++)='[';htemp++;}
+		    else if(*htemp == '$') {*(p++)='\\';*(p++)='$';htemp++;}
+		    else if(*htemp == '^') {*(p++)='\\';*(p++)='^';htemp++;}
+		    else if(*htemp == '\\') {*(p++)='\\';*(p++)='\\';htemp++;}
+		    else *(p++) = *(htemp++);
+		}
+		*p++ = '$';
+		*p++ = '\0';
+	    }
+	    
+	    /* find_sites() reports failure by returning an error code */
+	    /* in place of the pointer                                 */
+	    test = (char **) find_sites(hosttemp,&i,tmp1);
+	    if((int) test == BAD_REGEX) {
+		p_err_string = qsprintf_stcopyr(p_err_string,
+			"archie find_sites(): bad regular expression");
+		return(PRARCH_BAD_REGEX);
+	    }
+	    if((int) test ==  DB_HBYADDR_ERROR) {
+		p_err_string = qsprintf_stcopyr(p_err_string,
+			"archie find_sites() hostbyaddr error");
+		return(PRARCH_DB_ERROR);
+	    }
+	    if((int) test ==  BAD_MALLOC) {
+		p_err_string = qsprintf_stcopyr(p_err_string,
+			"archie find_sites() out of memory");
+		return(PRARCH_OUT_OF_MEMORY);
+	    }
+	    if(i > TOO_MANY_HOSTS) {
+		/* Release every returned name (entries 0 .. i-1)       */
+		/* NOTE(review): the array itself is never freed here   */
+		/* or below - confirm who owns it.                      */
+		while(i--) free(test[i]);
+		return(PRARCH_TOO_MANY);
+	    }
+
+	    while( i-- ) {
+		get_site_file(test[i],tmp2);
+		/* The name is only needed to derive the file name, so */
+		/* release it now; every path below is then leak free  */
+		free(test[i]);
+
+		if((fp = fopen(db_file(tmp2),"r")) == (FILE *) NULL) {
+		    plog(L_DB_ERROR,NOREQ,"fopen failed for %s",db_file(tmp2));
+		    continue;
+		}
+		    
+		if(fstat(fileno(fp),&statbuf) == -1) {
+		    plog(L_DB_ERROR,NOREQ,"can't stat site file %s",db_file(tmp2));
+		    fclose(fp);
+		    continue;
+		}
+		    
+		site_begin = mmap(0,statbuf.st_size,PROT_READ,MAP_SHARED,
+				  fileno(fp),0);   
+		    
+		if((site_begin == (caddr_t)-1) || (site_begin == (caddr_t)NULL)){
+		    plog(L_DB_ERROR,NOREQ,"can't map site file %s",db_file(tmp2));
+		    fclose(fp);
+		    continue;
+		}
+		    
+		bzero(&so,sizeof(so));
+		if(print_sinfo(site_begin,so.site_name,hostip_str,date_str) != 0) {
+		    plog(L_DB_ERROR,NOREQ,"can't obtain site info from %s",
+			 db_file(tmp2));
+		    munmap(site_begin,statbuf.st_size);
+		    fclose(fp);
+		    continue;
+		}
+		    
+		/* The root is the first record in the site after site info */
+		rootrec = *(((site_rec *) site_begin));
+		    
+		bcopy(&rootrec,&(so.site_ent),sizeof(rootrec));
+		bcopy(&(rootrec.in_or_addr.ipaddress),&(so.site_ipaddr),
+		      sizeof(so.site_ipaddr));
+		bcopy(&(rootrec.mod_time),&(so.site_mod_time),
+		      sizeof(so.site_mod_time));
+		clink = atoplink(&so,flags|A2PL_ARDIR|A2PL_ROOT);
+		if(clink) vl_insert(clink,vd,VLI_NOSORT);
+
+		if(munmap(site_begin,statbuf.st_size) == -1) {
+		    plog(L_DB_ERROR,NOREQ,"archie munmap() failed on %s",db_file(tmp2));
+		    fclose(fp);
+		    /* Giving up: release the names not yet visited */
+		    while(i--) free(test[i]);
+		    return(PRARCH_CLEANUP);
+		}
+		    
+		fclose(fp);
+	    }
+	    return(PRARCH_SUCCESS);
+	}
+
+	/* No regular expression or wildcards */
+	if(( host_name = get_host_file_name( site_name )) == (char *)NULL )
+	    return(PRARCH_SUCCESS);	/* No match */
+	    
+	if((fp = fopen(host_name,"r")) == (FILE *) NULL)
+	    return(PRARCH_CANT_OPEN_FILE);
+		
+	/* The file opened here is host_name, so that is what gets logged */
+	if(fstat(fileno(fp),&statbuf) == -1) {
+	    plog(L_DB_ERROR,NOREQ,"can't stat site file %s",host_name);
+	    fclose(fp);
+	    return(PRARCH_CANT_OPEN_FILE);
+	}
+		
+	site_begin = mmap(0,statbuf.st_size,PROT_READ,MAP_SHARED,
+			  fileno(fp),0);   
+		
+	if((site_begin == (caddr_t)-1) || (site_begin == (caddr_t)NULL)){
+	    plog(L_DB_ERROR,NOREQ,"can't map site file %s",host_name);
+	    fclose(fp);
+	    return(PRARCH_CANT_OPEN_FILE);
+	}
+		
+	bzero(&so,sizeof(so));
+	if(print_sinfo(site_begin,so.site_name,hostip_str,date_str) != 0) {
+	    plog(L_DB_ERROR,NOREQ,"can't obtain site info from %s",
+		 host_name);
+	    munmap(site_begin,statbuf.st_size);
+	    fclose(fp);
+	    return(PRARCH_DB_ERROR);
+	}
+		
+	/* The root is the first record in the site after site info */
+	rootrec = *(((site_rec *) site_begin));
+	bcopy(&rootrec,&(so.site_ent),sizeof(rootrec));
+	bcopy(&(rootrec.in_or_addr.ipaddress),&(so.site_ipaddr),
+	      sizeof(so.site_ipaddr));
+	bcopy(&(rootrec.mod_time),&(so.site_mod_time),
+	      sizeof(so.site_mod_time));
+	clink = atoplink(&so,flags|A2PL_ARDIR|A2PL_ROOT);
+	if(clink) {
+	    /* Name the link exactly as the caller asked for it */
+	    clink->name = stcopyr(site_name,clink->name);
+	    vl_insert(clink,vd,VLI_NOSORT);
+	}
+		
+	if(munmap(site_begin,statbuf.st_size) == -1) {
+	    plog(L_DB_ERROR,NOREQ,"archie munmap() failed on %s",host_name);
+	    fclose(fp);
+	    return(PRARCH_CLEANUP);
+	}
+		
+	fclose(fp);
+	return(PRARCH_SUCCESS);
+    }
+    
+    /* A directory was named: list its contents */
+    bzero(&so,sizeof(so));
+    
+    if(( host_name = get_host_file_name( site_name )) == (char *)NULL )
+	return(PRARCH_DONT_HAVE_SITE);
+    
+    if((fp = fopen(host_name, "r")) == NULL) 
+	return(PRARCH_CANT_OPEN_FILE);
+        
+    if(fstat(fileno(fp),&statbuf) == -1) {
+	fclose(fp);
+	return(PRARCH_CANT_OPEN_FILE);
+    }
+    
+    site_begin = mmap(0,statbuf.st_size,PROT_READ,MAP_SHARED,
+		      fileno(fp),0);   
+    
+    if((site_begin == (caddr_t) -1) || (site_begin == (caddr_t) NULL))  {
+	fclose(fp);
+	return(PRARCH_CANT_OPEN_FILE);
+    }
+    
+    if(print_sinfo(site_begin,so.site_name,hostip_str,date_str) != 0) {
+	munmap(site_begin,statbuf.st_size);
+	fclose(fp);
+	return(PRARCH_DB_ERROR);
+    }
+    
+    /* The mapping is treated as an array of site_rec */
+    site_end = (site_rec *)site_begin + statbuf.st_size / sizeof(site_rec);
+    
+    rootrec = *(((site_rec *) site_begin));
+
+    bcopy(&(rootrec.in_or_addr.ipaddress),&(so.site_ipaddr),
+	  sizeof(so.site_ipaddr));
+    bcopy(&(rootrec.mod_time),&(so.site_mod_time),
+	  sizeof(so.site_mod_time));
+
+    /* Record 0 is the root, so start at 1 */
+    for(recno = 1;(site_ptr = (site_rec *)site_begin + recno) < site_end; 
+	recno++){
+	
+	if((loopcount++ & 0x3ff) == 0) ardp_accept();
+	
+	curr_site_rec = *site_ptr;
+	
+	/* Only recompute the path when the parent changes */
+	if(last_parent != curr_site_rec.parent_ind){
+	    
+	    if(find_ancestors(site_begin, recno, result) != 0) {
+		munmap(site_begin,statbuf.st_size);
+		fclose(fp);
+		return(PRARCH_DB_ERROR);
+	    }
+	    
+	    last_parent = curr_site_rec.parent_ind;
+	    
+	    /* Don't want to check the leading / */
+	    if(strcmp(dirname,result+1) == 0)  {
+		correct_dir++;
+		strcpy(so.site_path,result);
+	    }
+	    /* Left the requested directory: nothing more to list */
+	    else if(correct_dir) break;
+	}
+	bcopy(&curr_site_rec,&(so.site_ent),sizeof(curr_site_rec));
+	if(correct_dir) {
+	    if((loopcount & 0x7f) == 0) ardp_accept();
+	    clink = atoplink(&so,flags);
+	    if(clink) vl_insert(clink,vd,VLI_NOSORT);
+	}
+    }
+    
+    munmap(site_begin,statbuf.st_size);
+    fclose(fp);
+    return(PRARCH_SUCCESS);
+}
--- a/prospero/lib/psrv/archie2/prarch_match.c
+++ b/prospero/lib/psrv/archie2/prarch_match.c
@@ -0,0 +1,637 @@
+/*XXX Note to make this thread safe, need to mutex re_comp and re_exec */
+#include <stdio.h>
+#include <sys/types.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>           /* For malloc and free */
+
+#define _toupper(c)	((c)-'a'+'A')
+
+#ifdef MMAP
+#include <sys/mman.h>
+#endif
+
+/* Archie definitions */
+#include <ndbm.h>
+#include <defines.h>
+#include <structs.h>
+#include <database.h>
+#include <error.h>
+
+#include "prarch.h"
+
+#include <ardp.h>
+#include <pfs.h>
+#include <perrno.h>
+#include <plog.h>
+
+VLINK	atoplink();
+VLINK	atoqlink();
+
+char *re_comp();
+char *make_lcase();
+int get_match_list();
+
+extern char *strings_begin;
+extern long strings_table_size;
+extern DBM *fast_strings;
+
+/* So we can adjust our cache policy based on queue length */
+extern int  pQlen;
+
+/* Case-folding table indexed by byte value: 'A'-'Z' map to 'a'-'z',  */
+/* every other byte maps to itself.  Used by the case-insensitive     */
+/* substring search in prarch_match().                                */
+static	char	lowertable[256] = { 
+'\000','\001','\002','\003','\004','\005','\006','\007',
+'\010','\011','\012','\013','\014','\015','\016','\017',
+'\020','\021','\022','\023','\024','\025','\026','\027',
+'\030','\031','\032','\033','\034','\035','\036','\037',
+' ','!','"','#','$','%','&','\'',
+'(',')','*','+',',','-','.','/',
+'0','1','2','3','4','5','6','7',
+'8','9',':',';','<','=','>','?',
+'@','a','b','c','d','e','f','g',
+'h','i','j','k','l','m','n','o',
+'p','q','r','s','t','u','v','w',
+'x','y','z','[','\\',']','^','_',
+'`','a','b','c','d','e','f','g',
+'h','i','j','k','l','m','n','o',
+'p','q','r','s','t','u','v','w',
+'x','y','z','{','|','}','~','\177',
+'\200','\201','\202','\203','\204','\205','\206','\207',
+'\210','\211','\212','\213','\214','\215','\216','\217',
+'\220','\221','\222','\223','\224','\225','\226','\227',
+'\230','\231','\232','\233','\234','\235','\236','\237',
+'\240','\241','\242','\243','\244','\245','\246','\247',
+'\250','\251','\252','\253','\254','\255','\256','\257',
+'\260','\261','\262','\263','\264','\265','\266','\267',
+'\270','\271','\272','\273','\274','\275','\276','\277',
+'\300','\301','\302','\303','\304','\305','\306','\307',
+'\310','\311','\312','\313','\314','\315','\316','\317',
+'\320','\321','\322','\323','\324','\325','\326','\327',
+'\330','\331','\332','\333','\334','\335','\336','\337',
+'\340','\341','\342','\343','\344','\345','\346','\347',
+'\350','\351','\352','\353','\354','\355','\356','\357',
+'\360','\361','\362','\363','\364','\365','\366','\367',
+'\370','\371','\372','\373','\374','\375','\376','\377'};
+
+/* Maximum number of cached queries; once reached, add_to_cache()     */
+/* reuses the last entry in the list instead of allocating a new one  */
+#define MATCH_CACHE_SIZE     15
+
+/* One cached prarch_match() result.  Entries form a singly linked    */
+/* list headed by mcache; check_cache() moves a hit to the front.     */
+struct match_cache {
+    char                *arg;	     /* Matched regular expression          */
+    int			max_hits;    /* Maximum matches <0 = found all      */
+    int			offset;      /* Offset                              */
+    search_sel 		search_type; /* Search method (the one used)        */
+    search_sel          req_type;    /* Requested method                    */
+    VLINK		matches;     /* Matches                             */
+    VLINK		more;	     /* Additional matches                  */
+    int			flags;       /* Flags: for link attributes          */
+    struct match_cache 	*next;       /* Next entry in cache                 */
+};
+
+/* Head of the cache list */
+static struct match_cache *mcache = NULL;
+
+/* Number of entries allocated so far (never exceeds MATCH_CACHE_SIZE) */
+static int		  cachecount = 0;
+
+/*
+ * prarch_match - Search archie database for specified file
+ *
+ * 	PRARCH_MATCH searches the archie database and returns
+ *      a list of files matching the provided regular expression
+ *      
+ *  ARGS:  program_name - regular expression for files to match
+ *             max_hits - maximum number of entries to return (max hits)
+ *            max_match - maximum number of unique strings
+ *           max_hitspm - maximum hits per matching string
+ *               offset - start the search after this many hits
+ *          search_type - search method 
+ *                   vd - pointer to directory to be filled in
+ *                        (must be empty on entry)
+ *                flags - A2PL_* attribute flags, passed on to atoplink()
+ *          onlystrings - flag - only return strings, not matches
+ *
+ *   Search method is one of:   S_FULL_REGEX
+ *		                S_EXACT 
+ *                              S_SUB_NCASE_STR 
+ *                              S_SUB_CASE_STR 
+ *   or one of the S_E_* variants, which try an exact lookup first and
+ *   fall back to the corresponding method if the string is not found.
+ *
+ *  RETURNS: PRARCH_SUCCESS, or another PRARCH_* code on failure
+ */
+int prarch_match(char	*program_name, /* Regular expression to be matched  */
+		 int	max_hits,      /* Maximum number of entries to rtrn */
+		 int	max_match,     /* Maximum number of unique strings  */
+		 int	max_hitspm,    /* Maximum hits per match            */
+		 int	offset,	       /* Skip # matches before starting    */
+		 search_sel search_type, /* Search method                   */
+		 VDIR	vd,	       /* Directory to be filled in         */
+		 int	flags,         /* Flag for link attributes          */
+		 int	onlystrings)   /* Only return matching strings      */
+{
+   /*
+   * Search the database for the string specified by 'program_name'.  Use the
+   * fast dbm strings database for exact searches, otherwise search through
+   * the strings table.  Stop searching after all matches have been found, or
+   * 'max_hits' matches have been found, whichever comes first.  
+   */
+  char 		s_string[MAX_STRING_LEN];
+  char		*strings_ptr;
+  char		*strings_curr_off;
+  strings_header str_head;
+  datum 	search_key, key_value;
+  search_sel 	new_search_type = S_EXACT;    /* Alternate search method */
+  search_sel 	or_search_type = search_type; /* Original search method */
+  int 		nocase = 0;
+  int 		hits_exceeded = FALSE;	      /* should be boolean? */
+  char 		*strings_end;
+  int 		match_number;
+  site_out 	**site_outptr;
+  site_out 	site_outrec;
+  int 		i;
+  VLINK		cur_link;
+  int		loopcount = 0;
+  int		retval;
+  int		match_rem = max_match;
+
+  if(!program_name || !(*program_name)) return(PRARCH_BAD_ARG);
+
+  /* s_string (and pattern below) are fixed size buffers; refuse */
+  /* anything that would not fit rather than overrun them        */
+  if(strlen(program_name) >= sizeof(s_string)) return(PRARCH_BAD_ARG);
+
+  if((0 < max_hits) && (max_hits < match_rem)) match_rem = max_hits;
+  if((0 < max_hits) && (max_hits < max_hitspm)) max_hitspm = max_hits;
+
+  strcpy(s_string, program_name);
+
+  /* See if we can use a less expensive search method */
+  if((search_type == S_FULL_REGEX) || (search_type == S_E_FULL_REGEX)) {
+      /* Regex search assumes wildcards on both ends, so remove from string */
+      if(strncmp(program_name,".*",2) == 0)
+	  strcpy(s_string, program_name+2);
+      if((i = strlen(s_string)) >= 2) {
+	  if(strcmp(s_string+i-2,".*") == 0)
+	      *(s_string+i-2) = '\0';
+      }
+
+      /* If no special characters, then fall back to substring search */
+      if((search_type == S_FULL_REGEX) && 
+	 (strpbrk(s_string,"\\^$.,[]<>*+?|(){}/") == NULL)) 
+	  or_search_type = search_type = S_SUB_CASE_STR;
+      else if((search_type == S_E_FULL_REGEX) && 
+	      (strpbrk(s_string,"\\^$.,[]<>*+?|(){}/") == NULL))
+	  or_search_type = search_type = S_E_SUB_CASE_STR;
+  }
+
+  /* The caching code assumes we are handed an empty directory */
+  /* if not, return an error for now.  Eventually we will get  */
+  /* rid of that assumption                                    */
+  if(vd->links) {
+      plog(L_DIR_ERR, NOREQ, "Prarch_match handed non empty dir",0);
+      return(PRARCH_BAD_ARG);
+  }
+
+  if(!onlystrings && (check_cache(s_string,max_hits,offset,search_type,
+		 flags,&(vd->links)) == TRUE)) {
+      plog(L_DB_INFO, NOREQ, "Responding with cached data",0);
+      return(PSUCCESS);
+  }
+
+  /* NOTE(review): this is an array of pointers but is sized with     */
+  /* sizeof(site_out); left as is in case get_match_list() relies on  */
+  /* the extra room - confirm before shrinking.                       */
+  site_outptr = (site_out **) malloc((unsigned)(sizeof(site_out) * 
+						(max_hits + offset)));
+  if(!site_outptr) return(PRARCH_OUT_OF_MEMORY);
+
+ startsearch:
+
+  strings_ptr = strings_begin;
+  strings_end = strings_begin + (int) strings_table_size;
+
+  match_number = 0;
+
+  switch(search_type){
+
+  case S_E_SUB_CASE_STR:
+      new_search_type = S_SUB_CASE_STR;
+      goto exact_match;
+  case S_E_SUB_NCASE_STR:
+      new_search_type = S_SUB_NCASE_STR;
+      goto exact_match;
+  case S_E_FULL_REGEX:
+      new_search_type = S_FULL_REGEX;
+  exact_match:
+  case S_EXACT:
+
+      search_key.dptr = s_string;
+      search_key.dsize = strlen(s_string) + 1;
+
+      ardp_accept();
+      key_value = dbm_fetch(fast_strings, search_key) ;
+
+      if(key_value.dptr != (char *)NULL){ /* string in table */
+
+	int string_pos;
+
+	/* assumes the stored value is exactly an int offset into the */
+	/* strings table - TODO confirm dsize == sizeof(int)          */
+	bcopy(key_value.dptr,(char *)&string_pos, key_value.dsize);
+
+	strings_ptr += string_pos;
+
+	bcopy(strings_ptr,(char *)&str_head,sizeof(strings_header));
+
+	ardp_accept();
+
+	if(onlystrings) {
+	    cur_link = atoqlink(strings_ptr,max_hits,max_match,max_hitspm);
+	    if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
+	    if(--match_rem <= 0) {
+		hits_exceeded = TRUE;
+		break;
+	    }
+	}
+	else if(str_head.filet_index != -1) {
+	    retval = get_match_list((int) str_head.filet_index, max_hitspm,
+				    &match_number, site_outptr, FALSE);
+	    
+	    if((retval != A_OK) && (retval != HITS_EXCEEDED)) {
+	      plog(L_DB_ERROR, NOREQ,"get_match_list failed (%d)",retval,0);
+	      goto cleanup;
+	    }
+
+	    if( match_number >= max_hits + offset ){
+		hits_exceeded = TRUE;
+		break;
+	    }
+	}
+      }
+      else if (search_type != S_EXACT) { /* Not found - but try other method */
+	  search_type = new_search_type;
+	  goto startsearch;
+      }
+      break;
+
+  case S_FULL_REGEX:
+	
+      if(re_comp(s_string) != (char *)NULL){
+	  /* Nothing has been collected yet; just release the array */
+	  free((char *)site_outptr);
+	  return (PRARCH_BAD_REGEX);
+      }
+
+      /* So that the first iteration starts at strings_begin */
+      str_head.str_len = -1;
+
+      ardp_accept();
+
+      while((strings_curr_off = strings_ptr + str_head.str_len + 1) < strings_end){
+
+	if((loopcount++ & 0x7ff) == 0) ardp_accept();
+
+	strings_ptr = strings_curr_off;
+
+	bcopy(strings_ptr,(char *)&str_head,sizeof(strings_header));
+
+	strings_ptr += sizeof(strings_header);
+	    
+	if(re_exec( strings_ptr ) == 1 ){ /* TRUE */
+	  strings_curr_off = strings_ptr;
+
+	  ardp_accept();
+
+	  if(onlystrings) {
+	    if(strstr(strings_ptr," -> ") == NULL) { /* No broken strings */
+		cur_link = atoqlink(strings_ptr,max_hits,max_match,max_hitspm);
+		if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
+		if(--match_rem <= 0) {
+		    hits_exceeded = TRUE;
+		    break;
+		}
+	    }
+	  } 
+	  else if(str_head.filet_index != -1){
+	    retval = get_match_list((int) str_head.filet_index, max_hitspm,
+				    &match_number, site_outptr, FALSE);
+
+	    if((retval != A_OK) && (retval != HITS_EXCEEDED)) {
+	      plog(L_DB_ERROR, NOREQ,"get_match_list failed (%d)",retval,0);
+	      goto cleanup;
+	    }
+
+	    if( match_number >= max_hits + offset ){
+	      hits_exceeded = TRUE;
+	      break;
+	    }
+	  }
+        }
+      }
+
+      break;
+
+#define TABLESIZE 256
+
+  case S_SUB_NCASE_STR:
+      nocase++;
+  case S_SUB_CASE_STR: 	  {
+      char			pattern[MAX_STRING_LEN];
+      int			skiptab[TABLESIZE];
+      register int		pc, tc;
+      register int		local_loopcount = 0xfff;
+      char			*bp1;
+      int			skip;
+      int			plen;
+      int			plen_1;
+      int			tlen;
+      unsigned char		tchar; 
+
+      plen = strlen(s_string);
+      plen_1 = plen -1;
+
+      /* Old code (replaced by inline code taken from initskip)       */
+      /* patlen = strlen(s_string ) ;                                 */
+      /* initskip(s_string, patlen, search_type == S_SUB_NCASE_STR) ; */
+
+      if(nocase) {
+	  /* Index as unsigned char: a plain char may be negative */
+	  for(pc = 0; s_string[pc]; pc++)
+	      pattern[pc] = lowertable[(unsigned char) s_string[pc]];
+	  pattern[pc] = '\0';
+      }
+      else strcpy(pattern,s_string);
+
+      for( i = 0 ; i < TABLESIZE ; i++ ) 
+	  skiptab[ i ] = plen;
+
+      /* Note that we want both ucase and lcase in this table if nocase */
+      for( i = 0, tchar = *pattern; i < plen ; i++, tchar = *(pattern + i)) {
+	  skiptab[tchar] = plen - 1 - i;
+	  if(nocase && islower(tchar)) 
+	      skiptab[_toupper(tchar)] = plen - 1 - i;
+      }
+      
+      /* Begin heavily optimized and non portable code */
+
+      /* Note that we are depending on str_head being 8 bytes */
+      tlen = -9;                          /* str_head.str_len */
+
+      strings_curr_off = strings_ptr;
+
+      while((strings_curr_off += tlen + 9) < strings_end) {
+	  if(--local_loopcount == 0) {
+	      ardp_accept();
+	      local_loopcount = 0xfff;
+	  }
+
+	  strings_ptr = strings_curr_off;
+
+	  /* This is a kludge, non-portable, but it eliminates a pr call  */
+	  /* Note that the size is 8 on suns. Is there a better way?      */
+	  /* bcopy(strings_ptr,(char *)&str_head,sizeof(strings_header)); */
+	  bp1 = (char *) &str_head;
+	  /* The copying of the file index is done only on a match */
+	  bp1[4] = strings_ptr[4]; bp1[5] = strings_ptr[5];
+	  /* bp1[6] = strings_ptr[6]; bp1[7] = strings_ptr[7];     */
+
+	  tlen = (unsigned short) str_head.str_len;
+
+	  /* To catch database corruption, this is a sanity check */
+	  if((tlen < 0) || (tlen > MAX_STRING_LEN)) {
+	      plog(L_DB_ERROR, NOREQ,"Database corrupt: string length out of bounds",0);
+	      break;
+	  }
+
+	  /* Old code (replaced by inline code taken from strfind) */
+	  /* if(strfind(strings_ptr,str_head.str_len))             */
+
+	  if( tlen <= plen_1 ) continue;
+	  pc = tc = plen_1;
+
+	  strings_ptr += 8;
+
+	  /* Moved the nocase test outside the inner loop for performace */
+	  /* Clauses are identical except for the first if               */
+	  if(nocase) do {
+	      tchar = strings_ptr[tc];
+
+	      /* improve efficiency of this test */
+	      if(lowertable[tchar] == pattern[pc]) {--pc; --tc;}
+	      else {
+		  skip = skiptab[tchar] ;
+		  tc += (skip < plen_1 - pc) ? plen : skip ;
+		  pc = plen_1 ;
+	      } 
+	  } while( pc >= 0 && tc < tlen ) ;
+	  else /* (!nocase) */ do {
+	      tchar = strings_ptr[tc];
+
+	      /* Compare as unsigned so that 8 bit characters can match */
+	      /* where plain char is signed                             */
+	      if(tchar == (unsigned char) pattern[pc]) {--pc; --tc;}
+	      else {
+		  skip = skiptab[tchar] ;
+		  tc += (skip < plen_1 - pc) ? plen : skip ;
+		  pc = plen_1 ;
+	      } 
+	  } while( pc >= 0 && tc < tlen ) ;
+
+	  if(pc >= 0) continue;
+
+	  /* We have a match */
+
+	  /* Finish copying str_head - strings_curr_off */
+	  /* is old strings_ptr.                        */
+	  bp1[0] = strings_curr_off[0]; bp1[1] = strings_curr_off[1];
+	  bp1[2] = strings_curr_off[2]; bp1[3] = strings_curr_off[3];
+
+	  /* End heavily optimized and non portable code */
+
+	  ardp_accept();
+
+	  if(onlystrings) {
+	    if(strstr(strings_ptr," -> ") == NULL) { /* No broken strings */
+		cur_link = atoqlink(strings_ptr,max_hits,max_match,max_hitspm);
+		if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
+		if(--match_rem <= 0) {
+		    hits_exceeded = TRUE;
+		    break;
+		}
+	    }
+	  } 
+	  else if(str_head.filet_index != -1){
+	    retval = get_match_list((int) str_head.filet_index, max_hitspm,
+				    &match_number, site_outptr, FALSE);
+
+	    if((retval != A_OK) && (retval != HITS_EXCEEDED)) {
+	      plog(L_DB_ERROR,NOREQ,"get_match_list failed (%d)",retval,0);
+	      goto cleanup;
+	    }
+
+	    if( match_number >= max_hits + offset ) {
+	      hits_exceeded = TRUE;
+	      break;
+	    }
+	  }
+	}
+    }
+      break;
+
+    default:
+      /* Unknown search method: nothing collected, release the array */
+      free((char *)site_outptr);
+      return(PRARCH_BAD_ARG);
+
+    cleanup:
+      for(i =  0;i <  match_number; i++) free((char *)site_outptr[i]);
+      free((char *)site_outptr);
+      return(PRARCH_DB_ERROR);
+    }
+
+  /* Turn the collected records into links, skipping the first 'offset' */
+  for(i =  0;i <  match_number; i++){
+    if((i & 0x7f) == 0) ardp_accept();
+    site_outrec = *site_outptr[i];
+    if(i >= offset) {
+      /* atoplink() takes a pointer to the record, as in prarch_host() */
+      cur_link = atoplink(&site_outrec,flags);
+      if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
+    }
+    free((char *)site_outptr[i]);
+  }
+  free((char *)site_outptr);
+
+  if(hits_exceeded) {
+    /* Insert a continuation entry */
+  }
+    
+  /* Don't cache exact matches when the queue is long */
+  if((search_type == S_EXACT) && (pQlen > (MATCH_CACHE_SIZE - 5)))
+    return(PRARCH_SUCCESS);
+  
+  /* A negative max_hits in the cache records that all matches were found */
+  if(!onlystrings)
+    add_to_cache(vd->links,s_string, (hits_exceeded ? max_hits : -max_hits),
+		 offset,search_type,or_search_type,flags);
+
+  return(PRARCH_SUCCESS);
+}
+
+
+/*
+ * check_cache - Check for cached results
+ *
+ *  Looks for a cache entry whose search type (used or requested),
+ *  offset, argument and flags match the query, and which holds either
+ *  all results or at least max_hits of them.  On a hit the entry is
+ *  moved to the front of the cache, its link list is trimmed to (or
+ *  extended toward) max_hits links, and the list is returned in *linkpp.
+ *
+ *  RETURNS: TRUE on a cache hit, FALSE (and *linkpp = NULL) otherwise
+ */
+check_cache(arg,max_hits,offset,qtype,flags,linkpp)
+    char	*arg;
+    int		max_hits;
+    int		offset;
+    search_sel	qtype;
+    int		flags;
+    VLINK	*linkpp;
+    {    
+	struct match_cache 	*cachep = mcache;
+	struct match_cache 	*pcachep = NULL;
+	VLINK			cur_link;
+	int			count = max_hits;
+
+	while(cachep) {
+	    if(((qtype == cachep->search_type)||(qtype == cachep->req_type))&&
+	       (cachep->offset == offset) &&
+	       /* All results are in cache - or enough to satisfy request */
+	       ((cachep->max_hits < 0) || (max_hits <= cachep->max_hits)) &&
+	       (strcmp(cachep->arg,arg) == 0) &&
+	       (cachep->flags == flags)) {
+		/* We have a match.  Move to front of list */
+		if(pcachep) {
+		    pcachep->next = cachep->next;
+		    cachep->next = mcache;
+		    mcache = cachep;
+		}
+
+		/* We now have to clear the expanded bits or the links  */
+		/* returned in previous queries will not be returned    */
+		/* We also need to truncate the list if there are more  */
+		/* matches than requested                               */
+		cur_link = cachep->matches;
+
+		/* IMPORTANT: This code assumes the list is one         */
+		/* dimensional, which is the case because we called     */
+		/* vl_insert with the VLI_NOSORT option                 */
+		/* The head's previous pointer is used as the tail.     */
+		while(cur_link) {
+		    cur_link->expanded = FALSE;
+		    if((--count == 0) && cur_link->next) {
+			/* truncate list */
+			if(cachep->more) {
+			    /* Put the cut off tail in front of the    */
+			    /* links already held in more              */
+			    cur_link->next->previous = cachep->more->previous;
+			    cachep->more->previous = cachep->matches->previous;
+			    cachep->matches->previous->next = cachep->more;
+			    /* The combined list now starts at the cut; */
+			    /* without this the cut off links are lost  */
+			    cachep->more = cur_link->next;
+			}
+			else {
+			    cachep->more = cur_link->next;
+			    cachep->more->previous = cachep->matches->previous;
+			}
+			cur_link->next = NULL;
+			cachep->matches->previous = cur_link;
+		    }
+		    else if ((cur_link->next == NULL) && (count != 0) &&
+			     cachep->more) {
+			/* Merge lists */
+			cachep->matches->previous = cachep->more->previous;
+			cur_link->next = cachep->more;
+			cachep->more->previous = cur_link;
+			cachep->more = NULL;
+		    }
+		    cur_link = cur_link->next;
+		}
+		*linkpp = cachep->matches;
+		return(TRUE);
+	    }
+	    pcachep = cachep;
+	    cachep = cachep->next;
+	}
+	*linkpp = NULL;
+	return(FALSE);
+    }
+
+	
+/*
+ * add_to_cache - Cache the response for later use
+ *
+ *  Records the link list vl together with the query that produced it.
+ *  A new entry is allocated until MATCH_CACHE_SIZE entries exist; after
+ *  that the last entry in the list is recycled and its old links are
+ *  freed.  Either way the entry ends up at the front of the cache.
+ *  The first link of vl is marked dontfree because the cache now holds
+ *  the list.  If a new entry cannot be allocated nothing is cached and
+ *  vl is left untouched.
+ */
+add_to_cache(vl,arg,max_hits,offset,search_type,req_type,flags)
+    VLINK	vl;
+    char	*arg;
+    int		max_hits;
+    int		offset;
+    search_sel	search_type;
+    search_sel	req_type;
+    int		flags;
+    {
+      struct match_cache 	*newresults = NULL;
+      struct match_cache 	*pcachep = NULL;
+
+      if(cachecount < MATCH_CACHE_SIZE) { /* Create a new entry */
+	newresults = (struct match_cache *) malloc(sizeof(struct match_cache));
+	/* Caching is best effort: on failure simply don't cache */
+	if(!newresults) return(0);
+	cachecount++;
+	newresults->next = mcache;
+	mcache = newresults;
+	newresults->arg = stcopy(arg);
+	newresults->max_hits = max_hits;
+	newresults->offset = offset;
+	newresults->search_type = search_type;
+	newresults->req_type = req_type;
+	newresults->flags = flags;
+	newresults->matches = NULL;
+	newresults->more = NULL;
+    }
+      else { /* Use last entry - Assumes list has at least two entries */
+	  pcachep = mcache;
+	  while(pcachep->next) pcachep = pcachep->next;
+	  newresults = pcachep;
+
+	  /* move to front of list */
+	  newresults->next = mcache;
+	  mcache = newresults;
+
+	  /* Fix the last entry so we don't have a cycle */
+	  while(pcachep->next != newresults) pcachep = pcachep->next;
+	  pcachep->next = NULL;
+
+	  /* Free the old results */
+	  if(newresults->matches) {
+	      newresults->matches->dontfree = FALSE;
+	      vllfree(newresults->matches);
+	      newresults->matches = NULL;
+	  }
+	  if(newresults->more) {
+	      newresults->more->dontfree = FALSE;
+	      vllfree(newresults->more);
+	      newresults->more = NULL;
+	  }
+
+	  newresults->arg = stcopyr(arg,newresults->arg);
+	  newresults->max_hits = max_hits;
+	  newresults->offset = offset;
+	  newresults->search_type = search_type;
+	  newresults->req_type = req_type;
+	  newresults->flags = flags;
+      }
+
+      /* Since we are caching the data.  If there are any links, */
+      /* note that they should not be freed when sent back       */
+      if(vl) vl->dontfree = TRUE;
+    
+      newresults->matches = vl;
+      return(0);
+  }
+      
+