
638 lines
19 KiB
Raw Normal View History

2024-05-27 16:13:40 +02:00
/* XXX Note: to make this thread safe, calls to re_comp and re_exec need to be protected by a mutex */
#include <stdio.h>
#include <sys/types.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h> /* For malloc and free */
/* ASCII-only upper-casing by arithmetic. The macro does no range check, so */
/* the result is only meaningful when c is a lowercase letter 'a'..'z'.     */
#define _toupper(c) ((c)-'a'+'A')
#ifdef MMAP
#include <sys/mman.h>
/* Archie definitions */
#include <ndbm.h>
#include <defines.h>
#include <structs.h>
#include <database.h>
#include <error.h>
#include "prarch.h"
#include <ardp.h>
#include <pfs.h>
#include <perrno.h>
#include <plog.h>
/* Old-style (unprototyped) declarations of helpers called below */
VLINK atoplink();
VLINK atoqlink();
char *re_comp();
char *make_lcase();
int get_match_list();
/* The strings table scanned by prarch_match: strings_table_size bytes */
/* starting at strings_begin */
extern char *strings_begin;
extern long strings_table_size;
/* dbm handle queried with dbm_fetch for exact-string lookups */
extern DBM *fast_strings;
/* So we can adjust our cache policy based on queue length */
extern int pQlen;
/* Character translation table. The case-insensitive substring search     */
/* indexes it by character value to fold both pattern and text before     */
/* comparing (presumably to lower case, going by the name).               */
/* NOTE(review): only part of the initializer is visible in this excerpt. */
static char lowertable[256] = {
' ','!','"','#','$','%','&','\'',
/* One cached query result. Entries form a singly linked list through */
/* 'next'; the list head is mcache.                                   */
struct match_cache {
char *arg; /* Matched regular expression */
int max_hits; /* Maximum matches; <0 = found all */
int offset; /* Offset */
search_sel search_type; /* Search method (the one used) */
search_sel req_type; /* Requested method */
VLINK matches; /* Matches */
VLINK more; /* Additional matches */
int flags; /* Flags: for link attributes */
struct match_cache *next; /* Next entry in cache */
/* Head of the cache list; entries that are hit or (re)filled are moved */
/* to the front, so the last entry is the one that gets recycled.       */
static struct match_cache *mcache = NULL;
/* Compared against MATCH_CACHE_SIZE to decide whether a new entry may be */
/* allocated or the last one must be reused.                              */
static int cachecount = 0;
* prarch_match - Search archie database for specified file
* PRARCH_MATCH searches the archie database and returns
* a list of files matching the provided regular expression
* ARGS: program_name - regular expression for files to match
* max_hits - maximum number of entries to return (max hits)
* max_match - maximum number of unique strings
* max_hitspm - maximum hits per match
* offset - start the search after this many hits
* search_type - search method
* vd - pointer to directory to be filled in
* flags - flag for link attributes
* archiedir - flag - directory links should be to archie
* (note: archiedir does not appear in the current parameter list)
* onlystrings - flag - only return strings, not matches
* Search method is one of: S_FULL_REGEX
int prarch_match(char *program_name, /* Regular expression to be matched */
int max_hits, /* Maximum number of entries to return */
int max_match, /* Maximum number of unique strings */
int max_hitspm, /* Maximum hits per match */
int offset, /* Skip # matches before starting */
search_sel search_type, /* Search method */
VDIR vd, /* Directory to be filled in */
int flags, /* Flag for link attributes */
int onlystrings) /* Only return matching strings */
* Search the database for the string specified by 'program_name'. Use the
* fast dbm strings database if 'is_exact' is set, otherwise search through
* the strings table. Stop searching after all matches have been found, or
* 'max_hits' matches have been found, whichever comes first.
char s_string[MAX_STRING_LEN]; /* Working copy of program_name (wildcards stripped) */
char *strings_ptr; /* Current position in the strings table */
char *strings_curr_off; /* Start of the record being examined */
strings_header str_head; /* Header copied out of the current record */
datum search_key, key_value; /* dbm key and fetched value for exact lookup */
search_sel new_search_type = S_EXACT; /* Alternate search method */
search_sel or_search_type = search_type; /* Original search method */
int nocase = 0; /* Non-zero selects the case-folding substring compare */
int hits_exceeded = FALSE; /* should be boolean? */
char *strings_end; /* strings_begin + strings_table_size */
int match_number; /* Number of entries get_match_list has stored in site_outptr */
int patlen; /* Only referenced by commented-out code in the visible source */
site_out **site_outptr; /* malloc'ed array of per-hit record pointers */
site_out site_outrec; /* Copy of one hit record, handed to atoplink */
int i;
VLINK cur_link; /* Link about to be inserted into vd */
int loopcount = 0; /* Drives the periodic ardp_accept() call in the regex scan */
int retval; /* Result of get_match_list */
int match_rem = max_match; /* Unique strings still allowed (onlystrings mode) */
/* Validate the request, make a working copy of the pattern in s_string, */
/* and downgrade a regex search to a plain substring search when the     */
/* pattern contains no regex metacharacters.                             */
if(!program_name || !(*program_name)) return(PRARCH_BAD_ARG);
/* s_string is a fixed MAX_STRING_LEN buffer: refuse a pattern that does */
/* not fit (including its terminating NUL) instead of letting strcpy     */
/* write past the end of the array.                                      */
if(strlen(program_name) >= MAX_STRING_LEN) return(PRARCH_BAD_ARG);
/* A positive max_hits also caps the per-string and unique-string limits */
if((0 < max_hits) && (max_hits < match_rem)) match_rem = max_hits;
if((0 < max_hits) && (max_hits < max_hitspm)) max_hitspm = max_hits;
strcpy(s_string, program_name);
/* See if we can use a less expensive search method */
if((search_type == S_FULL_REGEX) || (search_type == S_E_FULL_REGEX)) {
/* Regex search assumes wildcards on both ends, so remove from string */
if(strncmp(program_name,".*",2) == 0)
strcpy(s_string, program_name+2);
if((i = strlen(s_string)) >= 2) {
if(strcmp(s_string+i-2,".*") == 0)
*(s_string+i-2) = '\0';
/* If no special characters, then fall back to substring search */
if((search_type == S_FULL_REGEX) &&
(strpbrk(s_string,"\\^$.,[]<>*+?|(){}/") == NULL))
or_search_type = search_type = S_SUB_CASE_STR;
else if((search_type == S_E_FULL_REGEX) &&
(strpbrk(s_string,"\\^$.,[]<>*+?|(){}/") == NULL))
or_search_type = search_type = S_E_SUB_CASE_STR;
/* The caching code assumes we are handed an empty directory */
/* if not, return an error for now. Eventually we will get */
/* rid of that assumption */
if(vd->links) {
plog(L_DIR_ERR, NOREQ, "Prarch_match handed non empty dir",0);
/* Try to answer from the match cache; onlystrings requests never use it. */
/* On a hit check_cache stores the cached link list in vd->links.         */
if(!onlystrings && (check_cache(s_string,max_hits,offset,search_type,
flags,&(vd->links)) == TRUE)) {
plog(L_DB_INFO, NOREQ, "Responding with cached data",0);
/* Array that get_match_list fills with pointers to hit records.          */
/* NOTE(review): the elements are site_out pointers but the size is       */
/* computed with sizeof(site_out), and max_hits + offset is not checked   */
/* for being zero or negative before the multiplication -- confirm.       */
site_outptr = (site_out **) malloc((unsigned)(sizeof(site_out) *
(max_hits + offset)));
if(!site_outptr) return(PRARCH_OUT_OF_MEMORY);
/* Start scanning at the beginning of the strings table */
strings_ptr = strings_begin;
strings_end = strings_begin + (int) strings_table_size;
match_number = 0;
/* Each assignment below picks the method to fall back to when the exact  */
/* lookup finds nothing. NOTE(review): the labels that choose between     */
/* them are not visible in this excerpt.                                  */
new_search_type = S_SUB_CASE_STR;
goto exact_match;
new_search_type = S_SUB_NCASE_STR;
goto exact_match;
new_search_type = S_FULL_REGEX;
/* Exact match: look the whole string up in the dbm index instead of */
/* scanning the strings table.                                       */
case S_EXACT:
/* The key is the string including its terminating NUL */
search_key.dptr = s_string;
search_key.dsize = strlen(s_string) + 1;
key_value = dbm_fetch(fast_strings, search_key) ;
if(key_value.dptr != (char *)NULL){ /* string in table */
int string_pos;
/* The stored value is used as a byte offset into the strings table.      */
/* NOTE(review): key_value.dsize bytes are copied into an int without     */
/* checking that dsize == sizeof(int) -- confirm the database guarantees  */
/* this.                                                                  */
bcopy(key_value.dptr,(char *)&string_pos, key_value.dsize);
strings_ptr += string_pos;
bcopy(strings_ptr,(char *)&str_head,sizeof(strings_header));
if(onlystrings) {
/* Caller only wants the matching string itself, as a link */
cur_link = atoqlink(strings_ptr,max_hits,max_match,max_hitspm);
if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
if(--match_rem <= 0) {
hits_exceeded = TRUE;
else if(str_head.filet_index != -1) {
/* Collect the hit records for this string into site_outptr */
retval = get_match_list((int) str_head.filet_index, max_hitspm,
&match_number, site_outptr, FALSE);
if((retval != A_OK) && (retval != HITS_EXCEEDED)) {
plog(L_DB_ERROR, NOREQ,"get_match_list failed (%d)",retval,0);
goto cleanup;
if( match_number >= max_hits + offset ){
hits_exceeded = TRUE;
else if (search_type != S_EXACT) { /* Not found - but try other method */
search_type = new_search_type;
goto startsearch;
/* Regular-expression search: compile the pattern once with re_comp, then */
/* walk every record of the strings table and test it with re_exec.       */
/* re_comp returns non-NULL (an error string) when compilation fails.     */
if(re_comp(s_string) != (char *)NULL){
/* Prime str_len so the first loop test lands exactly on strings_ptr */
str_head.str_len = -1;
while((strings_curr_off = strings_ptr + str_head.str_len + 1) < strings_end){
/* Every 0x800 records, give ardp_accept() a chance to run */
if((loopcount++ & 0x7ff) == 0) ardp_accept();
strings_ptr = strings_curr_off;
bcopy(strings_ptr,(char *)&str_head,sizeof(strings_header));
/* Step over the header to the string text */
strings_ptr += sizeof(strings_header);
if(re_exec( strings_ptr ) == 1 ){ /* TRUE */
strings_curr_off = strings_ptr;
if(onlystrings) {
if(strstr(strings_ptr," -> ") == NULL) { /* No broken strings */
cur_link = atoqlink(strings_ptr,max_hits,max_match,max_hitspm);
if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
if(--match_rem <= 0) {
hits_exceeded = TRUE;
else if(str_head.filet_index != -1){
/* Collect the hit records for this string into site_outptr */
retval = get_match_list((int) str_head.filet_index, max_hitspm,
&match_number, site_outptr, FALSE);
if((retval != A_OK) && (retval != HITS_EXCEEDED)) {
plog(L_DB_ERROR, NOREQ,"get_match_list failed (%d)",retval,0);
goto cleanup;
if( match_number >= max_hits + offset ){
hits_exceeded = TRUE;
/* Number of distinct byte values; size of the skip table */
#define TABLESIZE 256
/* Substring search over the whole strings table. The pattern is compared */
/* right to left against each record and a per-character skip table says  */
/* how far the comparison window may advance after a mismatch. When       */
/* nocase is set, both pattern and text are folded through lowertable.    */
case S_SUB_CASE_STR: {
char pattern[MAX_STRING_LEN];
int skiptab[TABLESIZE];
register int pc, tc;
register int local_loopcount = 0xfff;
char *bp1;
int skip;
int plen;
int plen_1;
int tlen;
unsigned char tchar;
plen = strlen(s_string);
plen_1 = plen -1;
/* Old code (replaced by inline code taken from initskip) */
/* patlen = strlen(s_string ) ; */
/* initskip(s_string, patlen, search_type == S_SUB_NCASE_STR) ; */
if(nocase) {
for(pc = 0; s_string[pc]; pc++)
/* Index with unsigned char: plain char may be signed, and a byte */
/* >= 0x80 would otherwise read before the start of lowertable.   */
pattern[pc] = lowertable[(unsigned char)s_string[pc]];
pattern[pc] = '\0';
else strcpy(pattern,s_string);
/* Default skip: a character that is not in the pattern lets the window */
/* move by the full pattern length.                                     */
for( i = 0 ; i < TABLESIZE ; i++ )
skiptab[ i ] = plen;
/* Note that we want both ucase and lcase in this table if nocase */
for( i = 0, tchar = *pattern; i < plen ; i++, tchar = *(pattern + i)) {
skiptab[tchar] = plen - 1 - i;
if(nocase && islower(tchar))
skiptab[_toupper(tchar)] = plen - 1 - i;
/* Begin heavily optimized and non portable code */
/* Note that we are depending on str_head being 8 bytes */
/* tlen starts at -9 so that the first "+= tlen + 9" leaves the offset */
/* at strings_ptr; afterwards it advances by header (8) + string       */
/* length + 1.                                                         */
tlen = -9; /* str_head.str_len */
strings_curr_off = strings_ptr;
while((strings_curr_off += tlen + 9) < strings_end) {
if(--local_loopcount == 0) {
local_loopcount = 0xfff;
strings_ptr = strings_curr_off;
/* This is a kludge, non-portable, but it eliminates a pr call */
/* Note that the size is 8 on suns. Is there a better way? */
/* bcopy(strings_ptr,(char *)&str_head,sizeof(strings_header)); */
bp1 = (char *) &str_head;
/* The copying of the file index is done only on a match */
/* Only bytes 4 and 5 of the header are copied here; str_len is read */
/* from them just below.                                             */
bp1[4] = strings_ptr[4]; bp1[5] = strings_ptr[5];
/* bp1[6] = strings_ptr[6]; bp1[7] = strings_ptr[7]; */
tlen = (unsigned short) str_head.str_len;
/* To catch database corruption, this is a sanity check */
if((tlen < 0) || (tlen > MAX_STRING_LEN)) {
plog(L_DB_ERROR, NOREQ,"Database corrupt: string length out of bounds",0);
/* Old code (replaced by inline code taken from strfind) */
/* if(strfind(strings_ptr,str_head.str_len)) */
/* A record shorter than the pattern cannot contain it */
if( tlen <= plen_1 ) continue;
pc = tc = plen_1;
/* Step over the 8-byte header to the string text */
strings_ptr += 8;
/* Moved the nocase test outside the inner loop for performance */
/* Clauses are identical except for the first if */
if(nocase) do {
tchar = strings_ptr[tc];
/* improve efficiency of this test */
if(lowertable[tchar] == pattern[pc]) {--pc; --tc;}
else {
skip = skiptab[tchar] ;
tc += (skip < plen_1 - pc) ? plen : skip ;
pc = plen_1 ;
} while( pc >= 0 && tc < tlen ) ;
else /* (!nocase) */ do {
tchar = strings_ptr[tc];
/* improve efficiency of this test */
if(tchar == pattern[pc]) {--pc; --tc;}
else {
skip = skiptab[tchar] ;
tc += (skip < plen_1 - pc) ? plen : skip ;
pc = plen_1 ;
} while( pc >= 0 && tc < tlen ) ;
/* pc only drops below zero when every pattern character matched */
if(pc >= 0) continue;
/* We have a match */
/* Finish copying str_head - strings_curr_off */
/* is old strings_ptr. */
bp1[0] = strings_curr_off[0]; bp1[1] = strings_curr_off[1];
bp1[2] = strings_curr_off[2]; bp1[3] = strings_curr_off[3];
/* End heavily optimized and non portable code */
if(onlystrings) {
if(strstr(strings_ptr," -> ") == NULL) { /* No broken strings */
cur_link = atoqlink(strings_ptr,max_hits,max_match,max_hitspm);
if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
if(--match_rem <= 0) {
hits_exceeded = TRUE;
else if(str_head.filet_index != -1){
/* Collect the hit records for this string into site_outptr */
retval = get_match_list((int) str_head.filet_index, max_hitspm,
&match_number, site_outptr, FALSE);
if((retval != A_OK) && (retval != HITS_EXCEEDED)) {
plog(L_DB_ERROR,NOREQ,"get_match_list failed (%d)",retval,0);
goto cleanup;
if( match_number >= max_hits + offset ) {
hits_exceeded = TRUE;
/* Release every hit record collected so far, then the pointer array. */
/* NOTE(review): this looks like the target of the "goto cleanup"     */
/* error paths; the label itself is not visible in this excerpt.      */
for(i = 0;i < match_number; i++) free((char *)site_outptr[i]);
free((char *)site_outptr);
/* Normal completion: turn the collected hit records into links in vd, */
/* skipping the first 'offset' of them, and free each record as we go. */
for(i = 0;i < match_number; i++){
/* Every 0x80 records, give ardp_accept() a chance to run */
if((i & 0x7f) == 0) ardp_accept();
site_outrec = *site_outptr[i];
if(i >= offset) {
cur_link = atoplink(site_outrec,flags);
if(cur_link) vl_insert(cur_link,vd,VLI_NOSORT);
free((char *)site_outptr[i]);
free((char *)site_outptr);
if(hits_exceeded) {
/* Insert a continuation entry */
/* Cache the result. The sign of the max_hits argument records whether */
/* the search was cut short (positive) or found everything (negative). */
if((search_type == S_EXACT) && (pQlen > (MATCH_CACHE_SIZE - 5)))
add_to_cache(vd->links,s_string, (hits_exceeded ? max_hits : -max_hits),
/* Check for cached results */
/* Walks the mcache list looking for an entry that answers the query   */
/* (arg, max_hits, offset, qtype, flags). On a hit the entry is moved  */
/* to the front, its link list is trimmed or extended to max_hits, and */
/* the list is stored through linkpp; on a miss *linkpp is set to      */
/* NULL. The caller compares the return value against TRUE.            */
/* NOTE(review): the function header and return statements are not     */
/* visible in this excerpt.                                            */
char *arg;
int max_hits;
int offset;
search_sel qtype;
int flags;
VLINK *linkpp;
struct match_cache *cachep = mcache;
struct match_cache *pcachep = NULL; /* Entry preceding cachep, NULL at the head */
VLINK tmp_link, cur_link;
VLINK rest = NULL;
VLINK next = NULL;
int count = max_hits; /* Links still wanted from the cached list */
while(cachep) {
/* The query type may match either the method actually used or the */
/* method originally requested.                                    */
if(((qtype == cachep->search_type)||(qtype == cachep->req_type))&&
(cachep->offset == offset) &&
/* All results are in cache - or enough to satisfy request */
((cachep->max_hits < 0) || (max_hits <= cachep->max_hits)) &&
(strcmp(cachep->arg,arg) == 0) &&
(cachep->flags == flags)) {
/* We have a match. Move to front of list */
if(pcachep) {
pcachep->next = cachep->next;
cachep->next = mcache;
mcache = cachep;
/* We now have to clear the expanded bits or the links */
/* returned in previous queries will not be returned */
/* We also need to truncate the list if there are more */
/* matches than requested */
cur_link = cachep->matches;
/* IMPORTANT: This code assumes the list is one */
/* dimensional, which is the case because we called */
/* vl_insert with the VLI_NOSORT option */
/* The code below also relies on the head's 'previous' pointer */
/* referring to the last link of its list.                     */
while(cur_link) {
cur_link->expanded = FALSE;
if((--count == 0) && cur_link->next) {
/* truncate list */
/* Links after cur_link are surplus to this request; they are kept */
/* on cachep->more rather than discarded.                          */
if(cachep->more) {
cur_link->next->previous = cachep->more->previous;
cachep->more->previous = cachep->matches->previous;
cachep->matches->previous->next = cachep->more;
else {
cachep->more = cur_link->next;
cachep->more->previous = cachep->matches->previous;
/* cur_link becomes the new last link of matches */
cur_link->next = NULL;
cachep->matches->previous = cur_link;
else if ((cur_link->next == NULL) && (count != 0) &&
cachep->more) {
/* Merge lists */
/* matches ran out before max_hits was reached: append the saved */
/* overflow list and keep walking into it.                       */
cachep->matches->previous = cachep->more->previous;
cur_link->next = cachep->more;
cachep->more->previous = cur_link;
cachep->more = NULL;
cur_link = cur_link->next;
*linkpp = cachep->matches;
/* No match on this entry: advance, remembering the predecessor */
pcachep = cachep;
cachep = cachep->next;
/* Nothing suitable in the cache */
*linkpp = NULL;
/* Cache the response for later use */
/* Stores the link list vl and the query that produced it in the match */
/* cache. A new entry is allocated while cachecount is below           */
/* MATCH_CACHE_SIZE; otherwise the last entry of the list is recycled. */
/* Either way the entry ends up at the front of mcache.                */
/* NOTE(review): the function header, the declaration of vl, and any   */
/* update of cachecount are not visible in this excerpt.               */
char *arg;
int max_hits;
int offset;
search_sel search_type;
search_sel req_type;
int flags;
struct match_cache *newresults = NULL;
struct match_cache *pcachep = NULL;
if(cachecount < MATCH_CACHE_SIZE) { /* Create a new entry */
/* NOTE(review): the malloc result is used without a NULL check */
newresults = (struct match_cache *) malloc(sizeof(struct match_cache));
newresults->next = mcache;
mcache = newresults;
newresults->arg = stcopy(arg);
newresults->max_hits = max_hits;
newresults->offset = offset;
newresults->search_type = search_type;
newresults->req_type = req_type;
newresults->flags = flags;
newresults->matches = NULL;
newresults->more = NULL;
else { /* Use last entry - Assumes list has at least two entries */
pcachep = mcache;
while(pcachep->next) pcachep = pcachep->next;
newresults = pcachep;
/* move to front of list */
newresults->next = mcache;
mcache = newresults;
/* Fix the last entry so we don't have a cycle */
/* pcachep still equals newresults here, so this walks the list from */
/* the new head until it reaches the node that points back at it.    */
while(pcachep->next != newresults) pcachep = pcachep->next;
pcachep->next = NULL;
/* Free the old results */
/* Only the dontfree flag is cleared and the pointer dropped here; */
/* NOTE(review): no call that releases the links is visible.       */
if(newresults->matches) {
newresults->matches->dontfree = FALSE;
newresults->matches = NULL;
if(newresults->more) {
newresults->more->dontfree = FALSE;
newresults->more = NULL;
/* stcopyr is handed the old arg string along with the new value */
newresults->arg = stcopyr(arg,newresults->arg);
newresults->max_hits = max_hits;
newresults->offset = offset;
newresults->search_type = search_type;
newresults->req_type = req_type;
newresults->flags = flags;
/* Since we are caching the data. If there are any links, */
/* note that they should not be freed when sent back */
if(vl) vl->dontfree = TRUE;
newresults->matches = vl;