diff --git a/FORMAT b/FORMAT index 53e5ab4..c5498a4 100644 --- a/FORMAT +++ b/FORMAT @@ -32,7 +32,10 @@ All integers are stored LITTLE ENDIAN. /* indices_offset points here */ uint32_t record_index[record_count]; struct { - uint32_t index_type; /* 0 == sorted array of pointers, rest reserved */ + uint32_t index_type; /* 0 == sorted array of pointers, + 1 == sorted array of (pointer,record number) tuples, + faster but twice as large, + rest reserved */ uint32_t next; /* offset of next index */ /* for index_type==0: */ uint32_t indexed_attribute; /* offset of attribute name */ diff --git a/README b/README index 3e7891f..5b685d7 100644 --- a/README +++ b/README @@ -14,8 +14,14 @@ tinyldap now supports an external database representation with indexes. Use "parse" to create the file "data" from an LDIF file called "exp.ldif" (I can't give you my test data, sorry). Then use "addindex" to add indexes if you like. To make an index case insentive (and the -corresponding attribute, too), give a third argument to addindex (e.g. -"./addindex data sn i"; in case I extend this later, stick with "i"). +corresponding attribute, too), pass an "i" in the third command line +argument to addindex (e.g. "./addindex data sn i"). addindex also +supports a second index type, where the offset table also contains the +record number (will save run time, but the index is twice as large). To +enable it, pass an "f" in the third command line argument. So, to have a +fast case-insensitive index, use "if" or "fi" as the third argument to +addindex. + Use "dumpidx" to have the contents of data displayed on screen. tinyldap has been modified to use data instead of the in-memory linked list.
diff --git a/addindex.c b/addindex.c index 23af38f..bef278f 100644 --- a/addindex.c +++ b/addindex.c @@ -33,14 +33,23 @@ int main(int argc,char* argv[]) { char* filename=argv[1]; uint32 magic,attribute_count,record_count,indices_offset,size_of_string_table; uint32 wanted,casesensitive,dn,objectClass; + int ignorecase,fastindex; + + ignorecase=fastindex=0; mstorage_init(&idx); if (argc<3) { - buffer_putsflush(buffer_2,"usage: ./addindex filename attribute [i]\n" - "if i is present, make index case insensitive.\n"); + buffer_putsflush(buffer_2,"usage: ./addindex filename attribute [i][f]\n" + "if i is present, make index case insensitive.\n" + "if f is present, make index twice as large, but quicker.\n"); return 1; } + + if (argc>3) { + if (strchr(argv[3],'i')) ignorecase=1; + if (strchr(argv[3],'f')) fastindex=1; + } map=mmap_read(filename,&filelen); uint32_unpack(map,&magic); if (magic!=0xfefe1da9) { @@ -84,7 +93,7 @@ int main(int argc,char* argv[]) { } { - unsigned long i,counted=0; + uint32 i,counted=0; char* x=map+5*4+size_of_string_table+attribute_count*8; for (i=0; i3) - qsort(idx.root,counted,4,compari); + if (ignorecase) + qsort(idx.root,counted,4*(fastindex+1),compari); else - qsort(idx.root,counted,4,compar); + qsort(idx.root,counted,4*(fastindex+1),compar); buffer_putsflush(buffer_1," done.\n"); munmap(map,filelen); { @@ -126,22 +141,28 @@ int main(int argc,char* argv[]) { buffer_putsflush(buffer_2,"could not re-open database file read-write\n"); exit(1); } - ftruncate(fd,filelen+(counted+3)*4); - map=mmap(0,filelen+(counted+3)*4,PROT_WRITE,MAP_SHARED,fd,0); + ftruncate(fd,filelen+(counted+3)*4*(fastindex+1)); + map=mmap(0,filelen+(counted+3)*4*(fastindex+1),PROT_WRITE,MAP_SHARED,fd,0); if (map==(char*)-1) { buffer_putsflush(buffer_2,"could not mmap database file read-write\n"); exit(1); } - uint32_pack(map+casesensitive,argc>3?1:0); - uint32_pack(map+filelen,0); - uint32_pack(map+filelen+4,filelen+(counted+3)*4); + 
uint32_pack(map+casesensitive,ignorecase); + uint32_pack(map+filelen,fastindex); + uint32_pack(map+filelen+4,filelen+(counted+3)*4*(fastindex+1)); uint32_pack(map+filelen+8,wanted); { char* x=map+filelen+12; unsigned long i; for (i=0; iava.desc,map+indexed_attribute)) return 1; ofs=next; @@ -229,23 +229,50 @@ static long findrec(uint32 dat) { uint32* records=(uint32*)(map+indices_offset); uint32 bottom=0; uint32 top=record_count-1; +#ifdef DEBUG + buffer_puts(buffer_2,"findrec("); + buffer_putulong(buffer_2,dat); + buffer_putsflush(buffer_2,")... "); +#endif while ((top>=bottom)) { uint32 mid=(top+bottom)/2; uint32 l; l=uint32_read(map+uint32_read((char*)(&records[mid]))+8); +#if 0 + buffer_puts(buffer_2,"findrec match["); + buffer_putulong(buffer_2,bottom); + buffer_puts(buffer_2,".."); + buffer_putulong(buffer_2,top); + buffer_puts(buffer_2,"]: "); + buffer_putulong(buffer_2,l); + buffer_puts(buffer_2," <-> "); + buffer_putulong(buffer_2,dat); + buffer_putsflush(buffer_2,": "); +#endif if (l<=dat) { if (mid>=record_count-1) l=uint32_read(map+uint32_read((char*)(&records[0]))+12); else l=uint32_read(map+uint32_read((char*)(&records[mid+1]))+8); - if (l>dat) return mid; /* found! */ + if (l>dat) { +#if 0 + buffer_putsflush(buffer_2,"found!\n"); +#endif +#ifdef DEBUG + buffer_putsflush(buffer_2,"done!\n"); +#endif + return mid; /* found! */ + } bottom=mid+1; } else if (mid) top=mid-1; else break; +#if 0 + buffer_putsflush(buffer_2,"nope :-(\n"); +#endif } buffer_putsflush(buffer_2,"findrec failed!\n"); return -1; @@ -277,7 +304,7 @@ static inline int isset(unsigned long* r,unsigned long bit) { * for all records that match the value in s. Set the corresponding * bits to 1 in bitfield. 
*/ static void tagmatches(uint32* index,unsigned int elements,struct string* s, - unsigned long* bitfield,int (*match)(struct string* s,const char* c)) { + unsigned long* bitfield,int (*match)(struct string* s,const char* c),uint32 index_type) { uint32 bottom=0; uint32 top=elements; emptyset(bitfield); @@ -287,7 +314,7 @@ static void tagmatches(uint32* index,unsigned int elements,struct string* s, uint32 k; int l; - k=uint32_read((char*)(&index[mid])); + k=uint32_read((char*)(&index[mid<=0) + if (index_type==0) + rec=findrec(k); + else if (index_type==1) + rec=uint32_read((char*)(&index[(mid<=0) setbit(bitfield,rec); /* there may be multiple matches. * Look before and after mid, too */ for (k=mid-1; k>0; --k) { - m=uint32_read((char*)(&index[k])); + m=uint32_read((char*)(&index[k<=0) + if (index_type==0) + rec=findrec(m); + else if (index_type==1) + rec=uint32_read((char*)(&index[(k<=0) setbit(bitfield,rec); } else break; } for (k=mid+1; k=0) + if (index_type==0) + rec=findrec(m); + else if (index_type==1) + rec=uint32_read((char*)(&index[(k<=0) setbit(bitfield,rec); } else break; } @@ -400,10 +445,10 @@ static int useindex(struct Filter* f,unsigned long* bitfield) { index_type=uint32_read(map+ofs); next=uint32_read(map+ofs+4); indexed_attribute=uint32_read(map+ofs+8); - if (index_type==0) + if (index_type<=1) if (!matchstring(&f->ava.desc,map+indexed_attribute)) { - tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->substrings->s,bitfield, - f->attrflag&1?matchcaseprefix:matchprefix); + tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/(4<substrings->s,bitfield, + f->attrflag&1?matchcaseprefix:matchprefix,index_type); return 1; } ofs=next; @@ -433,10 +478,10 @@ static int useindex(struct Filter* f,unsigned long* bitfield) { index_type=uint32_read(map+ofs); next=uint32_read(map+ofs+4); indexed_attribute=uint32_read(map+ofs+8); - if (index_type==0) + if (index_type<=1) if (!matchstring(&f->ava.desc,map+indexed_attribute)) { - 
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->ava.value,bitfield, - f->attrflag&1?matchcasestring:matchstring); + tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/(4<ava.value,bitfield, + f->attrflag&1?matchcasestring:matchstring,index_type); return 1; } ofs=next;