added type 1 indexes: also saves the record number in addition to the

offset of the attribute.  Faster but also doubles the index size.
This commit is contained in:
leitner
2005-04-01 20:09:38 +00:00
parent 7559449020
commit 78f1555c63
5 changed files with 112 additions and 32 deletions

5
FORMAT
View File

@@ -32,7 +32,10 @@ All integers are stored LITTLE ENDIAN.
/* indices_offset points here */
uint32_t record_index[record_count];
struct {
uint32_t index_type; /* 0 == sorted array of pointers, rest reserved */
uint32_t index_type; /* 0 == sorted array of pointers,
1 == sorted array of (pointer,record number) tuples
(faster, but twice as large),
rest reserved */
uint32_t next; /* offset of next index */
/* for index_type==0: */
uint32_t indexed_attribute; /* offset of attribute name */

10
README
View File

@@ -14,8 +14,14 @@ tinyldap now supports an external database representation with indexes.
Use "parse" to create the file "data" from an LDIF file called
"exp.ldif" (I can't give you my test data, sorry). Then use "addindex"
to add indexes if you like. To make an index case insensitive (and the
corresponding attribute, too), give a third argument to addindex (e.g.
"./addindex data sn i"; in case I extend this later, stick with "i").
corresponding attribute, too), pass an "i" in the third command line
argument to addindex (e.g. "./addindex data sn i"). addindex also
supports a second index type, where the offset table also contains the
record number (this saves run time, but the index is twice as large). To
enable it, pass an "f" in the third command line argument. So, to have a
fast case-insensitive index, use "if" or "fi" as the third argument to
addindex.
Use "dumpidx" to have the contents of data displayed on screen.
tinyldap has been modified to use data instead of the in-memory linked
list.

View File

@@ -33,14 +33,23 @@ int main(int argc,char* argv[]) {
char* filename=argv[1];
uint32 magic,attribute_count,record_count,indices_offset,size_of_string_table;
uint32 wanted,casesensitive,dn,objectClass;
int ignorecase,fastindex;
ignorecase=fastindex=0;
mstorage_init(&idx);
if (argc<3) {
buffer_putsflush(buffer_2,"usage: ./addindex filename attribute [i]\n"
"if i is present, make index case insensitive.\n");
buffer_putsflush(buffer_2,"usage: ./addindex filename attribute [i][f]\n"
"if i is present, make index case insensitive.\n"
"if f is present, make index twice as large, but quicker.\n");
return 1;
}
if (argc>3) {
if (strchr(argv[3],'i')) ignorecase=1;
if (strchr(argv[3],'f')) fastindex=1;
}
map=mmap_read(filename,&filelen);
uint32_unpack(map,&magic);
if (magic!=0xfefe1da9) {
@@ -84,7 +93,7 @@ int main(int argc,char* argv[]) {
}
{
unsigned long i,counted=0;
uint32 i,counted=0;
char* x=map+5*4+size_of_string_table+attribute_count*8;
for (i=0; i<record_count; ++i) {
uint32 j,k;
@@ -92,11 +101,15 @@ int main(int argc,char* argv[]) {
if (wanted==dn) {
uint32_unpack(x+8,&k);
mstorage_add(&idx,(char*)&k,4);
if (fastindex)
mstorage_add(&idx,(char*)&i,4);
++counted;
x+=j*8;
} else if (wanted==objectClass) {
uint32_unpack(x+12,&k);
mstorage_add(&idx,(char*)&k,4);
if (fastindex)
mstorage_add(&idx,(char*)&i,4);
++counted;
x+=j*8;
} else {
@@ -106,6 +119,8 @@ int main(int argc,char* argv[]) {
if (k==wanted) {
uint32_unpack(x+4,&k);
mstorage_add(&idx,(char*)&k,4);
if (fastindex)
mstorage_add(&idx,(char*)&i,4);
++counted;
}
x+=8;
@@ -114,10 +129,10 @@ int main(int argc,char* argv[]) {
}
buffer_putulong(buffer_1,counted);
buffer_putsflush(buffer_1," entries to be sorted...");
if (argc>3)
qsort(idx.root,counted,4,compari);
if (ignorecase)
qsort(idx.root,counted,4*(fastindex+1),compari);
else
qsort(idx.root,counted,4,compar);
qsort(idx.root,counted,4*(fastindex+1),compar);
buffer_putsflush(buffer_1," done.\n");
munmap(map,filelen);
{
@@ -126,22 +141,28 @@ int main(int argc,char* argv[]) {
buffer_putsflush(buffer_2,"could not re-open database file read-write\n");
exit(1);
}
ftruncate(fd,filelen+(counted+3)*4);
map=mmap(0,filelen+(counted+3)*4,PROT_WRITE,MAP_SHARED,fd,0);
ftruncate(fd,filelen+(counted+3)*4*(fastindex+1));
map=mmap(0,filelen+(counted+3)*4*(fastindex+1),PROT_WRITE,MAP_SHARED,fd,0);
if (map==(char*)-1) {
buffer_putsflush(buffer_2,"could not mmap database file read-write\n");
exit(1);
}
uint32_pack(map+casesensitive,argc>3?1:0);
uint32_pack(map+filelen,0);
uint32_pack(map+filelen+4,filelen+(counted+3)*4);
uint32_pack(map+casesensitive,ignorecase);
uint32_pack(map+filelen,fastindex);
uint32_pack(map+filelen+4,filelen+(counted+3)*4*(fastindex+1));
uint32_pack(map+filelen+8,wanted);
{
char* x=map+filelen+12;
unsigned long i;
for (i=0; i<counted; ++i) {
uint32_pack(x,((uint32*)idx.root)[i]);
x+=4;
if (fastindex) {
uint32_pack(x,((uint32*)idx.root)[i*2]);
uint32_pack(x+4,((uint32*)idx.root)[i*2+1]);
x+=8;
} else {
uint32_pack(x,((uint32*)idx.root)[i]);
x+=4;
}
}
}
}

View File

@@ -94,8 +94,13 @@ int main(int argc,char* argv[]) {
case 0:
buffer_puts(buffer_1,"sorted table");
break;
case 1:
buffer_puts(buffer_1,"sorted table with record pointer");
break;
default:
buffer_puts(buffer_1,"unknown");
buffer_puts(buffer_1,"unknown (");
buffer_putulong(buffer_1,index_type);
buffer_puts(buffer_1,")");
break;
}
buffer_puts(buffer_1,"\nnext: ");

View File

@@ -200,7 +200,7 @@ static int indexable(struct Filter* f) {
index_type=uint32_read(map+ofs);
next=uint32_read(map+ofs+4);
indexed_attribute=uint32_read(map+ofs+8);
if (index_type==0)
if (index_type<=1)
if (!matchstring(&f->ava.desc,map+indexed_attribute))
return 1;
ofs=next;
@@ -229,23 +229,50 @@ static long findrec(uint32 dat) {
uint32* records=(uint32*)(map+indices_offset);
uint32 bottom=0;
uint32 top=record_count-1;
#ifdef DEBUG
buffer_puts(buffer_2,"findrec(");
buffer_putulong(buffer_2,dat);
buffer_putsflush(buffer_2,")... ");
#endif
while ((top>=bottom)) {
uint32 mid=(top+bottom)/2;
uint32 l;
l=uint32_read(map+uint32_read((char*)(&records[mid]))+8);
#if 0
buffer_puts(buffer_2,"findrec match[");
buffer_putulong(buffer_2,bottom);
buffer_puts(buffer_2,"..");
buffer_putulong(buffer_2,top);
buffer_puts(buffer_2,"]: ");
buffer_putulong(buffer_2,l);
buffer_puts(buffer_2," <-> ");
buffer_putulong(buffer_2,dat);
buffer_putsflush(buffer_2,": ");
#endif
if (l<=dat) {
if (mid>=record_count-1)
l=uint32_read(map+uint32_read((char*)(&records[0]))+12);
else
l=uint32_read(map+uint32_read((char*)(&records[mid+1]))+8);
if (l>dat) return mid; /* found! */
if (l>dat) {
#if 0
buffer_putsflush(buffer_2,"found!\n");
#endif
#ifdef DEBUG
buffer_putsflush(buffer_2,"done!\n");
#endif
return mid; /* found! */
}
bottom=mid+1;
} else
if (mid)
top=mid-1;
else
break;
#if 0
buffer_putsflush(buffer_2,"nope :-(\n");
#endif
}
buffer_putsflush(buffer_2,"findrec failed!\n");
return -1;
@@ -277,7 +304,7 @@ static inline int isset(unsigned long* r,unsigned long bit) {
* for all records that match the value in s. Set the corresponding
* bits to 1 in bitfield. */
static void tagmatches(uint32* index,unsigned int elements,struct string* s,
unsigned long* bitfield,int (*match)(struct string* s,const char* c)) {
unsigned long* bitfield,int (*match)(struct string* s,const char* c),uint32 index_type) {
uint32 bottom=0;
uint32 top=elements;
emptyset(bitfield);
@@ -287,7 +314,7 @@ static void tagmatches(uint32* index,unsigned int elements,struct string* s,
uint32 k;
int l;
k=uint32_read((char*)(&index[mid]));
k=uint32_read((char*)(&index[mid<<index_type]));
#ifdef DEBUG
buffer_puts(buffer_2,"match[");
buffer_putulong(buffer_2,bottom);
@@ -306,21 +333,39 @@ static void tagmatches(uint32* index,unsigned int elements,struct string* s,
#ifdef DEBUG
buffer_putsflush(buffer_2,"MATCH!\n");
#endif
if ((rec=findrec(k))>=0)
if (index_type==0)
rec=findrec(k);
else if (index_type==1)
rec=uint32_read((char*)(&index[(mid<<index_type)+1]));
else {
buffer_puts(buffer_2,"unsupported index type ");
buffer_putulong(buffer_2,index_type);
buffer_puts(buffer_2," in tagmatches!\n");
return;
}
if (rec>=0)
setbit(bitfield,rec);
/* there may be multiple matches.
* Look before and after mid, too */
for (k=mid-1; k>0; --k) {
m=uint32_read((char*)(&index[k]));
m=uint32_read((char*)(&index[k<<index_type]));
if ((l=match(s,map+m))==0) {
if ((rec=findrec(m))>=0)
if (index_type==0)
rec=findrec(m);
else if (index_type==1)
rec=uint32_read((char*)(&index[(k<<index_type)+1]));
if (rec>=0)
setbit(bitfield,rec);
} else break;
}
for (k=mid+1; k<elements; ++k) {
m=uint32_read((char*)(&index[k]));
m=uint32_read((char*)(&index[k<<index_type]));
if ((l=match(s,map+m))==0) {
if ((rec=findrec(m))>=0)
if (index_type==0)
rec=findrec(m);
else if (index_type==1)
rec=uint32_read((char*)(&index[(k<<index_type)+1]));
if (rec>=0)
setbit(bitfield,rec);
} else break;
}
@@ -400,10 +445,10 @@ static int useindex(struct Filter* f,unsigned long* bitfield) {
index_type=uint32_read(map+ofs);
next=uint32_read(map+ofs+4);
indexed_attribute=uint32_read(map+ofs+8);
if (index_type==0)
if (index_type<=1)
if (!matchstring(&f->ava.desc,map+indexed_attribute)) {
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->substrings->s,bitfield,
f->attrflag&1?matchcaseprefix:matchprefix);
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/(4<<index_type),&f->substrings->s,bitfield,
f->attrflag&1?matchcaseprefix:matchprefix,index_type);
return 1;
}
ofs=next;
@@ -433,10 +478,10 @@ static int useindex(struct Filter* f,unsigned long* bitfield) {
index_type=uint32_read(map+ofs);
next=uint32_read(map+ofs+4);
indexed_attribute=uint32_read(map+ofs+8);
if (index_type==0)
if (index_type<=1)
if (!matchstring(&f->ava.desc,map+indexed_attribute)) {
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->ava.value,bitfield,
f->attrflag&1?matchcasestring:matchstring);
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/(4<<index_type),&f->ava.value,bitfield,
f->attrflag&1?matchcasestring:matchstring,index_type);
return 1;
}
ofs=next;