Added type 1 indexes: each entry also stores the record number in addition
to the offset of the attribute. Lookups are faster, but the index is twice as large.
This commit is contained in:
5
FORMAT
5
FORMAT
@@ -32,7 +32,10 @@ All integers are stored LITTLE ENDIAN.
|
||||
/* indices_offset points here */
|
||||
uint32_t record_index[record_count];
|
||||
struct {
|
||||
uint32_t index_type; /* 0 == sorted array of pointers, rest reserved */
|
||||
uint32_t index_type; /* 0 == sorted array of pointers,
|
||||
1 == sorted array of (pointer,record number) tuples,
|
||||
faster but twice as large
|
||||
rest reserved */
|
||||
uint32_t next; /* offset of next index */
|
||||
/* for index_type==0: */
|
||||
uint32_t indexed_attribute; /* offset of attribute name */
|
||||
|
||||
10
README
10
README
@@ -14,8 +14,14 @@ tinyldap now supports an external database representation with indexes.
|
||||
Use "parse" to create the file "data" from an LDIF file called
|
||||
"exp.ldif" (I can't give you my test data, sorry). Then use "addindex"
|
||||
to add indexes if you like. To make an index case insensitive (and the
|
||||
corresponding attribute, too), give a third argument to addindex (e.g.
|
||||
"./addindex data sn i"; in case I extend this later, stick with "i").
|
||||
corresponding attribute, too), pass an "i" in the third command line
|
||||
argument to addindex (e.g. "./addindex data sn i"). addindex also
|
||||
supports a second index type, where the offset table also contains the
|
||||
record number (will save run time, but the index is twice as large). To
|
||||
enable it, pass an "f" in the third command line argument. So, to have a
|
||||
fast case-insensitive index, use "if" or "fi" as third argument to
|
||||
addindex.
|
||||
|
||||
Use "dumpidx" to have the contents of data displayed on screen.
|
||||
tinyldap has been modified to use data instead of the in-memory linked
|
||||
list.
|
||||
|
||||
47
addindex.c
47
addindex.c
@@ -33,14 +33,23 @@ int main(int argc,char* argv[]) {
|
||||
char* filename=argv[1];
|
||||
uint32 magic,attribute_count,record_count,indices_offset,size_of_string_table;
|
||||
uint32 wanted,casesensitive,dn,objectClass;
|
||||
int ignorecase,fastindex;
|
||||
|
||||
ignorecase=fastindex=0;
|
||||
|
||||
mstorage_init(&idx);
|
||||
|
||||
if (argc<3) {
|
||||
buffer_putsflush(buffer_2,"usage: ./addindex filename attribute [i]\n"
|
||||
"if i is present, make index case insensitive.\n");
|
||||
buffer_putsflush(buffer_2,"usage: ./addindex filename attribute [i][f]\n"
|
||||
"if i is present, make index case insensitive.\n"
|
||||
"if f is present, make index twice as large, but quicker.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (argc>3) {
|
||||
if (strchr(argv[3],'i')) ignorecase=1;
|
||||
if (strchr(argv[3],'f')) fastindex=1;
|
||||
}
|
||||
map=mmap_read(filename,&filelen);
|
||||
uint32_unpack(map,&magic);
|
||||
if (magic!=0xfefe1da9) {
|
||||
@@ -84,7 +93,7 @@ int main(int argc,char* argv[]) {
|
||||
}
|
||||
|
||||
{
|
||||
unsigned long i,counted=0;
|
||||
uint32 i,counted=0;
|
||||
char* x=map+5*4+size_of_string_table+attribute_count*8;
|
||||
for (i=0; i<record_count; ++i) {
|
||||
uint32 j,k;
|
||||
@@ -92,11 +101,15 @@ int main(int argc,char* argv[]) {
|
||||
if (wanted==dn) {
|
||||
uint32_unpack(x+8,&k);
|
||||
mstorage_add(&idx,(char*)&k,4);
|
||||
if (fastindex)
|
||||
mstorage_add(&idx,(char*)&i,4);
|
||||
++counted;
|
||||
x+=j*8;
|
||||
} else if (wanted==objectClass) {
|
||||
uint32_unpack(x+12,&k);
|
||||
mstorage_add(&idx,(char*)&k,4);
|
||||
if (fastindex)
|
||||
mstorage_add(&idx,(char*)&i,4);
|
||||
++counted;
|
||||
x+=j*8;
|
||||
} else {
|
||||
@@ -106,6 +119,8 @@ int main(int argc,char* argv[]) {
|
||||
if (k==wanted) {
|
||||
uint32_unpack(x+4,&k);
|
||||
mstorage_add(&idx,(char*)&k,4);
|
||||
if (fastindex)
|
||||
mstorage_add(&idx,(char*)&i,4);
|
||||
++counted;
|
||||
}
|
||||
x+=8;
|
||||
@@ -114,10 +129,10 @@ int main(int argc,char* argv[]) {
|
||||
}
|
||||
buffer_putulong(buffer_1,counted);
|
||||
buffer_putsflush(buffer_1," entries to be sorted...");
|
||||
if (argc>3)
|
||||
qsort(idx.root,counted,4,compari);
|
||||
if (ignorecase)
|
||||
qsort(idx.root,counted,4*(fastindex+1),compari);
|
||||
else
|
||||
qsort(idx.root,counted,4,compar);
|
||||
qsort(idx.root,counted,4*(fastindex+1),compar);
|
||||
buffer_putsflush(buffer_1," done.\n");
|
||||
munmap(map,filelen);
|
||||
{
|
||||
@@ -126,22 +141,28 @@ int main(int argc,char* argv[]) {
|
||||
buffer_putsflush(buffer_2,"could not re-open database file read-write\n");
|
||||
exit(1);
|
||||
}
|
||||
ftruncate(fd,filelen+(counted+3)*4);
|
||||
map=mmap(0,filelen+(counted+3)*4,PROT_WRITE,MAP_SHARED,fd,0);
|
||||
ftruncate(fd,filelen+(counted+3)*4*(fastindex+1));
|
||||
map=mmap(0,filelen+(counted+3)*4*(fastindex+1),PROT_WRITE,MAP_SHARED,fd,0);
|
||||
if (map==(char*)-1) {
|
||||
buffer_putsflush(buffer_2,"could not mmap database file read-write\n");
|
||||
exit(1);
|
||||
}
|
||||
uint32_pack(map+casesensitive,argc>3?1:0);
|
||||
uint32_pack(map+filelen,0);
|
||||
uint32_pack(map+filelen+4,filelen+(counted+3)*4);
|
||||
uint32_pack(map+casesensitive,ignorecase);
|
||||
uint32_pack(map+filelen,fastindex);
|
||||
uint32_pack(map+filelen+4,filelen+(counted+3)*4*(fastindex+1));
|
||||
uint32_pack(map+filelen+8,wanted);
|
||||
{
|
||||
char* x=map+filelen+12;
|
||||
unsigned long i;
|
||||
for (i=0; i<counted; ++i) {
|
||||
uint32_pack(x,((uint32*)idx.root)[i]);
|
||||
x+=4;
|
||||
if (fastindex) {
|
||||
uint32_pack(x,((uint32*)idx.root)[i*2]);
|
||||
uint32_pack(x+4,((uint32*)idx.root)[i*2+1]);
|
||||
x+=8;
|
||||
} else {
|
||||
uint32_pack(x,((uint32*)idx.root)[i]);
|
||||
x+=4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,8 +94,13 @@ int main(int argc,char* argv[]) {
|
||||
case 0:
|
||||
buffer_puts(buffer_1,"sorted table");
|
||||
break;
|
||||
case 1:
|
||||
buffer_puts(buffer_1,"sorted table with record pointer");
|
||||
break;
|
||||
default:
|
||||
buffer_puts(buffer_1,"unknown");
|
||||
buffer_puts(buffer_1,"unknown (");
|
||||
buffer_putulong(buffer_1,index_type);
|
||||
buffer_puts(buffer_1,")");
|
||||
break;
|
||||
}
|
||||
buffer_puts(buffer_1,"\nnext: ");
|
||||
|
||||
75
tinyldap.c
75
tinyldap.c
@@ -200,7 +200,7 @@ static int indexable(struct Filter* f) {
|
||||
index_type=uint32_read(map+ofs);
|
||||
next=uint32_read(map+ofs+4);
|
||||
indexed_attribute=uint32_read(map+ofs+8);
|
||||
if (index_type==0)
|
||||
if (index_type<=1)
|
||||
if (!matchstring(&f->ava.desc,map+indexed_attribute))
|
||||
return 1;
|
||||
ofs=next;
|
||||
@@ -229,23 +229,50 @@ static long findrec(uint32 dat) {
|
||||
uint32* records=(uint32*)(map+indices_offset);
|
||||
uint32 bottom=0;
|
||||
uint32 top=record_count-1;
|
||||
#ifdef DEBUG
|
||||
buffer_puts(buffer_2,"findrec(");
|
||||
buffer_putulong(buffer_2,dat);
|
||||
buffer_putsflush(buffer_2,")... ");
|
||||
#endif
|
||||
while ((top>=bottom)) {
|
||||
uint32 mid=(top+bottom)/2;
|
||||
uint32 l;
|
||||
|
||||
l=uint32_read(map+uint32_read((char*)(&records[mid]))+8);
|
||||
#if 0
|
||||
buffer_puts(buffer_2,"findrec match[");
|
||||
buffer_putulong(buffer_2,bottom);
|
||||
buffer_puts(buffer_2,"..");
|
||||
buffer_putulong(buffer_2,top);
|
||||
buffer_puts(buffer_2,"]: ");
|
||||
buffer_putulong(buffer_2,l);
|
||||
buffer_puts(buffer_2," <-> ");
|
||||
buffer_putulong(buffer_2,dat);
|
||||
buffer_putsflush(buffer_2,": ");
|
||||
#endif
|
||||
if (l<=dat) {
|
||||
if (mid>=record_count-1)
|
||||
l=uint32_read(map+uint32_read((char*)(&records[0]))+12);
|
||||
else
|
||||
l=uint32_read(map+uint32_read((char*)(&records[mid+1]))+8);
|
||||
if (l>dat) return mid; /* found! */
|
||||
if (l>dat) {
|
||||
#if 0
|
||||
buffer_putsflush(buffer_2,"found!\n");
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
buffer_putsflush(buffer_2,"done!\n");
|
||||
#endif
|
||||
return mid; /* found! */
|
||||
}
|
||||
bottom=mid+1;
|
||||
} else
|
||||
if (mid)
|
||||
top=mid-1;
|
||||
else
|
||||
break;
|
||||
#if 0
|
||||
buffer_putsflush(buffer_2,"nope :-(\n");
|
||||
#endif
|
||||
}
|
||||
buffer_putsflush(buffer_2,"findrec failed!\n");
|
||||
return -1;
|
||||
@@ -277,7 +304,7 @@ static inline int isset(unsigned long* r,unsigned long bit) {
|
||||
* for all records that match the value in s. Set the corresponding
|
||||
* bits to 1 in bitfield. */
|
||||
static void tagmatches(uint32* index,unsigned int elements,struct string* s,
|
||||
unsigned long* bitfield,int (*match)(struct string* s,const char* c)) {
|
||||
unsigned long* bitfield,int (*match)(struct string* s,const char* c),uint32 index_type) {
|
||||
uint32 bottom=0;
|
||||
uint32 top=elements;
|
||||
emptyset(bitfield);
|
||||
@@ -287,7 +314,7 @@ static void tagmatches(uint32* index,unsigned int elements,struct string* s,
|
||||
uint32 k;
|
||||
int l;
|
||||
|
||||
k=uint32_read((char*)(&index[mid]));
|
||||
k=uint32_read((char*)(&index[mid<<index_type]));
|
||||
#ifdef DEBUG
|
||||
buffer_puts(buffer_2,"match[");
|
||||
buffer_putulong(buffer_2,bottom);
|
||||
@@ -306,21 +333,39 @@ static void tagmatches(uint32* index,unsigned int elements,struct string* s,
|
||||
#ifdef DEBUG
|
||||
buffer_putsflush(buffer_2,"MATCH!\n");
|
||||
#endif
|
||||
if ((rec=findrec(k))>=0)
|
||||
if (index_type==0)
|
||||
rec=findrec(k);
|
||||
else if (index_type==1)
|
||||
rec=uint32_read((char*)(&index[(mid<<index_type)+1]));
|
||||
else {
|
||||
buffer_puts(buffer_2,"unsupported index type ");
|
||||
buffer_putulong(buffer_2,index_type);
|
||||
buffer_puts(buffer_2," in tagmatches!\n");
|
||||
return;
|
||||
}
|
||||
if (rec>=0)
|
||||
setbit(bitfield,rec);
|
||||
/* there may be multiple matches.
|
||||
* Look before and after mid, too */
|
||||
for (k=mid-1; k>0; --k) {
|
||||
m=uint32_read((char*)(&index[k]));
|
||||
m=uint32_read((char*)(&index[k<<index_type]));
|
||||
if ((l=match(s,map+m))==0) {
|
||||
if ((rec=findrec(m))>=0)
|
||||
if (index_type==0)
|
||||
rec=findrec(m);
|
||||
else if (index_type==1)
|
||||
rec=uint32_read((char*)(&index[(k<<index_type)+1]));
|
||||
if (rec>=0)
|
||||
setbit(bitfield,rec);
|
||||
} else break;
|
||||
}
|
||||
for (k=mid+1; k<elements; ++k) {
|
||||
m=uint32_read((char*)(&index[k]));
|
||||
m=uint32_read((char*)(&index[k<<index_type]));
|
||||
if ((l=match(s,map+m))==0) {
|
||||
if ((rec=findrec(m))>=0)
|
||||
if (index_type==0)
|
||||
rec=findrec(m);
|
||||
else if (index_type==1)
|
||||
rec=uint32_read((char*)(&index[(k<<index_type)+1]));
|
||||
if (rec>=0)
|
||||
setbit(bitfield,rec);
|
||||
} else break;
|
||||
}
|
||||
@@ -400,10 +445,10 @@ static int useindex(struct Filter* f,unsigned long* bitfield) {
|
||||
index_type=uint32_read(map+ofs);
|
||||
next=uint32_read(map+ofs+4);
|
||||
indexed_attribute=uint32_read(map+ofs+8);
|
||||
if (index_type==0)
|
||||
if (index_type<=1)
|
||||
if (!matchstring(&f->ava.desc,map+indexed_attribute)) {
|
||||
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->substrings->s,bitfield,
|
||||
f->attrflag&1?matchcaseprefix:matchprefix);
|
||||
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/(4<<index_type),&f->substrings->s,bitfield,
|
||||
f->attrflag&1?matchcaseprefix:matchprefix,index_type);
|
||||
return 1;
|
||||
}
|
||||
ofs=next;
|
||||
@@ -433,10 +478,10 @@ static int useindex(struct Filter* f,unsigned long* bitfield) {
|
||||
index_type=uint32_read(map+ofs);
|
||||
next=uint32_read(map+ofs+4);
|
||||
indexed_attribute=uint32_read(map+ofs+8);
|
||||
if (index_type==0)
|
||||
if (index_type<=1)
|
||||
if (!matchstring(&f->ava.desc,map+indexed_attribute)) {
|
||||
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->ava.value,bitfield,
|
||||
f->attrflag&1?matchcasestring:matchstring);
|
||||
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/(4<<index_type),&f->ava.value,bitfield,
|
||||
f->attrflag&1?matchcasestring:matchstring,index_type);
|
||||
return 1;
|
||||
}
|
||||
ofs=next;
|
||||
|
||||
Reference in New Issue
Block a user