indexed substrings (prefixes only, for now), and untested AND and OR

support.
This commit is contained in:
leitner
2002-03-26 15:51:53 +00:00
parent c76ee598c0
commit 4792ff7c58
5 changed files with 117 additions and 82 deletions

3
FORMAT
View File

@@ -25,7 +25,8 @@ All integers are stored LITTLE ENDIAN.
char string_table[size_of_string_table]; char string_table[size_of_string_table];
uint32_t attribute_names[attribute_count]; uint32_t attribute_names[attribute_count];
uint32_t attribute_flags[attribute_count]; /* 1: match case insensitively */ uint32_t attribute_flags[attribute_count]; /* 1: match case insensitively */
uint32_t records[record_count][]; uint32_t records[record_count][]; /* in the same order as the records
are physically on disk */
/* indices_offset points here */ /* indices_offset points here */
uint32_t record_index[record_count]; uint32_t record_index[record_count];
struct { struct {

View File

@@ -1,4 +1,4 @@
#DEBUG=1 DEBUG=1
all: t1 t2 parse dumpidx addindex bindrequest tinyldap tinyldap_standalone tinyldap_debug ldapclient ldapclient_str # t all: t1 t2 parse dumpidx addindex bindrequest tinyldap tinyldap_standalone tinyldap_debug ldapclient ldapclient_str # t
@@ -15,7 +15,8 @@ scan_ldapstring.o scan_ldapsearchfilter.o scan_ldapsearchrequest.o \
freefilter.o freeava.o scan_ldapava.o fmt_ldapsearchresultentry.o \ freefilter.o freeava.o scan_ldapava.o fmt_ldapsearchresultentry.o \
fmt_ldapstring.o freepal.o scan_ldapsearchresultentry.o \ fmt_ldapstring.o freepal.o scan_ldapsearchresultentry.o \
fmt_ldapresult.o fmt_ldappal.o fmt_ldapadl.o fmt_ldapava.o \ fmt_ldapresult.o fmt_ldappal.o fmt_ldapadl.o fmt_ldapava.o \
fmt_ldapsearchfilter.o fmt_ldapsearchrequest.o matchstring.o fmt_ldapsearchfilter.o fmt_ldapsearchrequest.o matchstring.o \
matchprefix.o
ldif.a: ldif_parse.o ldap_match.o ldap_match_mapped.o ldif.a: ldif_parse.o ldap_match.o ldap_match_mapped.o

1
ldap.h
View File

@@ -7,6 +7,7 @@ struct string {
}; };
int matchstring(struct string* s,const char* c); int matchstring(struct string* s,const char* c);
int matchprefix(struct string* s,const char* c);
struct AttributeValueAssertion { struct AttributeValueAssertion {
struct string desc, value; struct string desc, value;

18
matchprefix.c Normal file
View File

@@ -0,0 +1,18 @@
#include "byte.h"
#include "ldif.h"
/* Prefix-aware comparison of the counted string s against the
 * NUL-terminated string c; behaves like strcmp, but also returns 0 if
 * s is a prefix of c.
 *
 * Returns:
 *   0   if s equals c, or s is a proper prefix of c
 *   <0  if s sorts before c (also returned when c is NULL)
 *   >0  if s sorts after c, including the case where c is a proper
 *       prefix of s
 *
 * The leading min(s->l,strlen(c)) bytes are compared with byte_diff,
 * whose result is passed through unchanged when non-zero (this assumes
 * byte_diff uses the memcmp sign convention). */
int matchprefix(struct string* s,const char* c) {
  unsigned int l,l1;
  int i;	/* signed: a negative comparison result must stay negative */
  if (!c) return -1;
  l1=l=strlen(c);
  if (s->l<l1) l1=s->l;
  i=byte_diff(s->s,l1,c);
  if (i) return i;
  /* the common part is identical, so one is a prefix of the other */
  if (l==s->l) return 0;
  if (c[l1]) /* is c the longer string?  then s is a prefix of c: match */
    return 0;
  /* s is the longer string, so it sorts AFTER c.  Return a constant
   * positive value rather than a value derived from s->s[l1]: that
   * byte may be negative (signed char) or 0, which would point a
   * binary search in the wrong direction or fake a match. */
  return 1;
}

View File

@@ -23,16 +23,17 @@ uint32 record_set_length;
#define BUFSIZE 8192 #define BUFSIZE 8192
/* find out whether this filter can be accelerated with the indices */
static int indexable(struct Filter* f) { static int indexable(struct Filter* f) {
struct Filter* y=f->x; struct Filter* y=f->x;
if (!f) return 1; if (!f) return 0;
switch (f->type) { switch (f->type) {
case AND: case AND:
while (y) { while (y) {
if (!indexable(y)) return 0; if (indexable(y)) return 1;
y=y->next; y=y->next;
} }
return 1; return 0;
case OR: case OR:
while (y) { while (y) {
if (!indexable(y)) return 0; if (!indexable(y)) return 0;
@@ -67,6 +68,18 @@ static int indexable(struct Filter* f) {
} }
} }
/* each record can have more than one attribute with the same name, i.e.
* two email addresses. Thus, the index can't just be a sorted list of
* pointers to the records (because a record with two email addresses needs
* to be in the index twice, once for each email address). So our index
* is a sorted list of pointers to the attributes. Thus, a look-up in
* the index does not yield the record but the attribute. We need to be
* able to find the record for a given attribute. To do that, we
* exploit the fact that the strings in the string table are in the same
* order as the records, so we can do a binary search over the record
* table to find the record with the attribute. This does not work for
* objectClass, because the classes are stored in a different string
* table to remove duplicates. */
/* find record given a data pointer */ /* find record given a data pointer */
static uint32 findrec(uint32 dat) { static uint32 findrec(uint32 dat) {
uint32* records=(uint32*)(map+indices_offset); uint32* records=(uint32*)(map+indices_offset);
@@ -77,17 +90,6 @@ static uint32 findrec(uint32 dat) {
uint32 k,l; uint32 k,l;
uint32_unpack(&records[mid],&k); uint32_unpack(&records[mid],&k);
uint32_unpack(map+k+8,&l); uint32_unpack(map+k+8,&l);
#if 0
buffer_puts(buffer_2,"findrec: look for ");
buffer_putulong(buffer_2,dat);
buffer_puts(buffer_2," in record ");
buffer_putulong(buffer_2,mid);
buffer_puts(buffer_2," @");
buffer_putulong(buffer_2,l);
buffer_putsflush(buffer_2,".\n");
#endif
if (l<dat) { if (l<dat) {
if (mid<record_count) { if (mid<record_count) {
uint32_unpack(&records[mid+1],&k); uint32_unpack(&records[mid+1],&k);
@@ -108,124 +110,121 @@ static uint32 findrec(uint32 dat) {
return 0; return 0;
} }
/* basic bit-set support: set all bits to zero */
static inline void emptyset(unsigned long* r) { static inline void emptyset(unsigned long* r) {
unsigned long i; unsigned long i;
for (i=0; i<record_set_length; ++i) r[i]=0; for (i=0; i<record_set_length; ++i) r[i]=0;
} }
/* basic bit-set support: set all bits to one.
 * Walks the record_set_length words starting at r and stores an
 * all-ones word into each of them. */
static inline void fillset(unsigned long* r) {
  unsigned long* end=r+record_set_length;
  while (r<end)
    *r++=~0UL;
}
/* basic bit-set support: set one bit to 1 */
static inline void setbit(unsigned long* r,unsigned long bit) { static inline void setbit(unsigned long* r,unsigned long bit) {
r[bit/(8*sizeof(long))] |= (1<<(bit&(8*sizeof(long)-1))); r[bit/(8*sizeof(long))] |= (1<<(bit&(8*sizeof(long)-1)));
} }
/* basic bit-set support: see if given bit is set */
static inline int isset(unsigned long* r,unsigned long bit) { static inline int isset(unsigned long* r,unsigned long bit) {
return r[bit/(8*sizeof(long))] & (1<<(bit&(8*sizeof(long)-1))); return r[bit/(8*sizeof(long))] & (1<<(bit&(8*sizeof(long)-1)));
} }
/* find record given a data pointer */ /* use index (sorted table of offsets to records) to do a binary search
static void tagmatches(uint32* index,unsigned int elements,struct string* s,unsigned long* bitfield) { * for all records that match the value in s. Set the corresponding
* bits to 1 in bitfield. */
static void tagmatches(uint32* index,unsigned int elements,struct string* s,
unsigned long* bitfield,int (*match)(struct string* s,const char* c)) {
uint32 bottom=0; uint32 bottom=0;
uint32 top=elements; uint32 top=elements;
emptyset(bitfield); emptyset(bitfield);
#if 0
{
long i,l;
for (i=0; i<elements; ++i) {
uint32 k;
uint32_unpack(&index[i],&k);
if ((l=matchstring(s,map+k))==0) {
buffer_puts(buffer_2,"found ");
buffer_puts(buffer_2,map+k);
buffer_putsflush(buffer_2,"\n");
}
if (i+1<elements) {
uint32 m;
uint32_unpack(&index[i+1],&m);
if (strcmp(map+k,map+m)>0)
buffer_putsflush(buffer_2,"not properly sorted!\n");
}
}
}
#endif
while ((top>=bottom)) { while ((top>=bottom)) {
uint32 mid=(top+bottom)/2; uint32 mid=(top+bottom)/2;
uint32 k; uint32 k;
int l; int l;
#if 0
buffer_puts(buffer_2,"bottom=");
buffer_putulong(buffer_2,bottom);
buffer_puts(buffer_2,", mid=");
buffer_putulong(buffer_2,mid);
buffer_puts(buffer_2,", top=");
buffer_putulong(buffer_2,top);
buffer_puts(buffer_2,", elements=");
buffer_putulong(buffer_2,elements);
buffer_putsflush(buffer_2,".\n");
#endif
uint32_unpack(&index[mid],&k); uint32_unpack(&index[mid],&k);
if ((l=matchstring(s,map+k))==0) { if ((l=match(s,map+k))==0) {
/* match! */ /* match! */
uint32 rec; uint32 rec;
uint32 oldk=k; uint32 m;
if ((rec=findrec(k))) if ((rec=findrec(k)))
setbit(bitfield,rec); setbit(bitfield,rec);
/* there may be multiple matches. /* there may be multiple matches.
* Look before and after mid, too */ * Look before and after mid, too */
for (oldk=k; k>0; ) { for (k=mid-1; k>0; --k) {
k-=4; uint32_unpack(&index[k],&m);
if ((l=matchstring(s,map+k))==0) { if ((l=match(s,map+m))==0) {
if ((rec=findrec(k))) if ((rec=findrec(m)))
setbit(bitfield,rec); setbit(bitfield,rec);
} else break; } else break;
} }
for (k=oldk; k<elements; ++k) { for (k=mid+1; k<elements; ++k) {
k+=4; uint32_unpack(&index[k],&m);
if ((l=matchstring(s,map+k))==0) { if ((l=match(s,map+m))==0) {
if ((rec=findrec(k))) if ((rec=findrec(m)))
setbit(bitfield,rec); setbit(bitfield,rec);
} else break; } else break;
} }
return; return;
} }
#if 0
buffer_puts(buffer_2," \"");
buffer_put(buffer_2,s->s,s->l);
buffer_puts(buffer_2,"\" vs. \"");
buffer_puts(buffer_2,map+k);
buffer_puts(buffer_2," -> ");
buffer_putlong(buffer_2,l);
buffer_putsflush(buffer_2,"\n");
#endif
if (l<0) { if (l<0) {
if (mid) if (mid)
top=mid-1; top=mid-1;
else else
break; break; /* since our offsets are unsigned, we need to avoid the -1 case */
} else } else
bottom=mid+1; bottom=mid+1;
} }
} }
/* Use the indices to answer a query with the given filter.
* For all matching records, set the corresponding bit to 1 in bitfield.
* Note that this match can be approximate. Before answering, the
* matches are verified with ldap_match_mapped, so the index can also
* be used if it only helps eliminate some of the possible matches (for
* example an AND query where only one of the involved attributes has an
* index). */
static int useindex(struct Filter* f,unsigned long* bitfield) { static int useindex(struct Filter* f,unsigned long* bitfield) {
struct Filter* y=f->x; struct Filter* y=f->x;
if (!f) return 1; if (!f) return 1;
switch (f->type) { switch (f->type) {
case AND: case AND:
while (y) { {
if (!indexable(y)) return 0; unsigned long* tmp=alloca(record_set_length*sizeof(unsigned long));
y=y->next; int ok=0;
fillset(bitfield);
while (y) {
if (useindex(y,tmp)) {
unsigned int i;
for (i=0; i<record_set_length; ++i)
bitfield[i] &= tmp[i];
ok=1;
}
y=y->next;
}
return ok;
} }
return 1;
case OR: case OR:
while (y) { {
if (!indexable(y)) return 0; unsigned long* tmp=alloca(record_set_length*sizeof(unsigned long));
y=y->next; int ok=1;
emptyset(bitfield);
while (y) {
if (useindex(y,tmp)) {
unsigned int i;
for (i=0; i<record_set_length; ++i)
bitfield[i] |= tmp[i];
} else
ok=0;
y=y->next;
}
return ok;
} }
return 1;
#if 0 #if 0
/* doesn't make much sense to try to speed up negated queries */ /* doesn't make much sense to try to speed up negated queries */
case NOT: case NOT:
@@ -233,7 +232,22 @@ static int useindex(struct Filter* f,unsigned long* bitfield) {
#endif #endif
case SUBSTRING: case SUBSTRING:
if (f->substrings->substrtype!=prefix) return 0; if (f->substrings->substrtype!=prefix) return 0;
/* fall through */ {
uint32 ofs;
for (ofs=indices_offset+record_count*4; ofs<(unsigned long)filelen;) {
uint32 index_type,next,indexed_attribute;
uint32_unpack(map+ofs,&index_type);
uint32_unpack(map+ofs+4,&next);
uint32_unpack(map+ofs+8,&indexed_attribute);
if (index_type==0)
if (!matchstring(&f->ava.desc,map+indexed_attribute)) {
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->substrings->s,bitfield,matchprefix);
return 1;
}
ofs=next;
}
}
return 0;
case EQUAL: case EQUAL:
{ {
uint32 ofs; uint32 ofs;
@@ -244,7 +258,7 @@ static int useindex(struct Filter* f,unsigned long* bitfield) {
uint32_unpack(map+ofs+8,&indexed_attribute); uint32_unpack(map+ofs+8,&indexed_attribute);
if (index_type==0) if (index_type==0)
if (!matchstring(&f->ava.desc,map+indexed_attribute)) { if (!matchstring(&f->ava.desc,map+indexed_attribute)) {
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->ava.value,bitfield); tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->ava.value,bitfield,matchstring);
return 1; return 1;
} }
ofs=next; ofs=next;