From 4792ff7c581eb9e0c2aed10b1e9ed91c1cfffeb5 Mon Sep 17 00:00:00 2001 From: leitner Date: Tue, 26 Mar 2002 15:51:53 +0000 Subject: [PATCH] indexed substrings (prefixes only, for now), and untested AND and OR support. --- FORMAT | 3 +- Makefile | 5 +- ldap.h | 1 + matchprefix.c | 18 ++++++ tinyldap.c | 172 +++++++++++++++++++++++++++----------------------- 5 files changed, 117 insertions(+), 82 deletions(-) create mode 100644 matchprefix.c diff --git a/FORMAT b/FORMAT index 1003263..fc3f6d2 100644 --- a/FORMAT +++ b/FORMAT @@ -25,7 +25,8 @@ All integers are stored LITTLE ENDIAN. char string_table[size_of_string_table]; uint32_t attribute_names[attribute_count]; uint32_t attribute_flags[attribute_count]; /* 1: match case insensitively */ - uint32_t records[record_count][]; + uint32_t records[record_count][]; /* in the same order as the records + are physically on disk */ /* indices_offset points here */ uint32_t record_index[record_count]; struct { diff --git a/Makefile b/Makefile index ae861e0..ac1ece0 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -#DEBUG=1 +DEBUG=1 all: t1 t2 parse dumpidx addindex bindrequest tinyldap tinyldap_standalone tinyldap_debug ldapclient ldapclient_str # t @@ -15,7 +15,8 @@ scan_ldapstring.o scan_ldapsearchfilter.o scan_ldapsearchrequest.o \ freefilter.o freeava.o scan_ldapava.o fmt_ldapsearchresultentry.o \ fmt_ldapstring.o freepal.o scan_ldapsearchresultentry.o \ fmt_ldapresult.o fmt_ldappal.o fmt_ldapadl.o fmt_ldapava.o \ -fmt_ldapsearchfilter.o fmt_ldapsearchrequest.o matchstring.o +fmt_ldapsearchfilter.o fmt_ldapsearchrequest.o matchstring.o \ +matchprefix.o ldif.a: ldif_parse.o ldap_match.o ldap_match_mapped.o diff --git a/ldap.h b/ldap.h index 39b3504..7fbec64 100644 --- a/ldap.h +++ b/ldap.h @@ -7,6 +7,7 @@ struct string { }; int matchstring(struct string* s,const char* c); +int matchprefix(struct string* s,const char* c); struct AttributeValueAssertion { struct string desc, value; diff --git a/matchprefix.c 
b/matchprefix.c new file mode 100644 index 0000000..20c01fc --- /dev/null +++ b/matchprefix.c @@ -0,0 +1,18 @@ +#include "byte.h" +#include "ldif.h" + +/* behave like strcmp, but also return 0 if s is a prefix of c. */ +int matchprefix(struct string* s,const char* c) { + unsigned int l,l1,i; + if (!c) return -1; + l1=l=strlen(c); + if (s->l<l1) l1=s->l; + i=byte_diff(s->s,l1,c); + if (i) return i; + /* one is a prefix of the other */ + if (l==s->l) return 0; + if (c[l1]) /* is c the longer string? */ + return 0; + return -(int)(s->s[l1]); +} + diff --git a/tinyldap.c b/tinyldap.c index c075a74..c703200 100644 --- a/tinyldap.c +++ b/tinyldap.c @@ -23,16 +23,17 @@ uint32 record_set_length; #define BUFSIZE 8192 +/* find out whether this filter can be accelerated with the indices */ static int indexable(struct Filter* f) { struct Filter* y=f->x; - if (!f) return 1; + if (!f) return 0; switch (f->type) { case AND: while (y) { - if (!indexable(y)) return 0; + if (indexable(y)) return 1; y=y->next; } - return 1; + return 0; case OR: while (y) { if (!indexable(y)) return 0; @@ -67,6 +68,18 @@ static int indexable(struct Filter* f) { } } +/* each record can have more than one attribute with the same name, i.e. + * two email addresses. Thus, the index can't just be a sorted list of + * pointers the records (because a record with two email addresses needs + * to be in the index twice, once for each email address). So our index + * is a sorted list of pointers to the attributes. Thus, a look-up in + * the index does not yield the record but the attribute. We need to be + * able to find the record for a given attribute. To do that, we + * exploit the fact that the strings in the string table are in the same + * order as the records, so we can do a binary search over the record + * table to find the record with the attribute. This does not work for + * objectClass, because the classes are stored in a different string + * table to remove duplicates. 
*/ /* find record given a data pointer */ static uint32 findrec(uint32 dat) { uint32* records=(uint32*)(map+indices_offset); @@ -77,17 +90,6 @@ static uint32 findrec(uint32 dat) { uint32 k,l; uint32_unpack(&records[mid],&k); uint32_unpack(map+k+8,&l); - -#if 0 - buffer_puts(buffer_2,"findrec: look for "); - buffer_putulong(buffer_2,dat); - buffer_puts(buffer_2," in record "); - buffer_putulong(buffer_2,mid); - buffer_puts(buffer_2," @"); - buffer_putulong(buffer_2,l); - buffer_putsflush(buffer_2,".\n"); -#endif - if (l0) - buffer_putsflush(buffer_2,"not properly sorted!\n"); - } - } - } -#endif - while ((top>=bottom)) { uint32 mid=(top+bottom)/2; uint32 k; int l; -#if 0 - buffer_puts(buffer_2,"bottom="); - buffer_putulong(buffer_2,bottom); - buffer_puts(buffer_2,", mid="); - buffer_putulong(buffer_2,mid); - buffer_puts(buffer_2,", top="); - buffer_putulong(buffer_2,top); - buffer_puts(buffer_2,", elements="); - buffer_putulong(buffer_2,elements); - buffer_putsflush(buffer_2,".\n"); -#endif - uint32_unpack(&index[mid],&k); - if ((l=matchstring(s,map+k))==0) { + if ((l=match(s,map+k))==0) { /* match! */ uint32 rec; - uint32 oldk=k; + uint32 m; if ((rec=findrec(k))) setbit(bitfield,rec); /* there may be multiple matches. * Look before and after mid, too */ - for (oldk=k; k>0; ) { - k-=4; - if ((l=matchstring(s,map+k))==0) { - if ((rec=findrec(k))) + for (k=mid-1; k>0; --k) { + uint32_unpack(&index[k],&m); + if ((l=match(s,map+m))==0) { + if ((rec=findrec(m))) setbit(bitfield,rec); } else break; } - for (k=oldk; ks,s->l); - buffer_puts(buffer_2,"\" vs. \""); - buffer_puts(buffer_2,map+k); - buffer_puts(buffer_2," -> "); - buffer_putlong(buffer_2,l); - buffer_putsflush(buffer_2,"\n"); -#endif if (l<0) { if (mid) top=mid-1; else - break; + break; /* since our offsets are unsigned, we need to avoid the -1 case */ } else bottom=mid+1; } } +/* Use the indices to answer a query with the given filter. + * For all matching records, set the corresponding bit to 1 in bitfield. 
+ * Note that this match can be approximate. Before answering, the + * matches are verified with ldap_match_mapped, so the index can also + * be used if it only helps eliminate some of the possible matches (for + * example an AND query where only one of the involved attributes has an + * index). */ static int useindex(struct Filter* f,unsigned long* bitfield) { struct Filter* y=f->x; if (!f) return 1; switch (f->type) { case AND: - while (y) { - if (!indexable(y)) return 0; - y=y->next; + { + unsigned long* tmp=alloca(record_set_length*sizeof(unsigned long)); + int ok=0; + fillset(bitfield); + while (y) { + if (useindex(y,tmp)) { + unsigned int i; + for (i=0; inext; + } + return ok; } - return 1; case OR: - while (y) { - if (!indexable(y)) return 0; - y=y->next; + { + unsigned long* tmp=alloca(record_set_length*sizeof(unsigned long)); + int ok=1; + emptyset(bitfield); + while (y) { + if (useindex(y,tmp)) { + unsigned int i; + for (i=0; inext; + } + return ok; } - return 1; #if 0 /* doesn't make much sense to try to speed up negated queries */ case NOT: @@ -233,7 +232,22 @@ static int useindex(struct Filter* f,unsigned long* bitfield) { #endif case SUBSTRING: if (f->substrings->substrtype!=prefix) return 0; - /* fall through */ + { + uint32 ofs; + for (ofs=indices_offset+record_count*4; ofs<(unsigned long)filelen;) { + uint32 index_type,next,indexed_attribute; + uint32_unpack(map+ofs,&index_type); + uint32_unpack(map+ofs+4,&next); + uint32_unpack(map+ofs+8,&indexed_attribute); + if (index_type==0) + if (!matchstring(&f->ava.desc,map+indexed_attribute)) { + tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->substrings->s,bitfield,matchprefix); + return 1; + } + ofs=next; + } + } + return 0; case EQUAL: { uint32 ofs; @@ -244,7 +258,7 @@ static int useindex(struct Filter* f,unsigned long* bitfield) { uint32_unpack(map+ofs+8,&indexed_attribute); if (index_type==0) if (!matchstring(&f->ava.desc,map+indexed_attribute)) { - 
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->ava.value,bitfield); + tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->ava.value,bitfield,matchstring); return 1; } ofs=next;