From 0221bf98a981f7ed6ce9406016820be3b4a6c1fe Mon Sep 17 00:00:00 2001 From: leitner Date: Wed, 3 Apr 2002 23:53:12 +0000 Subject: [PATCH] add case sensitive matching --- Makefile | 6 +- addindex.c | 27 +++++-- byte_case_diff.c | 14 ++++ case.h | 3 + dumpidx.c | 2 + ldap.h | 8 ++ ldap_match_mapped.c | 63 ++++++++++----- matchcaseprefix.c | 18 +++++ matchcasestring.c | 18 +++++ tinyldap.c | 183 ++++++++++++++++++++++++++++++++++++++++++-- 10 files changed, 307 insertions(+), 35 deletions(-) create mode 100644 byte_case_diff.c create mode 100644 case.h create mode 100644 matchcaseprefix.c create mode 100644 matchcasestring.c diff --git a/Makefile b/Makefile index 2815524..a058fb8 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,8 @@ freefilter.o freeava.o scan_ldapava.o fmt_ldapsearchresultentry.o \ fmt_ldapstring.o freepal.o scan_ldapsearchresultentry.o \ fmt_ldapresult.o fmt_ldappal.o fmt_ldapadl.o fmt_ldapava.o \ fmt_ldapsearchfilter.o fmt_ldapsearchrequest.o matchstring.o \ -matchprefix.o scan_ldapmodifyrequest.o +matchprefix.o byte_case_diff.o matchcasestring.o matchcaseprefix.o \ +scan_ldapmodifyrequest.o ldif.a: ldif_parse.o ldap_match.o ldap_match_mapped.o @@ -42,9 +43,8 @@ endif t1 parse: ldif.a storage.a t2: ldap.a asn1.a t3 t4 t5 addindex: storage.a -bindrequest tinyldap tinyldap_standalone tinyldap_debug ldapclient ldapclient_str: ldap.a asn1.a - tinyldap tinyldap_standalone tinyldap_debug: ldif.a storage.a +bindrequest tinyldap tinyldap_standalone tinyldap_debug ldapclient ldapclient_str: ldap.a asn1.a tinyldap_standalone: tinyldap.c $(DIET) $(CC) $(CFLAGS) -DSTANDALONE -o $@ $^ -lowfat diff --git a/addindex.c b/addindex.c index 7e6ff62..bdfb81b 100644 --- a/addindex.c +++ b/addindex.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "buffer.h" #include "mmap.h" #include "uint32.h" @@ -12,17 +13,21 @@ char* map; int compar(const void* a,const void* b) { return strcmp(map+*(uint32*)a,map+*(uint32*)b); -// return *(uint32*)b - *(uint32*)a; +} + +int compari(const void* a,const void* b) { + return strcasecmp(map+*(uint32*)a,map+*(uint32*)b); } int main(int argc,char* argv[]) { long filelen; - char* filename=argv[1]?argv[1]:"data"; + char* filename=argv[1]; uint32 magic,attribute_count,record_count,indices_offset,size_of_string_table; - uint32 wanted,dn,objectClass; + uint32 wanted,casesensitive,dn,objectClass; if (argc<3) { - buffer_putsflush(buffer_2,"usage: ./addindex filename attribute\n"); + buffer_putsflush(buffer_2,"usage: ./addindex filename attribute [i]\n" + "if i is present, make index case insensitive.\n"); return 1; } map=mmap_read(filename,&filelen); @@ -45,7 +50,13 @@ int main(int argc,char* argv[]) { uint32_unpack(x,&j); if (!strcmp(map+j,argv[2])) { buffer_putsflush(buffer_2,"found attribute!\n"); - wanted=j; + wanted=j; casesensitive=x+attribute_count*4-map; + uint32_unpack(map+casesensitive,&j); + if (j) { + buffer_putsflush(buffer_2,"case sensitivity flag is nonzero?!\n"); + return 1; + } + break; } else if (!strcmp(map+j,"dn")) dn=j; else if (!strcmp(map+j,"objectClass")) @@ -87,7 +98,10 @@ int main(int argc,char* argv[]) { } buffer_putulong(buffer_1,counted); buffer_putsflush(buffer_1," entries to be sorted..."); - qsort(idx.root,counted,4,compar); + if (argc>3) + qsort(idx.root,counted,4,compari); + else + qsort(idx.root,counted,4,compar); buffer_putsflush(buffer_1," done.\n"); munmap(map,filelen); { @@ -102,6 +116,7 @@ int main(int argc,char* argv[]) { buffer_putsflush(buffer_2,"could not mmap database file read-write\n"); exit(1); } + uint32_pack(map+casesensitive,argc>3?1:0); uint32_pack(map+filelen,0); uint32_pack(map+filelen+4,filelen+(counted+3)*4); uint32_pack(map+filelen+8,wanted); diff --git a/byte_case_diff.c b/byte_case_diff.c new file mode 100644 index 0000000..d596a9c --- /dev/null +++ b/byte_case_diff.c @@ -0,0 +1,14 @@ +#include + +int byte_case_diff(const void* a, unsigned int len, const void* b) { + register const char* s=a; + register const char* t=b; + register const char* u=t+len; + register int j; + j=0; + for (;;) { + if (t==u) break; if ((j=(tolower(*s)-tolower(*t)))) break; ++s; ++t; + } + return j; +} + diff --git a/case.h b/case.h new file mode 100644 index 0000000..b0fa6f5 --- /dev/null +++ b/case.h @@ -0,0 +1,3 @@ + +int byte_case_diff(const void* a, unsigned int len, const void* b); + diff --git a/dumpidx.c b/dumpidx.c index 0f4d568..8d5c09d 100644 --- a/dumpidx.c +++ b/dumpidx.c @@ -33,6 +33,8 @@ int main() { uint32 j; uint32_unpack(x,&j); buffer_puts(buffer_1,map+j); + uint32_unpack(x+attribute_count*4,&j); + if (j&1) buffer_puts(buffer_1," (case insensitive)"); buffer_putsflush(buffer_1,"\n"); x+=4; } diff --git a/ldap.h b/ldap.h index 6a06bf5..b30491f 100644 --- a/ldap.h +++ b/ldap.h @@ -1,13 +1,17 @@ #ifndef _LDAP_H #define _LDAP_H +#include "uint32.h" + struct string { unsigned long l; const char* s; }; int matchstring(struct string* s,const char* c); +int matchcasestring(struct string* s,const char* c); int matchprefix(struct string* s,const char* c); +int matchcaseprefix(struct string* s,const char* c); struct AttributeValueAssertion { struct string desc, value; @@ -35,9 +39,13 @@ struct Filter { AND=0, OR=1, NOT=2, EQUAL=3, SUBSTRING=4, GREATEQUAL=5, LESSEQUAL=6, PRESENT=7, APPROX=8, EXTENSIBLE=9 } type; struct AttributeValueAssertion ava; + uint32 attrofs; /* offset of attribute name in index */ + uint32 attrflag; /* "case sensitivity" flag from index */ struct Substring* substrings; struct AttributeDescriptionList *a; struct Filter* x,*next; + /* x is the subject of this filter (AND, OR and NOT) */ + /* next is used to form a linked list of subjects */ }; struct SearchRequest { diff --git a/ldap_match_mapped.c b/ldap_match_mapped.c index f2a5587..7b38d11 100644 --- a/ldap_match_mapped.c +++ b/ldap_match_mapped.c @@ -3,28 +3,36 @@ #include "byte.h" #include "str.h" #include "uint32.h" +#include "case.h" #include #include extern char* map; extern long filelen; extern uint32 magic,attribute_count,record_count,indices_offset,size_of_string_table; +extern uint32 dn_ofs,objectClass_ofs; -static int substringmatch(struct Substring* x,const char* attr) { +static int substringmatch(struct Substring* x,const char* attr,int ignorecase) { + int (*diff)(const void* a, unsigned int len, const void* b); + if (ignorecase) + diff=byte_case_diff; + else + diff=byte_diff; while (x) { unsigned int i; if (x->s.l>strlen(attr)) return 0; switch (x->substrtype) { case prefix: - if (byte_diff(x->s.s,x->s.l,attr)) return 0; + if (diff(x->s.s,x->s.l,attr)) return 0; found: break; case any: + if (x->s.ls.l-strlen(attr); ++i) - if (byte_equal(x->s.s+i,x->s.l,attr)) goto found; + if (!diff(x->s.s+i,x->s.l,attr)) goto found; return 0; case suffix: - if (byte_diff(x->s.s+x->s.l-strlen(attr),x->s.l,attr)) return 0; + if (diff(x->s.s+x->s.l-strlen(attr),x->s.l,attr)) return 0; } x=x->next; } @@ -52,21 +60,35 @@ int ldap_matchfilter_mapped(uint32 ofs,struct Filter* f) { return !ldap_matchfilter_mapped(ofs,y); case EQUAL: { - uint32 i=2,j,k; + uint32 i,j,k; uint32_unpack(map+ofs,&j); - if (!matchstring(&f->ava.desc,"dn")) { +// if (!matchstring(&f->ava.desc,"dn")) { + if (f->attrofs==dn_ofs) { uint32_unpack(map+ofs+8,&k); - if (!matchstring(&f->ava.value,map+k)) return 1; - } else if (!matchstring(&f->ava.desc,"objectName")) { + if (f->attrflag&1) { + if (!matchcasestring(&f->ava.value,map+k)) return 1; + } else { + if (!matchstring(&f->ava.value,map+k)) return 1; + } +// } else if (!matchstring(&f->ava.desc,"objectName")) { + } else if (f->attrofs==objectClass_ofs) { uint32_unpack(map+ofs+12,&k); - if (!matchstring(&f->ava.value,map+k)) return 1; + if (f->attrflag&1) { + if (!matchcasestring(&f->ava.value,map+k)) return 1; + } else { + if (!matchstring(&f->ava.value,map+k)) return 1; + } } for (i=2; iava.desc,map+k)) { +// if (!matchstring(&f->ava.desc,map+k)) { + if (f->attrofs==k) { uint32_unpack(map+ofs+i*8+4,&k); - if (!matchstring(&f->ava.value,map+k)) - return 1; + if (f->attrflag&1) { + if (!matchcasestring(&f->ava.value,map+k)) return 1; + } else { + if (!matchstring(&f->ava.value,map+k)) return 1; + } } } return 0; @@ -74,20 +96,23 @@ int ldap_matchfilter_mapped(uint32 ofs,struct Filter* f) { break; case SUBSTRING: { - uint32 i=2,j,k; + uint32 i,j,k; uint32_unpack(map+ofs,&j); - if (matchstring(&f->ava.desc,"dn")) { +// if (matchstring(&f->ava.desc,"dn")) { + if (f->attrofs==dn_ofs) { uint32_unpack(map+ofs+8,&k); - if (substringmatch(f->substrings,map+k)) return 1; - } else if (matchstring(&f->ava.desc,"objectName")) { + if (substringmatch(f->substrings,map+k,f->attrflag&1)) return 1; +// } else if (matchstring(&f->ava.desc,"objectName")) { + } else if (f->attrofs==objectClass_ofs) { uint32_unpack(map+ofs+12,&k); - if (substringmatch(f->substrings,map+k)) return 1; + if (substringmatch(f->substrings,map+k,f->attrflag&1)) return 1; } for (i=2; iava.desc,map+k)) { +// if (!matchstring(&f->ava.desc,map+k)) { + if (f->attrofs==k) { uint32_unpack(map+ofs+i*8+4,&k); - if (substringmatch(f->substrings,map+k)) + if (substringmatch(f->substrings,map+k,f->attrflag&1)) return 1; } } diff --git a/matchcaseprefix.c b/matchcaseprefix.c new file mode 100644 index 0000000..ed79d5d --- /dev/null +++ b/matchcaseprefix.c @@ -0,0 +1,18 @@ +#include "case.h" +#include "ldif.h" + +/* behave like strcmp, but also return 0 if s is a prefix of c. */ +int matchcaseprefix(struct string* s,const char* c) { + unsigned int l,l1,i; + if (!c) return -1; + l1=l=strlen(c); + if (s->ll; + i=byte_case_diff(s->s,l1,c); + if (i) return i; + /* one is a prefix of the other */ + if (l==s->l) return 0; + if (c[l1]) /* is c the longer string? */ + return 0; + return -(int)(s->s[l1]); +} + diff --git a/matchcasestring.c b/matchcasestring.c new file mode 100644 index 0000000..7414643 --- /dev/null +++ b/matchcasestring.c @@ -0,0 +1,18 @@ +#include "case.h" +#include "ldif.h" + +/* like matchstring, but case insensitively */ +int matchcasestring(struct string* s,const char* c) { + unsigned int l,l1,i; + if (!c) return -1; + l1=l=strlen(c); + if (s->ll; + i=byte_case_diff(s->s,l1,c); + if (i) return i; + /* one is a prefix of the other */ + if (l==s->l) return 0; + if (c[l1]) /* is c the longer string? */ + return c[l1]; + return -(int)(s->s[l1]); +} + diff --git a/tinyldap.c b/tinyldap.c index 511f0ba..e93bfbf 100644 --- a/tinyldap.c +++ b/tinyldap.c @@ -13,7 +13,8 @@ #include #endif -static int verbose=0; +static const int verbose=0; +static const int debug=1; char* map; long filelen; uint32 magic,attribute_count,record_count,indices_offset,size_of_string_table; @@ -21,8 +22,130 @@ uint32 magic,attribute_count,record_count,indices_offset,size_of_string_table; /* how many longs are needed to have one bit for each record? */ uint32 record_set_length; +/* some pre-looked-up attribute offsets to speed up ldap_match_mapped */ +uint32 dn_ofs,objectClass_ofs; + #define BUFSIZE 8192 +/* debugging support functions, adapted from t2.c */ +static void printava(struct AttributeValueAssertion* a,const char* rel) { + buffer_puts(buffer_2,"["); + buffer_put(buffer_2,a->desc.s,a->desc.l); + buffer_puts(buffer_2," "); + buffer_puts(buffer_2,rel); + buffer_puts(buffer_2," "); + buffer_put(buffer_2,a->value.s,a->value.l); + buffer_puts(buffer_2,"]"); +} + +static void printal(struct AttributeDescriptionList* a) { + while (a) { + buffer_put(buffer_2,a->a.s,a->a.l); + a=a->next; + if (a) buffer_puts(buffer_2,","); + } + if (a) buffer_puts(buffer_2,"\n"); +} + +static void printfilter(struct Filter* f) { + switch (f->type) { + case AND: + buffer_puts(buffer_2,"&("); +mergesub: + printfilter(f->x); + buffer_puts(buffer_2,")\n"); + break; + case OR: + buffer_puts(buffer_2,"|("); + goto mergesub; + break; + case NOT: + buffer_puts(buffer_2,"!("); + goto mergesub; + case EQUAL: + printava(&f->ava,"=="); + break; + case SUBSTRING: + { + struct Substring* s=f->substrings; + int first=1; + buffer_put(buffer_2,f->ava.desc.s,f->ava.desc.l); + buffer_puts(buffer_2," has "); + while (s) { + if (!first) buffer_puts(buffer_2," and "); first=0; + switch(s->substrtype) { + case prefix: buffer_puts(buffer_2,"prefix \""); break; + case any: buffer_puts(buffer_2,"substr \""); break; + case suffix: buffer_puts(buffer_2,"suffix \""); break; + } + buffer_put(buffer_2,s->s.s,s->s.l); + buffer_puts(buffer_2,"\""); + s=s->next; + } + } + break; + case GREATEQUAL: + printava(&f->ava,">="); + break; + case LESSEQUAL: + printava(&f->ava,"<="); + break; + case PRESENT: + printava(&f->ava,"\\exist"); + break; + case APPROX: + printava(&f->ava,"\\approx"); + break; + case EXTENSIBLE: + buffer_puts(buffer_2,"[extensible]"); + break; + } + if (f->next) { + buffer_puts(buffer_2,","); + printfilter(f->next); + } + buffer_flush(buffer_2); +} + +/* recursively fill in attrofs and attrflag */ +static void fixup(struct Filter* f) { + if (!f) return; + switch (f->type) { + case EQUAL: + case SUBSTRING: + case GREATEQUAL: + case LESSEQUAL: + case PRESENT: + case APPROX: + { + char* x=map+5*4+size_of_string_table; + unsigned int i; + f->attrofs=f->attrflag=0; + for (i=0; iava.desc,map+j)) { + f->attrofs=j; + uint32_unpack(x+-attribute_count*4,&f->attrflag); + break; + } + x+=4; + } + if (!f->attrofs) { + buffer_puts(buffer_2,"cannot find attribute \""); + buffer_put(buffer_2,f->ava.desc.s,f->ava.desc.l); + buffer_putsflush(buffer_2,"\"!\n"); + } + } + case AND: + case OR: + case NOT: + if (f->x) fixup(f->x); + default: + } + if (f->next) fixup(f->next); +} + /* find out whether this filter can be accelerated with the indices */ static int indexable(struct Filter* f) { struct Filter* y=f->x; @@ -241,7 +364,8 @@ static int useindex(struct Filter* f,unsigned long* bitfield) { uint32_unpack(map+ofs+8,&indexed_attribute); if (index_type==0) if (!matchstring(&f->ava.desc,map+indexed_attribute)) { - tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->substrings->s,bitfield,matchprefix); + tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->substrings->s,bitfield, + f->attrflag&1?matchcaseprefix:matchprefix); return 1; } ofs=next; @@ -258,7 +382,8 @@ static int useindex(struct Filter* f,unsigned long* bitfield) { uint32_unpack(map+ofs+8,&indexed_attribute); if (index_type==0) if (!matchstring(&f->ava.desc,map+indexed_attribute)) { - tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->ava.value,bitfield,matchstring); + tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->ava.value,bitfield, + f->attrflag&1?matchcasestring:matchstring); return 1; } ofs=next; @@ -275,7 +400,7 @@ static void answerwith(uint32 ofs,struct SearchRequest* sr,long messageid,int ou struct SearchResultEntry sre; struct PartialAttributeList** pal=&sre.attributes; - if (0) { + if (debug) { char* x=map+ofs; uint32 j,k; uint32_unpack(x,&j); @@ -443,10 +568,31 @@ int handle(int in,int out) { } #endif if ((tmp=scan_ldapsearchrequest(buf+res,buf+res+len,&sr))) { + + if (debug) { + const char* scopes[]={"baseObject","singleLevel","wholeSubtree"}; + const char* alias[]={"neverDerefAliases","derefInSearching","derefFindingBaseObj","derefAlways"}; + buffer_puts(buffer_2,"search request: baseObject \""); + buffer_put(buffer_2,sr.baseObject.s,sr.baseObject.l); + buffer_puts(buffer_2,"\", scope "); + buffer_puts(buffer_2,scopes[sr.scope]); + buffer_puts(buffer_2,", "); + buffer_puts(buffer_2,alias[sr.derefAliases]); + buffer_puts(buffer_2,"\nsize limit "); + buffer_putulong(buffer_2,sr.sizeLimit); + buffer_puts(buffer_2,", time limit "); + buffer_putulong(buffer_2,sr.timeLimit); + buffer_puts(buffer_2,"\n"); + printfilter(sr.filter); + buffer_puts(buffer_2,"attributes: "); + printal(sr.attributes); + buffer_putsflush(buffer_2,"\n\n"); + } + fixup(sr.filter); if (indexable(sr.filter)) { unsigned long* result; unsigned long i; -// buffer_putsflush(buffer_2,"query is indexable!\n"); + if (debug) buffer_putsflush(buffer_2,"query can be answered with index!\n"); record_set_length=(record_count+sizeof(unsigned long)*8-1) / (sizeof(long)*8); result=alloca(record_set_length*sizeof(unsigned long)); /* Use the index to find matching data. Put the offsets @@ -524,11 +670,15 @@ int handle(int in,int out) { exit(1); } } + case AbandonRequest: + /* do nothing */ + break; default: buffer_puts(buffer_2,"unknown request type "); buffer_putulong(buffer_2,op); buffer_putsflush(buffer_2,"\n"); - exit(1); + return 0; +// exit(1); } Len+=res; #if 0 @@ -563,6 +713,26 @@ int main() { uint32_unpack(map+3*4,&indices_offset); uint32_unpack(map+4*4,&size_of_string_table); + /* look up "dn" and "objectClass" */ + { + char* x=map+5*4+size_of_string_table; + unsigned int i; + dn_ofs=objectClass_ofs=0; + for (i=0; i