indexed substrings (prefixes only, for now), and untested AND and OR
support.
This commit is contained in:
3
FORMAT
3
FORMAT
@@ -25,7 +25,8 @@ All integers are stored LITTLE ENDIAN.
|
|||||||
char string_table[size_of_string_table];
|
char string_table[size_of_string_table];
|
||||||
uint32_t attribute_names[attribute_count];
|
uint32_t attribute_names[attribute_count];
|
||||||
uint32_t attribute_flags[attribute_count]; /* 1: match case insensitively */
|
uint32_t attribute_flags[attribute_count]; /* 1: match case insensitively */
|
||||||
uint32_t records[record_count][];
|
uint32_t records[record_count][]; /* in the same order as the records
|
||||||
|
are physically on disk */
|
||||||
/* indices_offset points here */
|
/* indices_offset points here */
|
||||||
uint32_t record_index[record_count];
|
uint32_t record_index[record_count];
|
||||||
struct {
|
struct {
|
||||||
|
|||||||
5
Makefile
5
Makefile
@@ -1,4 +1,4 @@
|
|||||||
#DEBUG=1
|
DEBUG=1
|
||||||
|
|
||||||
all: t1 t2 parse dumpidx addindex bindrequest tinyldap tinyldap_standalone tinyldap_debug ldapclient ldapclient_str # t
|
all: t1 t2 parse dumpidx addindex bindrequest tinyldap tinyldap_standalone tinyldap_debug ldapclient ldapclient_str # t
|
||||||
|
|
||||||
@@ -15,7 +15,8 @@ scan_ldapstring.o scan_ldapsearchfilter.o scan_ldapsearchrequest.o \
|
|||||||
freefilter.o freeava.o scan_ldapava.o fmt_ldapsearchresultentry.o \
|
freefilter.o freeava.o scan_ldapava.o fmt_ldapsearchresultentry.o \
|
||||||
fmt_ldapstring.o freepal.o scan_ldapsearchresultentry.o \
|
fmt_ldapstring.o freepal.o scan_ldapsearchresultentry.o \
|
||||||
fmt_ldapresult.o fmt_ldappal.o fmt_ldapadl.o fmt_ldapava.o \
|
fmt_ldapresult.o fmt_ldappal.o fmt_ldapadl.o fmt_ldapava.o \
|
||||||
fmt_ldapsearchfilter.o fmt_ldapsearchrequest.o matchstring.o
|
fmt_ldapsearchfilter.o fmt_ldapsearchrequest.o matchstring.o \
|
||||||
|
matchprefix.o
|
||||||
|
|
||||||
ldif.a: ldif_parse.o ldap_match.o ldap_match_mapped.o
|
ldif.a: ldif_parse.o ldap_match.o ldap_match_mapped.o
|
||||||
|
|
||||||
|
|||||||
1
ldap.h
1
ldap.h
@@ -7,6 +7,7 @@ struct string {
|
|||||||
};
|
};
|
||||||
|
|
||||||
int matchstring(struct string* s,const char* c);
|
int matchstring(struct string* s,const char* c);
|
||||||
|
int matchprefix(struct string* s,const char* c);
|
||||||
|
|
||||||
struct AttributeValueAssertion {
|
struct AttributeValueAssertion {
|
||||||
struct string desc, value;
|
struct string desc, value;
|
||||||
|
|||||||
18
matchprefix.c
Normal file
18
matchprefix.c
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
#include "byte.h"
|
||||||
|
#include "ldif.h"
|
||||||
|
|
||||||
|
/* behave like strcmp, but also return 0 if s is a prefix of c. */
|
||||||
|
int matchprefix(struct string* s,const char* c) {
|
||||||
|
unsigned int l,l1,i;
|
||||||
|
if (!c) return -1;
|
||||||
|
l1=l=strlen(c);
|
||||||
|
if (s->l<l1) l1=s->l;
|
||||||
|
i=byte_diff(s->s,l1,c);
|
||||||
|
if (i) return i;
|
||||||
|
/* one is a prefix of the other */
|
||||||
|
if (l==s->l) return 0;
|
||||||
|
if (c[l1]) /* is c the longer string? */
|
||||||
|
return 0;
|
||||||
|
return -(int)(s->s[l1]);
|
||||||
|
}
|
||||||
|
|
||||||
172
tinyldap.c
172
tinyldap.c
@@ -23,16 +23,17 @@ uint32 record_set_length;
|
|||||||
|
|
||||||
#define BUFSIZE 8192
|
#define BUFSIZE 8192
|
||||||
|
|
||||||
|
/* find out whether this filter can be accelerated with the indices */
|
||||||
static int indexable(struct Filter* f) {
|
static int indexable(struct Filter* f) {
|
||||||
struct Filter* y=f->x;
|
struct Filter* y=f->x;
|
||||||
if (!f) return 1;
|
if (!f) return 0;
|
||||||
switch (f->type) {
|
switch (f->type) {
|
||||||
case AND:
|
case AND:
|
||||||
while (y) {
|
while (y) {
|
||||||
if (!indexable(y)) return 0;
|
if (indexable(y)) return 1;
|
||||||
y=y->next;
|
y=y->next;
|
||||||
}
|
}
|
||||||
return 1;
|
return 0;
|
||||||
case OR:
|
case OR:
|
||||||
while (y) {
|
while (y) {
|
||||||
if (!indexable(y)) return 0;
|
if (!indexable(y)) return 0;
|
||||||
@@ -67,6 +68,18 @@ static int indexable(struct Filter* f) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* each record can have more than one attribute with the same name, i.e.
|
||||||
|
* two email addresses. Thus, the index can't just be a sorted list of
|
||||||
|
* pointers to the records (because a record with two email addresses needs
|
||||||
|
* to be in the index twice, once for each email address). So our index
|
||||||
|
* is a sorted list of pointers to the attributes. Thus, a look-up in
|
||||||
|
* the index does not yield the record but the attribute. We need to be
|
||||||
|
* able to find the record for a given attribute. To do that, we
|
||||||
|
* exploit the fact that the strings in the string table are in the same
|
||||||
|
* order as the records, so we can do a binary search over the record
|
||||||
|
* table to find the record with the attribute. This does not work for
|
||||||
|
* objectClass, because the classes are stored in a different string
|
||||||
|
* table to remove duplicates. */
|
||||||
/* find record given a data pointer */
|
/* find record given a data pointer */
|
||||||
static uint32 findrec(uint32 dat) {
|
static uint32 findrec(uint32 dat) {
|
||||||
uint32* records=(uint32*)(map+indices_offset);
|
uint32* records=(uint32*)(map+indices_offset);
|
||||||
@@ -77,17 +90,6 @@ static uint32 findrec(uint32 dat) {
|
|||||||
uint32 k,l;
|
uint32 k,l;
|
||||||
uint32_unpack(&records[mid],&k);
|
uint32_unpack(&records[mid],&k);
|
||||||
uint32_unpack(map+k+8,&l);
|
uint32_unpack(map+k+8,&l);
|
||||||
|
|
||||||
#if 0
|
|
||||||
buffer_puts(buffer_2,"findrec: look for ");
|
|
||||||
buffer_putulong(buffer_2,dat);
|
|
||||||
buffer_puts(buffer_2," in record ");
|
|
||||||
buffer_putulong(buffer_2,mid);
|
|
||||||
buffer_puts(buffer_2," @");
|
|
||||||
buffer_putulong(buffer_2,l);
|
|
||||||
buffer_putsflush(buffer_2,".\n");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (l<dat) {
|
if (l<dat) {
|
||||||
if (mid<record_count) {
|
if (mid<record_count) {
|
||||||
uint32_unpack(&records[mid+1],&k);
|
uint32_unpack(&records[mid+1],&k);
|
||||||
@@ -108,124 +110,121 @@ static uint32 findrec(uint32 dat) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* basic bit-set support: set all bits to zero */
|
||||||
static inline void emptyset(unsigned long* r) {
|
static inline void emptyset(unsigned long* r) {
|
||||||
unsigned long i;
|
unsigned long i;
|
||||||
for (i=0; i<record_set_length; ++i) r[i]=0;
|
for (i=0; i<record_set_length; ++i) r[i]=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* basic bit-set support: set all bits to zero */
|
||||||
|
static inline void fillset(unsigned long* r) {
|
||||||
|
unsigned long i;
|
||||||
|
for (i=0; i<record_set_length; ++i) r[i]=(unsigned long)-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* basic bit-set support: set one bit to 1.
 * r is an array of unsigned long words, bit is a zero-based bit number;
 * bit/bits_per_word selects the word, bit%bits_per_word the position
 * inside it.  The caller must make sure r is large enough. */
static inline void setbit(unsigned long* r,unsigned long bit) {
  const unsigned long bits_per_word=8*sizeof(unsigned long);
  /* shift an unsigned long, not an int: shifting the int constant 1 by
   * 31 or more positions is undefined and sets the wrong bits where
   * long is wider than int */
  r[bit/bits_per_word] |= 1UL<<(bit%bits_per_word);
}
|
||||||
|
|
||||||
|
/* basic bit-set support: see if given bit is set.
 * r is an array of unsigned long words, bit is a zero-based bit number.
 * Returns 1 if the bit is set and 0 otherwise. */
static inline int isset(unsigned long* r,unsigned long bit) {
  const unsigned long bits_per_word=8*sizeof(unsigned long);
  /* move the wanted bit down to position 0 before narrowing to int;
   * masking in place and then converting to int would drop bits in the
   * upper half of a 64-bit word */
  return (int)((r[bit/bits_per_word]>>(bit%bits_per_word))&1UL);
}
|
||||||
|
|
||||||
/* find record given a data pointer */
|
/* use index (sorted table of offsets to records) to do a binary search
|
||||||
static void tagmatches(uint32* index,unsigned int elements,struct string* s,unsigned long* bitfield) {
|
* for all records that match the value in s. Set the corresponding
|
||||||
|
* bits to 1 in bitfield. */
|
||||||
|
static void tagmatches(uint32* index,unsigned int elements,struct string* s,
|
||||||
|
unsigned long* bitfield,int (*match)(struct string* s,const char* c)) {
|
||||||
uint32 bottom=0;
|
uint32 bottom=0;
|
||||||
uint32 top=elements;
|
uint32 top=elements;
|
||||||
emptyset(bitfield);
|
emptyset(bitfield);
|
||||||
|
|
||||||
#if 0
|
|
||||||
{
|
|
||||||
long i,l;
|
|
||||||
for (i=0; i<elements; ++i) {
|
|
||||||
uint32 k;
|
|
||||||
uint32_unpack(&index[i],&k);
|
|
||||||
if ((l=matchstring(s,map+k))==0) {
|
|
||||||
buffer_puts(buffer_2,"found ");
|
|
||||||
buffer_puts(buffer_2,map+k);
|
|
||||||
buffer_putsflush(buffer_2,"\n");
|
|
||||||
}
|
|
||||||
if (i+1<elements) {
|
|
||||||
uint32 m;
|
|
||||||
uint32_unpack(&index[i+1],&m);
|
|
||||||
if (strcmp(map+k,map+m)>0)
|
|
||||||
buffer_putsflush(buffer_2,"not properly sorted!\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
while ((top>=bottom)) {
|
while ((top>=bottom)) {
|
||||||
uint32 mid=(top+bottom)/2;
|
uint32 mid=(top+bottom)/2;
|
||||||
uint32 k;
|
uint32 k;
|
||||||
int l;
|
int l;
|
||||||
|
|
||||||
#if 0
|
|
||||||
buffer_puts(buffer_2,"bottom=");
|
|
||||||
buffer_putulong(buffer_2,bottom);
|
|
||||||
buffer_puts(buffer_2,", mid=");
|
|
||||||
buffer_putulong(buffer_2,mid);
|
|
||||||
buffer_puts(buffer_2,", top=");
|
|
||||||
buffer_putulong(buffer_2,top);
|
|
||||||
buffer_puts(buffer_2,", elements=");
|
|
||||||
buffer_putulong(buffer_2,elements);
|
|
||||||
buffer_putsflush(buffer_2,".\n");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
uint32_unpack(&index[mid],&k);
|
uint32_unpack(&index[mid],&k);
|
||||||
if ((l=matchstring(s,map+k))==0) {
|
if ((l=match(s,map+k))==0) {
|
||||||
/* match! */
|
/* match! */
|
||||||
uint32 rec;
|
uint32 rec;
|
||||||
uint32 oldk=k;
|
uint32 m;
|
||||||
if ((rec=findrec(k)))
|
if ((rec=findrec(k)))
|
||||||
setbit(bitfield,rec);
|
setbit(bitfield,rec);
|
||||||
/* there may be multiple matches.
|
/* there may be multiple matches.
|
||||||
* Look before and after mid, too */
|
* Look before and after mid, too */
|
||||||
for (oldk=k; k>0; ) {
|
for (k=mid-1; k>0; --k) {
|
||||||
k-=4;
|
uint32_unpack(&index[k],&m);
|
||||||
if ((l=matchstring(s,map+k))==0) {
|
if ((l=match(s,map+m))==0) {
|
||||||
if ((rec=findrec(k)))
|
if ((rec=findrec(m)))
|
||||||
setbit(bitfield,rec);
|
setbit(bitfield,rec);
|
||||||
} else break;
|
} else break;
|
||||||
}
|
}
|
||||||
for (k=oldk; k<elements; ++k) {
|
for (k=mid+1; k<elements; ++k) {
|
||||||
k+=4;
|
uint32_unpack(&index[k],&m);
|
||||||
if ((l=matchstring(s,map+k))==0) {
|
if ((l=match(s,map+m))==0) {
|
||||||
if ((rec=findrec(k)))
|
if ((rec=findrec(m)))
|
||||||
setbit(bitfield,rec);
|
setbit(bitfield,rec);
|
||||||
} else break;
|
} else break;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#if 0
|
|
||||||
buffer_puts(buffer_2," \"");
|
|
||||||
buffer_put(buffer_2,s->s,s->l);
|
|
||||||
buffer_puts(buffer_2,"\" vs. \"");
|
|
||||||
buffer_puts(buffer_2,map+k);
|
|
||||||
buffer_puts(buffer_2," -> ");
|
|
||||||
buffer_putlong(buffer_2,l);
|
|
||||||
buffer_putsflush(buffer_2,"\n");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (l<0) {
|
if (l<0) {
|
||||||
if (mid)
|
if (mid)
|
||||||
top=mid-1;
|
top=mid-1;
|
||||||
else
|
else
|
||||||
break;
|
break; /* since our offsets are unsigned, we need to avoid the -1 case */
|
||||||
} else
|
} else
|
||||||
bottom=mid+1;
|
bottom=mid+1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Use the indices to answer a query with the given filter.
|
||||||
|
* For all matching records, set the corresponding bit to 1 in bitfield.
|
||||||
|
* Note that this match can be approximate. Before answering, the
|
||||||
|
* matches are verified with ldap_match_mapped, so the index can also
|
||||||
|
* be used if it only helps eliminate some of the possible matches (for
|
||||||
|
* example an AND query where only one of the involved attributes has an
|
||||||
|
* index). */
|
||||||
static int useindex(struct Filter* f,unsigned long* bitfield) {
|
static int useindex(struct Filter* f,unsigned long* bitfield) {
|
||||||
struct Filter* y=f->x;
|
struct Filter* y=f->x;
|
||||||
if (!f) return 1;
|
if (!f) return 1;
|
||||||
switch (f->type) {
|
switch (f->type) {
|
||||||
case AND:
|
case AND:
|
||||||
while (y) {
|
{
|
||||||
if (!indexable(y)) return 0;
|
unsigned long* tmp=alloca(record_set_length*sizeof(unsigned long));
|
||||||
y=y->next;
|
int ok=0;
|
||||||
|
fillset(bitfield);
|
||||||
|
while (y) {
|
||||||
|
if (useindex(y,tmp)) {
|
||||||
|
unsigned int i;
|
||||||
|
for (i=0; i<record_set_length; ++i)
|
||||||
|
bitfield[i] &= tmp[i];
|
||||||
|
ok=1;
|
||||||
|
}
|
||||||
|
y=y->next;
|
||||||
|
}
|
||||||
|
return ok;
|
||||||
}
|
}
|
||||||
return 1;
|
|
||||||
case OR:
|
case OR:
|
||||||
while (y) {
|
{
|
||||||
if (!indexable(y)) return 0;
|
unsigned long* tmp=alloca(record_set_length*sizeof(unsigned long));
|
||||||
y=y->next;
|
int ok=1;
|
||||||
|
emptyset(bitfield);
|
||||||
|
while (y) {
|
||||||
|
if (useindex(y,tmp)) {
|
||||||
|
unsigned int i;
|
||||||
|
for (i=0; i<record_set_length; ++i)
|
||||||
|
bitfield[i] |= tmp[i];
|
||||||
|
} else
|
||||||
|
ok=0;
|
||||||
|
y=y->next;
|
||||||
|
}
|
||||||
|
return ok;
|
||||||
}
|
}
|
||||||
return 1;
|
|
||||||
#if 0
|
#if 0
|
||||||
/* doesn't make much sense to try to speed up negated queries */
|
/* doesn't make much sense to try to speed up negated queries */
|
||||||
case NOT:
|
case NOT:
|
||||||
@@ -233,7 +232,22 @@ static int useindex(struct Filter* f,unsigned long* bitfield) {
|
|||||||
#endif
|
#endif
|
||||||
case SUBSTRING:
|
case SUBSTRING:
|
||||||
if (f->substrings->substrtype!=prefix) return 0;
|
if (f->substrings->substrtype!=prefix) return 0;
|
||||||
/* fall through */
|
{
|
||||||
|
uint32 ofs;
|
||||||
|
for (ofs=indices_offset+record_count*4; ofs<(unsigned long)filelen;) {
|
||||||
|
uint32 index_type,next,indexed_attribute;
|
||||||
|
uint32_unpack(map+ofs,&index_type);
|
||||||
|
uint32_unpack(map+ofs+4,&next);
|
||||||
|
uint32_unpack(map+ofs+8,&indexed_attribute);
|
||||||
|
if (index_type==0)
|
||||||
|
if (!matchstring(&f->ava.desc,map+indexed_attribute)) {
|
||||||
|
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->substrings->s,bitfield,matchprefix);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
ofs=next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
case EQUAL:
|
case EQUAL:
|
||||||
{
|
{
|
||||||
uint32 ofs;
|
uint32 ofs;
|
||||||
@@ -244,7 +258,7 @@ static int useindex(struct Filter* f,unsigned long* bitfield) {
|
|||||||
uint32_unpack(map+ofs+8,&indexed_attribute);
|
uint32_unpack(map+ofs+8,&indexed_attribute);
|
||||||
if (index_type==0)
|
if (index_type==0)
|
||||||
if (!matchstring(&f->ava.desc,map+indexed_attribute)) {
|
if (!matchstring(&f->ava.desc,map+indexed_attribute)) {
|
||||||
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->ava.value,bitfield);
|
tagmatches((uint32*)(map+ofs+12),(next-ofs-12)/4,&f->ava.value,bitfield,matchstring);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
ofs=next;
|
ofs=next;
|
||||||
|
|||||||
Reference in New Issue
Block a user