diff --git a/Makefile b/Makefile index c5e3882..33baef5 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -#DEBUG=1 +DEBUG=1 all: t1 t2 parse dumpidx idx2ldif addindex bindrequest tinyldap \ tinyldap_standalone tinyldap_debug ldapclient ldapclient_str \ @@ -28,7 +28,7 @@ ldif.a: ldif_parse.o ldap_match_mapped.o storage.a: strstorage.o strduptab.o mstorage_add.o mduptab_add.o \ bstr_diff.o mduptab_adds.o bstr_diff2.o mstorage_add_bin.o \ mstorage_init.o mstorage_init_persistent.o mstorage_unmap.o \ -mduptab_init.o +mduptab_init.o mduptab_init_reuse.o auth.a: auth.o diff --git a/dumpidx.c b/dumpidx.c index 94d2df3..41cf05a 100644 --- a/dumpidx.c +++ b/dumpidx.c @@ -2,11 +2,20 @@ #include "mmap.h" #include "uint32.h" -int main() { +int main(int argc,char* argv[]) { int verbose=0; unsigned long filelen; - char* map=mmap_read("data",&filelen); + char* fn=argc<2?"data":argv[1]; + char* map=mmap_read(fn,&filelen); uint32 magic,attribute_count,record_count,indices_offset,size_of_string_table; + if (!map) { + buffer_puts(buffer_2,"could not open `"); + buffer_puts(buffer_2,fn); + buffer_puts(buffer_2,"´: "); + buffer_puterror(buffer_2); + buffer_putnlflush(buffer_2); + exit(1); + } buffer_puts(buffer_1,"magic: "); uint32_unpack(map,&magic); uint32_unpack(map+4,&attribute_count); diff --git a/ldif_parse.c b/ldif_parse.c index aa3dda4..fd51f3a 100644 --- a/ldif_parse.c +++ b/ldif_parse.c @@ -15,9 +15,18 @@ mduptab_t attributes,classes; mstorage_t stringtable; - long dn, objectClass; +/* this is called after each record. + * If it returns -1, ldif_parse will exit immediately. + * If it returns 0, ldif_parse will continue parsing and overwrite the + * current ldaprec. + * If it returns 1, ldif_parse will allocate a new ldaprec and link it + * using the next pointer in the current ldaprec. + * If the callback is NULL, a callback that always returns 1 is assumed. 
+ * */ +int (*ldif_parse_callback)(struct ldaprec* l); + unsigned long ldifrecords; static void addattribute(struct ldaprec** l,long name,long val) { @@ -27,7 +36,11 @@ static void addattribute(struct ldaprec** l,long name,long val) { (*l)->a[(*l)->n].value=val; ++(*l)->n; } else { - buffer_putsflush(buffer_2,"LDIF parse error: too many attributes!\n"); + buffer_puts(buffer_2,"LDIF parse error: too many attributes!:\n "); + buffer_puts(buffer_2,attributes.Strings->root+name); /* name is an offset into *Strings; .strings is not set up by mduptab_init_reuse */ + buffer_puts(buffer_2,"\nat dn\n "); + buffer_puts(buffer_2,(*l)->dn+stringtable.root); + buffer_putnlflush(buffer_2); exit(1); } } @@ -129,8 +142,7 @@ lookagain: if (!stralloc_catb(&payload,buf,n)) goto nomem; goto lookagain; } else if (c=='\n') { - struct ldaprec* m=malloc(sizeof(struct ldaprec)); - if (!m) return 2; + struct ldaprec* m; if (!stralloc_0(&payload)) goto nomem; if (base64) { @@ -155,11 +167,28 @@ lookagain: if ((val=mstorage_add_bin(&stringtable,payload.s,len))<0) goto nomem; addattribute(l,tmp,val); + m=0; + if (ldif_parse_callback) { + switch (ldif_parse_callback(*l)) { + case -1: + return -1; + case 0: + m=*l; + break; +#if 0 + case 1: + m=0; + break; +#endif + } + } + if (!m) if (!(m=malloc(sizeof(struct ldaprec)))) return 2; + (*l)->next=m; m->n=0; m->dn=-1; m->next=0; ofs=0; // dumprec(*l); - l=&((*l)->next); + if (*l!=m) l=&((*l)->next); ++ldifrecords; continue; } else { @@ -194,7 +223,8 @@ lookagain: addattribute(l,tmp,val); #endif } while (!eof); - if ((*l)->dn<0) { + if (ldif_parse_callback && ldif_parse_callback(*l)==-1) return -1; + if ((*l)->dn<0 && ((*l)->next)) { struct ldaprec* m=(*l)->next; free((*l)); (*l)=m; diff --git a/mduptab.h b/mduptab.h index 9bf12fd..f640450 100644 --- a/mduptab.h +++ b/mduptab.h @@ -9,8 +9,10 @@ typedef struct mduptable { mstorage_t table,strings; + mstorage_t* Strings; } mduptab_t; void mduptab_init(mduptab_t* t); +void mduptab_init_reuse(mduptab_t* t,mstorage_t* s); long mduptab_add(mduptab_t* t,const char* s,unsigned int len);
long mduptab_adds(mduptab_t* t,const char* s); diff --git a/mduptab_add.c b/mduptab_add.c index 8ea661c..b72532f 100644 --- a/mduptab_add.c +++ b/mduptab_add.c @@ -10,13 +10,13 @@ long mduptab_add(mduptab_t* t,const char* s,unsigned int len) { unsigned int i; unsigned long* l=(unsigned long*)t->table.root; long x,bak; - for (i=0; i<t->strings.used/sizeof(unsigned long); ++i) - if (bstr_equal2(t->strings.root+l[i],s,len)) + for (i=0; i<t->table.used/sizeof(unsigned long); ++i) + if (bstr_equal2(t->Strings->root+l[i],s,len)) return l[i]; - bak=t->strings.used; - if ((x=mstorage_add_bin(&t->strings,s,len))<0) return -1; + bak=t->Strings->used; + if ((x=mstorage_add_bin(t->Strings,s,len))<0) return -1; if (mstorage_add(&t->table,(const char*)&x,sizeof(x))<0) { - t->strings.used=bak; + t->Strings->used=bak; return -1; } return x; diff --git a/mduptab_init.c b/mduptab_init.c index 582cc5a..280a3de 100644 --- a/mduptab_init.c +++ b/mduptab_init.c @@ -3,4 +3,5 @@ void mduptab_init(mduptab_t* t) { mstorage_init(&t->table); mstorage_init(&t->strings); + t->Strings=&t->strings; } diff --git a/mduptab_init_reuse.c b/mduptab_init_reuse.c new file mode 100644 index 0000000..ea22404 --- /dev/null +++ b/mduptab_init_reuse.c @@ -0,0 +1,6 @@ +#include "mduptab.h" + +void mduptab_init_reuse(mduptab_t* t,mstorage_t* s) { + mstorage_init(&t->table); + t->Strings=s; +} diff --git a/mstorage.h b/mstorage.h index 8f1ae92..ce3c481 100644 --- a/mstorage.h +++ b/mstorage.h @@ -15,7 +15,7 @@ int mstorage_init_persistent(mstorage_t* p,int fd); /* Works like strstorage_add, but will return an * offset to mstorage_root, which is mmapped and may thus change. */ -/* negative offset == error */ +/* offset -1 ==> error */ long mstorage_add(mstorage_t* p,const char* s,unsigned long n); /* undo mapping */ diff --git a/parse.c b/parse.c index 651a6da..c19f3fa 100644 --- a/parse.c +++ b/parse.c @@ -1,3 +1,5 @@ +/* This is just the main() for "parse".
The actual parser is in + * ldif_parse.c */ #include #include #include @@ -11,16 +13,44 @@ #include "uint32.h" #include "byte.h" +/* these are defined in ldif_parse.c. + * We extern them here so we can initialize them. + * This was not necessary until I reworked mstorage_t to support + * persistence via a file descriptor, which needs to be -1 and not 0 if + * unused. */ extern mduptab_t attributes,classes; + /* we do a minor optimization by saving the strings of names of + * attributes and objectClass values only once. mduptab_t is the data + * structure used for this, see mduptab.h */ extern mstorage_t stringtable; + /* this is a giant string table where all the strings (keys and + * values) of the data are written to. This is actually the memory + * mapped destination file. */ +extern int (*ldif_parse_callback)(struct ldaprec* l); + /* ldif_parse.c contains the actual ldif parser. It reads from a + * buffer (see libowfat, buffer.h) and creates a linked list of + * entries. This is unnecessarily wasteful, so I added the above + * callback, which is called after each record. If the callback + * is non-NULL and returns 0 when called with the record the parser + * just read in, the parser will assume the record has been stored + * somewhere else and not create a linked list but overwrite the same + * record in memory. This saves space and overhead. If we need to + * work on even larger files, this could even be reworked to be a + * persistent mmapped temp file. */ /* parse exp.ldif and write binary representation to "data". * please read "FORMAT" for a description of the file format */ +/* please note that tinyldap separates the data and the index although + * they are in the same file. This program only creates the binary + * representation, the actual indices are created by addindex.
*/ + #ifndef PAGE_SIZE #define PAGE_SIZE 4096 #endif +/* for debugging and error messages */ +/* ldaprec is the struct used by ldif_parse.c */ void dumprec(struct ldaprec* l) { int i; if (l->dn>=0) { @@ -30,10 +60,10 @@ void dumprec(struct ldaprec* l) { } else buffer_puts(buffer_1,"no dn?!\n"); for (i=0; i<l->n; ++i) { - buffer_puts(buffer_1,attributes.strings.root+l->a[i].name); + buffer_puts(buffer_1,attributes.Strings->root+l->a[i].name); buffer_puts(buffer_1,": "); if (l->a[i].name==objectClass) - buffer_puts(buffer_1,classes.strings.root+l->a[i].value); + buffer_puts(buffer_1,classes.Strings->root+l->a[i].value); else buffer_puts(buffer_1,stringtable.root+l->a[i].value); buffer_puts(buffer_1,"\n"); @@ -41,17 +71,70 @@ void dumprec(struct ldaprec* l) { buffer_putsflush(buffer_1,"\n"); } -extern mstorage_t stringtable; -extern mduptab_t attributes,classes; +/* Records are stored with a variable length externally, see FORMAT. + * We need to store the records and a table of the offsets of the + * records inside the data file in the data file.
These data structures + * hold this data: */ +mstorage_t record_offsets; +mstorage_t records; +unsigned long offset_classes,record_count; + /* record_count is just a convenience, the same value is also visible + * as record_offsets.used/4 */ + +int ldif_callback(struct ldaprec* l) { + char x[8]; /* temp buf for endianness conversion */ + int i; + uint32 ofs; + uint32 oc; /* value of the first objectClass */ + int found; + + if (!l->n) return 0; + found=0; + for (i=0; i<l->n; ++i) { + if (l->a[i].name==objectClass) { + oc=l->a[i].value; + l->a[i].value=-1; + found=1; + break; + } + } + if (!found) { + buffer_putsflush(buffer_1,"ignoring record without objectClass...\n"); + dumprec(l); + return 0; + } + + uint32_pack(x,l->n+1); + uint32_pack(x+4,0); + if ((ofs=mstorage_add(&records,x,8))==(uint32)-1) return -1; + uint32_pack(x,l->dn); + uint32_pack(x+4,oc); + + if (mstorage_add(&records,x,8)==-1) return -1; + for (i=0; i<l->n; ++i) { + if (l->a[i].name==objectClass && l->a[i].value==-1) continue; + uint32_pack(x,l->a[i].name); + uint32_pack(x+4,l->a[i].value); + if (mstorage_add(&records,x,8)==-1) return -1; + } + uint32_pack(x,ofs); + if (mstorage_add(&record_offsets,x,4)==-1) return -1; + ++record_count; + return 0; +} int main(int argc,char* argv[]) { int fd; long len; char* destname=argc<3?"data":argv[2]; - unsigned long size_of_string_table,indices_offset,record_count; - long offset_stringtable,offset_classes,offset_attributes; + unsigned long size_of_string_table,indices_offset; + long offset_stringtable; char* map,* dest; + mstorage_init(&record_offsets); + mstorage_init(&records); + ldif_parse_callback=ldif_callback; + if ((fd=open(destname,O_RDWR|O_CREAT|O_TRUNC,0600))<0) { buffer_puts(buffer_2,"could not create destination data file "); derrout: @@ -65,8 +148,8 @@ derrout: buffer_puts(buffer_2,"mstorage_init_persistent: error mmapping "); goto derrout; } - mduptab_init(&attributes); - mduptab_init(&classes); + mduptab_init_reuse(&attributes,&stringtable); +
mduptab_init_reuse(&classes,&stringtable); { char dummy[5*4]; @@ -79,7 +162,7 @@ derrout: return 1; } - size_of_string_table=stringtable.used+classes.strings.used+attributes.strings.used-5*4; + size_of_string_table=stringtable.used-5*4; size_of_string_table=(size_of_string_table+3)&-4; /* round up to 32 bits */ /* first find out how much space we need */ len = 5*sizeof(uint32_t); /* magic plus four counts */ @@ -88,37 +171,8 @@ derrout: // fdprintf(2,"offsets of records: %lu\n",len); - /* now for the hard part: the records */ - { - struct ldaprec* x=first; - record_count=0; - while (x) { - int oc=0,i; -// long old=len; - /* we add 8 for the pair and we substract 8 - * for the two saved pointers ("dn" and "objectClass") */ - if (x->dn>=0) len+=8; else { - if (x->n==0 && x->next==0) break; - buffer_putsflush(buffer_2,"record without dn?!\n"); - dumprec(x); - return 1; - } - for (i=0; in; ++i) { - len+=8; - if (x->a[i].name==objectClass) oc=1; - } - if (!oc) { - buffer_puts(buffer_2,"record \""); - buffer_puts(buffer_2,x->dn+stringtable.root); - buffer_putsflush(buffer_2,"\" has no objectClass?!\n"); - dumprec(x); - return 1; - } - ++record_count; -// fdprintf(2,"considering record \"%s\": length %d\n",x->dn+stringtable.root,len-old); - x=x->next; - } - } + len += records.used; + // fdprintf(2,"offsets of indices: %lu\n",len); indices_offset=len; len+=record_count*4; @@ -139,66 +193,31 @@ derrout: // size_of_string_table=stringtable.used+classes.strings.used+attributes.strings.used; offset_stringtable=5*4; - offset_classes= /* offset_stringtable+ */ stringtable.used; - offset_attributes=offset_classes+classes.strings.used; -// byte_copy(map+offset_stringtable,stringtable.used,stringtable.root); - byte_copy(map+offset_classes,classes.strings.used,classes.strings.root); - byte_copy(map+offset_attributes,attributes.strings.used,attributes.strings.root); -// fdprintf(2,"offset_classes=%lu, offset_attributes=%lu, attributes=%lu\n", -// 
offset_classes,offset_attributes,attributes.strings.used); + offset_classes=stringtable.used; + dest=map+offset_stringtable+size_of_string_table; { unsigned long i; for (i=0; in+1; - record_offsets[cur]=dest-map; ++cur; - uint32_pack(dest,i); uint32_pack(dest+4,0); dest+=8; - uint32_pack(dest,x->dn /* +offset_stringtable */); - for (i=0; in; ++i) { - if (x->a[i].name==objectClass) { - uint32_pack(dest+4,x->a[i].value+offset_classes); - x->a[i].name=-1; - break; - } - } - dest+=8; - for (i=0; in; ++i) { - if (x->a[i].name>=0) { - uint32_pack(dest,x->a[i].name+offset_attributes); - if (x->a[i].name==objectClass) - uint32_pack(dest+4,x->a[i].value+offset_classes); - else - uint32_pack(dest+4,x->a[i].value /* +offset_stringtable */); - dest+=8; - } - } - x=x->next; - } -// fdprintf(2,"actual offset of record_index: %lu\n",dest-map); - /* now the record_index */ - for (cur=0; cur