diff --git a/Makefile b/Makefile index c5051b0..39eff70 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# DEBUG=1 +DEBUG=1 all: t1 t2 parse dumpidx idx2ldif addindex bindrequest tinyldap \ tinyldap_standalone tinyldap_debug ldapclient ldapclient_str \ diff --git a/ldif.h b/ldif.h index 2b784cc..b6e05e2 100644 --- a/ldif.h +++ b/ldif.h @@ -5,17 +5,17 @@ #define ATTRIBS 100 struct attribute { - long name, value; + uint32 name, value; }; struct ldaprec { - long dn; + uint32 dn; int n; /* number of attributes */ struct attribute a[ATTRIBS]; struct ldaprec* next; }; -extern long dn, mail, sn, cn, objectClass; +extern uint32 dn, mail, sn, cn, objectClass; extern struct ldaprec *first; extern unsigned long ldifrecords; diff --git a/ldif_parse.c b/ldif_parse.c index 1b18551..f188627 100644 --- a/ldif_parse.c +++ b/ldif_parse.c @@ -15,7 +15,8 @@ mduptab_t attributes,classes; mstorage_t stringtable; -long dn, objectClass; +uint32 dn, objectClass; +unsigned long lines; /* this is called after each record. * If it returns -1, ldif_parse will exit immediately. @@ -26,20 +27,21 @@ long dn, objectClass; * If the callback is NULL, a callback that always returns 1 is assumed. * */ int (*ldif_parse_callback)(struct ldaprec* l); +uint32 (*ldif_addstring_callback)(const char* s,unsigned long len); unsigned long ldifrecords; -static void addattribute(struct ldaprec** l,long name,long val) { +static void addattribute(struct ldaprec** l,uint32 name,uint32 val) { if (name==dn) (*l)->dn=val; else if ((*l)->na[(*l)->n].name=name; (*l)->a[(*l)->n].value=val; ++(*l)->n; } else { - buffer_puts(buffer_2,"LDIF parse error: too many attributes!:\n "); + buffer_puts(buffer_2,"LDIF parse error: too many attributes!: "); buffer_puts(buffer_2,attributes.Strings->root+name); - buffer_puts(buffer_2,"\nat dn\n "); - buffer_puts(buffer_2,(*l)->dn+stringtable.root); + buffer_puts(buffer_2," in line "); + buffer_putulong(buffer_2,lines); buffer_putnlflush(buffer_2); exit(1); } @@ -78,11 +80,36 @@ static int unbase64(char* buf) { return destlen; } +uint32 (*ldif_addstring_callback)(const char* s,unsigned long len); + +static uint32 addstring(const char* s,unsigned long len) { + return mstorage_add(&stringtable,s,len); +} + +static long commit_string_bin(const char* s,unsigned long n) { + unsigned int i; + static char zero; + uint32 x; + char intbuf[4]; + if (n==0 || (n==1 && s[0]==0)) goto encodebinary; + for (i=0; inext=0; (*l)->n=0; ldifrecords=0; do { - long tmp, val; + uint32 tmp, val; base64=binary=0; n=ofs+buffer_get_token(b,buf+ofs,8192-ofs,":",1); if (n==0) break; @@ -112,14 +139,14 @@ nomem: buf[i2]=0; if (str_equal("binary",buf+i2+1)) binary=1; } - if ((tmp=mduptab_adds(&attributes,buf+i))<0) goto nomem; + if ((tmp=mduptab_adds(&attributes,buf+i))==(uint32)-1) goto nomem; if (!stralloc_copys(&payload,"")) goto nomem; { char dummy; int res; /* read line, skipping initial whitespace */ for (n=0; (res=buffer_getc(b,&dummy))==1; ) { - if (dummy=='\n') break; + if (dummy=='\n') { ++lines; break; } if (!n && dummy==':' && base64==0) { base64=1; continue; } if (!n && (dummy==' ' || dummy=='\t')) continue; if (!stralloc_append(&payload,&dummy)) goto nomem; @@ -144,6 +171,8 @@ lookagain: } else if (c=='\n') { struct ldaprec* m; + ++lines; + if (payload.len) { if (!stralloc_0(&payload)) goto nomem; if (base64) { @@ -163,11 +192,11 @@ lookagain: #endif if (tmp==objectClass) { - if ((val=mduptab_add(&classes,payload.s,len-1))<0) goto nomem; + if ((val=mduptab_add(&classes,payload.s,len-1))==(uint32)-1) goto nomem; } else if (tmp==dn) { - if ((val=add_normalized(payload.s,len))==-1) goto nomem; + if ((val=add_normalized(payload.s,len))==(uint32)-1) goto nomem; } else - if ((val=mstorage_add_bin(&stringtable,payload.s,len))<0) goto nomem; + if ((val=commit_string_bin(payload.s,len))==(uint32)-1) goto nomem; addattribute(l,tmp,val); m=0; @@ -221,16 +250,16 @@ lookagain: #endif if (tmp==objectClass) { - if ((val=mduptab_add(&classes,payload.s,len-1))<0) goto nomem; + if ((val=mduptab_add(&classes,payload.s,len-1))==(uint32)-1) goto nomem; } else if (tmp==dn) { - if ((val=add_normalized(payload.s,payload.len))==-1) goto nomem; + if ((val=add_normalized(payload.s,payload.len))==(uint32)-1) goto nomem; } else - if ((val=mstorage_add_bin(&stringtable,payload.s,payload.len))<0) goto nomem; + if ((val=commit_string_bin(payload.s,len))==(uint32)-1) goto nomem; addattribute(l,tmp,val); #endif } while (!eof); if (ldif_parse_callback && ldif_parse_callback(*l)==-1) return -1; - if ((*l)->dn<0 && ((*l)->next)) { + if ((*l)->dn==(uint32)-1 && ((*l)->next)) { struct ldaprec* m=(*l)->next; free((*l)); (*l)=m; @@ -245,6 +274,7 @@ int ldif_parse(const char* filename) { int fd; buffer in; buffer* tmp; + if (ldif_addstring_callback==0) ldif_addstring_callback=addstring; if (filename[0]=='-' && !filename[1]) { tmp=buffer_0; fd=-1; @@ -256,6 +286,7 @@ int ldif_parse(const char* filename) { } dn=mduptab_adds(&attributes,"dn"); objectClass=mduptab_adds(&attributes,"objectClass"); + lines=0; { int res=parserec(tmp,&first); if (fd!=-1) close(fd); diff --git a/parse.c b/parse.c index a874bbd..1719b9e 100644 --- a/parse.c +++ b/parse.c @@ -1,5 +1,6 @@ /* This is just the main() for "parse". The actual parser is in * ldif_parse.c */ +#define _FILE_OFFSET_BITS 64 #include #include #include @@ -24,7 +25,7 @@ extern mduptab_t attributes,classes; /* we do a minor optimization by saving the strings of names of * attributes and objectClass values only once. mduptab_t is the data * structure used for this, see mduptab.h */ -extern mstorage_t stringtable; +// extern mstorage_t stringtable; /* this is a giant string table where all the strings (keys and * values) of the data are written to. This is actually the memory * mapped destination file. */ @@ -53,128 +54,119 @@ extern int (*ldif_parse_callback)(struct ldaprec* l); extern unsigned long mstorage_increment; -/* for debugging and error messages */ -/* ldaprec is the struct used by ldif_parse.c */ -void dumprec(struct ldaprec* l) { - int i; - if (l->dn>=0) { - buffer_puts(buffer_1,"dn: "); - buffer_puts(buffer_1,stringtable.root+l->dn); - buffer_puts(buffer_1,"\n"); - } else - buffer_puts(buffer_1,"no dn?!\n"); - for (i=0; in; ++i) { - buffer_puts(buffer_1,attributes.Strings->root+l->a[i].name); - buffer_puts(buffer_1,": "); - if (l->a[i].name==objectClass) - buffer_puts(buffer_1,classes.Strings->root+l->a[i].value); - else - buffer_puts(buffer_1,stringtable.root+l->a[i].value); - buffer_puts(buffer_1,"\n"); - } - buffer_putsflush(buffer_1,"\n"); -} +static unsigned long outofs; +static unsigned long recofs; /* Records are stored with a variable length externally, see FORMAT. * We need to store the records and a table of the offsets of the * records inside the data file in the data file. These data structures * hold this data: */ -mstorage_t record_offsets; -mstorage_t records; +// mstorage_t records; unsigned long offset_classes,record_count; - /* record_count is just a convenience, the same value is also visible - * as record_offsets.used/4 */ + +buffer outbuf,rbuf; static void printstats() { buffer_puts(buffer_2,"\r"); buffer_putulong(buffer_2,record_count); buffer_puts(buffer_2," records parsed, "); - buffer_putulong(buffer_2,stringtable.used/1024); + buffer_putulong(buffer_2,outofs/1024); buffer_puts(buffer_2,"k strings, "); - buffer_putulong(buffer_2,records.used/1024); - buffer_puts(buffer_2,"k records, "); - buffer_putulong(buffer_2,record_offsets.used/1024); - buffer_putsflush(buffer_2,"k record offsets. "); + buffer_putulong(buffer_2,recofs/1024); + buffer_putsflush(buffer_2,"k records. "); +} + +uint32 my_addstring(const char* s,unsigned long len) { + uint32 tmp=outofs; + if (buffer_put(&outbuf,s,len)) return -1; + outofs+=len; + return tmp; } int ldif_callback(struct ldaprec* l) { char x[8]; /* temp buf for endianness conversion */ int i; - uint32 ofs; - uint32 oc; /* value of the first objectClass */ - int found; +// uint32 ofs; + uint32 oc=(uint32)-1; /* value of the first objectClass */ if (!l->n) return 0; - found=0; for (i=0; in; ++i) { if (l->a[i].name==objectClass) { oc=l->a[i].value; l->a[i].value=-1; - found=1; break; } } - if (!found) { - buffer_putsflush(buffer_1,"ignoring record without objectClass...\n"); - dumprec(l); + if (oc==(uint32)-1) { + extern long lines; + buffer_puts(buffer_1,"ignoring record without objectClass... (line"); + buffer_putulong(buffer_1,lines); + buffer_putsflush(buffer_1,")\n"); return 0; } uint32_pack(x,l->n+1); uint32_pack(x+4,0); - if ((ofs=mstorage_add(&records,x,8))==(uint32)-1) return -1; + +// ofs=recofs; + if (buffer_put(&rbuf,x,8)) return -1; recofs+=8; +// if ((ofs=mstorage_add(&records,x,8))==(uint32)-1) return -1; + uint32_pack(x,l->dn); uint32_pack(x+4,oc); - if (mstorage_add(&records,x,8)==-1) return -1; + if (buffer_put(&rbuf,x,8)) return -1; recofs+=8; +// if (mstorage_add(&records,x,8)==-1) return -1; + for (i=0; in; ++i) { - if (l->a[i].name==objectClass && l->a[i].value==-1) continue; + if (l->a[i].name==objectClass && l->a[i].value==(uint32)-1) continue; uint32_pack(x,l->a[i].name); uint32_pack(x+4,l->a[i].value); - if (mstorage_add(&records,x,8)==-1) return -1; + if (buffer_put(&rbuf,x,8)) return -1; recofs+=8; +// if (mstorage_add(&records,x,8)==-1) return -1; } - uint32_pack(x,ofs); - if (mstorage_add(&record_offsets,x,4)==-1) return -1; +// uint32_pack(x,ofs); +// if (mstorage_add(&record_offsets,x,4)==-1) return -1; ++record_count; if ((record_count%10000)==0) printstats(); return 0; } +extern uint32 (*ldif_addstring_callback)(const char* s,unsigned long len); + int main(int argc,char* argv[]) { + char buf[64*1024]; + char recbuf[8*1024]; int fd,rfd; long len; char* destname=argc<3?"data":argv[2]; char* tempname; unsigned long size_of_string_table,indices_offset; long offset_stringtable; - char* map,* dest; + char* map; + uint32 attrofs,classofs; - mstorage_increment=1024*1024; /* always grow mstorages by 1 additional MiB to reduce mmap overhead */ + ldif_addstring_callback=my_addstring; tempname=alloca(strlen(destname)+10); - mstorage_init(&record_offsets); +// mstorage_init(&record_offsets); rfd=fmt_str(tempname,destname); rfd+=fmt_str(tempname+rfd,".rec"); tempname[rfd]=0; if ((rfd=open(tempname,O_RDWR|O_CREAT|O_TRUNC,0600))<0) { buffer_puts(buffer_2,"could not create temp file "); -temperrout: buffer_puts(buffer_2,tempname); goto derrout2; } - if (mstorage_init_persistent(&records,rfd)==-1) { - buffer_puts(buffer_2,"mstorage_init_persistent: error mmapping "); - goto temperrout; - } -// mstorage_init(&records); + buffer_init(&rbuf,write,rfd,recbuf,sizeof recbuf); + ldif_parse_callback=ldif_callback; if ((fd=open(destname,O_RDWR|O_CREAT|O_TRUNC,0600))<0) { buffer_puts(buffer_2,"could not create destination data file "); -derrout: buffer_puts(buffer_2,destname); derrout2: buffer_puts(buffer_2,": "); @@ -182,49 +174,120 @@ derrout2: buffer_putnlflush(buffer_2); return 1; } - if (mstorage_init_persistent(&stringtable,fd)==-1) { - buffer_puts(buffer_2,"mstorage_init_persistent: error mmapping "); - goto derrout; - } - mduptab_init_reuse(&attributes,&stringtable); - mduptab_init_reuse(&classes,&stringtable); + + buffer_init(&outbuf,write,fd,buf,sizeof buf); + + mduptab_init(&attributes); + mduptab_init(&classes); { char dummy[5*4]; - mstorage_add(&stringtable,dummy,5*4); + if (buffer_put(&outbuf,dummy,5*4)) +writeerror: + diesys(1,"write error (disk full?)"); + outofs=5*4; + recofs=0; } // if ((mduptab_adds(&attributes,"*"))<0) // die(1,"out of memory"); ldif_parse(argc<2?"exp.ldif":argv[1]); - if (!first) { - buffer_putsflush(buffer_2,"usage: parse [src-ldif-filename] [dest-bin-filename]\n"); - return 1; - } + if (!first) + die(1,"usage: parse [src-ldif-filename] [dest-bin-filename]\n"); printstats(); buffer_putsflush(buffer_2,"DONE!\n"); - size_of_string_table=stringtable.used-5*4; + if (buffer_flush(&rbuf)) goto writeerror; + + /* now we have to add the classes and attributes to the "string table". + problem is: we already wrote the offsets within the local tables to + the record table, so we need to do some relocation */ + + /* first, add the strings */ + attrofs=outofs; + if (buffer_put(&outbuf,attributes.strings.root,attributes.strings.used)) + goto writeerror; + outofs+=attributes.strings.used; + classofs=outofs; + if (buffer_put(&outbuf,classes.strings.root,classes.strings.used)) + goto writeerror; + outofs+=classes.strings.used; + + if (outofs&3) { /* round up to 32-bit boundary */ + if (buffer_put(&outbuf,"\x00\x00\x00",4-(outofs&3))) goto writeerror; + outofs+=4-(outofs&3); + } + buffer_flush(&outbuf); + + size_of_string_table=outofs-5*4; size_of_string_table=(size_of_string_table+3)&-4; /* round up to 32 bits */ /* first find out how much space we need */ - len = 5*sizeof(uint32_t); /* magic plus four counts */ - len += size_of_string_table; /* size of string table */ - len += attributes.table.used/sizeof(long)*8; /* attribute_names plus attribute_flags */ -// fdprintf(2,"offsets of records: %lu\n",len); + { + uint32 i,n; + char convbuf[4]; + n=attributes.table.used/sizeof(long); + for (i=0; i