/* This is just the main() for "parse". The actual parser is in * ldif_parse.c */ #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #include "buffer.h" #include "ldif.h" #include "mduptab.h" #include "uint32.h" #include "byte.h" #include "fmt.h" #include "errmsg.h" /* these are defined in ldif_parse.c. * We extern them here so we can initialize them. * This was not necessary until I reworked mstorage_t to support * persistence via a file descriptor, which needs to be -1 and not 0 if * unused. */ extern mduptab_t attributes,classes; /* we do a minor optimization by saving the strings of names of * attributes and objectClass values only once. mduptab_t is the data * structure used for this, see mduptab.h */ // extern mstorage_t stringtable; /* this is a giant string table where all the strings (keys and * values) of the data are written to. This is actually the memory * mapped destination file. */ extern int (*ldif_parse_callback)(struct ldaprec* l); /* ldif_parse.c contains the actual ldif parser. It reads from a * buffer (see libowfat, buffer.h) and creates a linked list of * entries. This is unnecessarily wasteful, so I added the above * callback, which is called after each record. If the callback * is non-NULL and returns 1 when called with the record the parser * just read in, the parser will assume the record has been stored * somewhere else and not create a linked list but overwrite the same * record in memory. This saves space and overhead. If we need to * work on even larger files, this could even be reworked to be a * persistent mmapped temp file. */ /* parse exp.ldif and write binary representation to "data". * please read "FORMAT" for a description of the file format */ /* please note that tinyldap separates the data and the index although * they are in the same file. This program only creates the binary * representation, the actual indices are created by addindex. */ #ifndef PAGE_SIZE #define PAGE_SIZE 4096 #endif extern unsigned long mstorage_increment; static unsigned long outofs; static unsigned long recofs; /* Records are stored with a variable length externally, see FORMAT. * We need to store the records and a table of the offsets of the * records inside the data file in the data file. These data structures * hold this data: */ // mstorage_t records; unsigned long offset_classes,record_count; buffer outbuf,rbuf; static void printstats() { buffer_puts(buffer_2,"\r"); buffer_putulong(buffer_2,record_count); buffer_puts(buffer_2," records parsed, "); buffer_putulong(buffer_2,outofs/1024); buffer_puts(buffer_2,"k strings, "); buffer_putulong(buffer_2,recofs/1024); buffer_putsflush(buffer_2,"k records. "); } uint32 my_addstring(const char* s,unsigned long len) { uint32 tmp=outofs; if (buffer_put(&outbuf,s,len)) return -1; outofs+=len; return tmp; } int ldif_callback(struct ldaprec* l) { char x[8]; /* temp buf for endianness conversion */ unsigned int i; // uint32 ofs; uint32 oc=(uint32)-1; /* value of the first objectClass */ if (!l->n) return 0; for (i=0; in; ++i) { if (l->a[i].name==objectClass) { oc=l->a[i].value; l->a[i].value=-1; break; } } if (oc==(uint32)-1) { extern long lines; buffer_puts(buffer_1,"ignoring record without objectClass... (line "); buffer_putulong(buffer_1,lines); buffer_putsflush(buffer_1,")\n"); return 0; } uint32_pack(x,l->n+1); uint32_pack(x+4,0); // ofs=recofs; if (buffer_put(&rbuf,x,8)) return -1; recofs+=8; // if ((ofs=mstorage_add(&records,x,8))==(uint32)-1) return -1; uint32_pack(x,l->dn); uint32_pack(x+4,oc); if (buffer_put(&rbuf,x,8)) return -1; recofs+=8; // if (mstorage_add(&records,x,8)==-1) return -1; for (i=0; in; ++i) { if (l->a[i].name==objectClass && l->a[i].value==(uint32)-1) continue; uint32_pack(x,l->a[i].name); uint32_pack(x+4,l->a[i].value); if (buffer_put(&rbuf,x,8)) return -1; recofs+=8; // if (mstorage_add(&records,x,8)==-1) return -1; } // uint32_pack(x,ofs); // if (mstorage_add(&record_offsets,x,4)==-1) return -1; ++record_count; if ((record_count%10000)==0) printstats(); return 0; } extern uint32 (*ldif_addstring_callback)(const char* s,unsigned long len); int main(int argc,char* argv[]) { char buf[64*1024]; char recbuf[8*1024]; int fd,rfd; long len; char* destname=argc<3?"data":argv[2]; char* tempname; unsigned long size_of_string_table,indices_offset; long offset_stringtable; char* map; uint32 attrofs,classofs; ldif_addstring_callback=my_addstring; tempname=alloca(strlen(destname)+10); // mstorage_init(&record_offsets); rfd=fmt_str(tempname,destname); rfd+=fmt_str(tempname+rfd,".rec"); tempname[rfd]=0; if ((rfd=open(tempname,O_RDWR|O_CREAT|O_TRUNC,0600))<0) { buffer_puts(buffer_2,"could not create temp file "); buffer_puts(buffer_2,tempname); goto derrout2; } buffer_init(&rbuf,(void*)write,rfd,recbuf,sizeof recbuf); ldif_parse_callback=ldif_callback; if ((fd=open(destname,O_RDWR|O_CREAT|O_TRUNC,0600))<0) { buffer_puts(buffer_2,"could not create destination data file "); buffer_puts(buffer_2,destname); derrout2: buffer_puts(buffer_2,": "); buffer_puterror(buffer_2); buffer_putnlflush(buffer_2); return 1; } buffer_init(&outbuf,(void*)write,fd,buf,sizeof buf); mduptab_init(&attributes); mduptab_init(&classes); { char dummy[5*4]; if (buffer_put(&outbuf,dummy,5*4)) writeerror: diesys(1,"write error (disk full?)"); outofs=5*4; recofs=0; } // if ((mduptab_adds(&attributes,"*"))<0) // die(1,"out of memory"); ldif_parse(argc<2?"exp.ldif":argv[1]); if (!first) die(1,"usage: parse [src-ldif-filename] [dest-bin-filename]\n"); printstats(); buffer_putsflush(buffer_2,"DONE!\n"); if (buffer_flush(&rbuf)) goto writeerror; /* now we have to add the classes and attributes to the "string table". problem is: we already wrote the offsets within the local tables to the record table, so we need to do some relocation */ /* first, add the strings */ attrofs=outofs; if (buffer_put(&outbuf,attributes.strings.root,attributes.strings.used)) goto writeerror; outofs+=attributes.strings.used; classofs=outofs; if (buffer_put(&outbuf,classes.strings.root,classes.strings.used)) goto writeerror; outofs+=classes.strings.used; if (outofs&3) { /* round up to 32-bit boundary */ if (buffer_put(&outbuf,"\x00\x00\x00",4-(outofs&3))) goto writeerror; outofs+=4-(outofs&3); } buffer_flush(&outbuf); size_of_string_table=outofs-5*4; size_of_string_table=(size_of_string_table+3)&-4; /* round up to 32 bits */ /* first find out how much space we need */ { uint32 i,n; char convbuf[4]; n=attributes.table.used/sizeof(long); for (i=0; i