improve json escape decoder, add test suite
This commit is contained in:
@@ -9,15 +9,16 @@ size_t scan_jsonescape(const char *src,char *dest,size_t *destlen) {
|
||||
unsigned int prev,cur,todo;
|
||||
prev=cur=(unsigned int)-1;
|
||||
for (i=0; s[i]; ++i) {
|
||||
if (s[i]=='"') {
|
||||
if (prev!=(unsigned int)-1) goto abort;
|
||||
goto done;
|
||||
}
|
||||
if ((c=s[i])=='\\') {
|
||||
switch (s[i+1]) {
|
||||
case '"':
|
||||
if (prev!=(unsigned int)-1) return 0; // lead surrogate not followed by tail surrogate
|
||||
goto done;
|
||||
case '\\':
|
||||
if (prev!=(unsigned int)-1) return 0; // lead surrogate not followed by tail surrogate
|
||||
// c='\\'; // c already is backslash
|
||||
break;
|
||||
case '"': c='"'; break;
|
||||
case 'n': c='\n'; break;
|
||||
case 'r': c='\r'; break;
|
||||
case 'b': c='\b'; break;
|
||||
@@ -28,18 +29,19 @@ size_t scan_jsonescape(const char *src,char *dest,size_t *destlen) {
|
||||
size_t j;
|
||||
for (cur=j=0; j<4; ++j) {
|
||||
char x=scan_fromhex(s[i+2+j]);
|
||||
if (x<0) return 0; // not hex -> invalid input
|
||||
if (x<0) goto abort; // not hex -> invalid input
|
||||
cur=(cur<<4) | x;
|
||||
}
|
||||
if (cur>=0xd800 && cur<=0xdbff) {
|
||||
// utf-16 surrogate pair; needs to be followed by another
|
||||
// surrogate. We need to read both and convert to UTF-8
|
||||
if (prev!=(unsigned int)-1) return 0; // two lead surrogates
|
||||
if (prev!=(unsigned int)-1) goto abort; // two lead surrogates
|
||||
prev=cur;
|
||||
i+=5; // we want i to go up by 6, 1 is done by the for loop
|
||||
continue; // write nothing!
|
||||
} else if (cur>=0xdc00 && cur<=0xdfff) {
|
||||
todo=(cur&0x3ff) | ((prev&0x3ff) << 10) | 0x100000;
|
||||
if (prev==(unsigned int)-1) goto abort; // no lead surrogate
|
||||
todo=(cur&0x3ff) | ((prev&0x3ff) << 10) | 0x10000;
|
||||
} else
|
||||
todo=cur;
|
||||
written+=fmt_utf8(dest?dest+written:dest,todo);
|
||||
@@ -48,16 +50,22 @@ size_t scan_jsonescape(const char *src,char *dest,size_t *destlen) {
|
||||
continue;
|
||||
}
|
||||
default:
|
||||
if (prev!=(unsigned int)-1) return 0; // lead surrogate not followed by tail surrogate
|
||||
c=s[i+1];
|
||||
break;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
if (prev!=(unsigned int)-1) goto abort;
|
||||
/* We expect utf-8 incoming. Make sure it's valid. */
|
||||
if (!scan_utf8(s+i,4,NULL)) goto abort;
|
||||
if (dest) dest[written]=c;
|
||||
++written;
|
||||
}
|
||||
done:
|
||||
*destlen=written;
|
||||
return i;
|
||||
abort:
|
||||
if (prev!=(unsigned int)-1) i-=6; // if we abort and there still was an open surrogate pair, cancel it
|
||||
*destlen=written;
|
||||
return i;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user