From 2d90963bf8b1efef27a2a80e0e71aae5f8fe82a0 Mon Sep 17 00:00:00 2001 From: leitner Date: Mon, 20 Jan 2025 19:00:58 +0000 Subject: [PATCH] add fmt_utf8_scratch --- fmt.h | 2 ++ fmt/fmt_utf8.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/fmt.h b/fmt.h index 673f405..af81cde 100644 --- a/fmt.h +++ b/fmt.h @@ -196,6 +196,8 @@ size_t fmt_asn1derlength_scratch(char* dest,unsigned long long l) noexcept; /* s att_write(1) size_t fmt_asn1dertag(char* dest,unsigned long long l) noexcept; /* 1 byte for each 7 bits; upper bit = more bytes coming */ +// This function is only available on x86 and x86_64 (because unaligned +// word write is allowed and does not cost performance): att_write(1) size_t fmt_asn1dertag_scratch(char* dest,unsigned long long l) noexcept;/* same, but is allowed to overwrite up to 7 additional bytes */ diff --git a/fmt/fmt_utf8.c b/fmt/fmt_utf8.c index ffb887b..714de00 100644 --- a/fmt/fmt_utf8.c +++ b/fmt/fmt_utf8.c @@ -1,4 +1,4 @@ -#include "fmt.h" +#include size_t fmt_utf8(char *dest,uint32_t n) { size_t i,j; @@ -6,9 +6,8 @@ size_t fmt_utf8(char *dest,uint32_t n) { if (dest) *dest=(char)n; return 1; } - for (i=0x3f,j=1; i<=0x7fffffff; i=(i<<5)|0x1f, ++j) { + for (i=0x3f,j=0; i<=0x7fffffff; i=(i<<5)|0x1f, ++j) { if (i>=n) { - --j; if (dest) { size_t k=j*6; // gcc -fanalyze warns here that j-1 might underflow, leading to @@ -30,4 +29,43 @@ size_t fmt_utf8(char *dest,uint32_t n) { return 0; } +#if defined(__i386__) || defined(__x86_64__) + +// on architectures where unaligned writes are ok and don't carry a +// performance penalty, it may make sense to not write each byte +// individually but write a 64-bit word at the end. The downside is that +// this will write scratch bytes over the bytes following the utf-8. If +// you allocated a few more bytes scratch space, and did not expect +// those bytes to be untouched, you can use this function.
+// I did a small benchmark:
+//   for (i=j=0; i<1000; i+=fmt_utf8(buf+i,i));
+// Regular fmt_utf8: 13k cpu cycles, fmt_utf8_scratch: 11k cpu cycles.
+
+// Encode the value n as UTF-8 and return the number of bytes the
+// encoding occupies: 1 for n<=0x7f, otherwise 2..6 for values up to
+// 0x7fffffff. Returns 0 if n is larger than that and cannot be encoded.
+// If dest is NULL, only the length is computed and nothing is written.
+// For multi-byte results the whole sequence is assembled in a 64-bit
+// word and stored with a single 8-byte write, so dest must have room
+// for 8 bytes; the bytes following the sequence are overwritten with
+// zeros. A one-byte result writes exactly one byte.
+size_t fmt_utf8_scratch(char *dest,uint32_t n) {
+  size_t i;        // largest value encodable with j continuation bytes
+  unsigned int j;  // number of continuation bytes
+  if (n<=0x7f) {
+    if (dest) *dest=(char)n;
+    return 1;
+  }
+  for (i=0x3f,j=0; i<=0x7fffffff; i=(i<<5)|0x1f, ++j) {
+    if (i>=n) {
+      if (dest) {
+        uint64_t buf=0;
+        unsigned int k;
+        // Emit exactly j continuation bytes, lowest 6 bits of n first,
+        // so the last byte of the sequence ends up in the most
+        // significant position. Looping on "n>0x3f" instead would stop
+        // one byte early whenever the remaining bits fit into 6 bits
+        // but not into the lead byte's payload (e.g. 0x800..0xfff).
+        for (k=0; k<j; ++k) {
+          buf = (buf << 8) + (n & 0x3f) + 0x80;
+          n >>= 6;
+        }
+        // Lead byte: j+1 one-bits at the top, then what is left of n.
+        // Computed on unsigned values so it does not depend on char
+        // being signed or on how negative values are shifted.
+        buf = (buf << 8) + ((0xff00u >> (j+1)) & 0xffu) + n;
+        // Little-endian store: the lead byte (lowest byte of buf) lands
+        // at dest[0]. dest is not necessarily 8-byte aligned.
+        *(uint64_t*)dest = buf;
+      }
+      return j+1;
+    }
+    // 0x7fffffff is the largest value this encoder accepts
+    if (i==0x7fffffff) return 0;
+  }
+  /* we were asked to encode a value that cannot be encoded */
+  return 0;
+}
+#endif
+
 /* unit tested via scan/scan_utf8.c */