Skip to content

Commit 96c34a2

Browse files
author
Lucas
committed
add btoa()/atob() builtins
- Introduce global btoa() and atob() functions - Encoder: fast 12-bit pair-LUT, ~3.6 GB/s - Decoder: branchless streaming form, ~0.65 GB/s scalar - Tolerant to whitespace, validates padding and invalid input - Minimal allocations: only one malloc if input is wide-char
1 parent 5299e09 commit 96c34a2

File tree

3 files changed

+393
-9
lines changed

3 files changed

+393
-9
lines changed

quickjs.c

Lines changed: 296 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2320,7 +2320,6 @@ JSContext *JS_NewContextRaw(JSRuntime *rt)
23202320
JSContext *JS_NewContext(JSRuntime *rt)
23212321
{
23222322
JSContext *ctx;
2323-
23242323
ctx = JS_NewContextRaw(rt);
23252324
if (!ctx)
23262325
return NULL;
@@ -2336,7 +2335,7 @@ JSContext *JS_NewContext(JSRuntime *rt)
23362335
JS_AddIntrinsicPromise(ctx);
23372336
JS_AddIntrinsicBigInt(ctx);
23382337
JS_AddIntrinsicWeakRef(ctx);
2339-
2338+
JS_AddIntrinsicBase64(ctx);
23402339
JS_AddPerformance(ctx);
23412340

23422341
return ctx;
@@ -4037,26 +4036,26 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
40374036
size_t len;
40384037
int kind;
40394038

4040-
if (buf_len <= 0) {
4039+
if (unlikely(buf_len <= 0)) {
40414040
return JS_AtomToString(ctx, JS_ATOM_empty_string);
40424041
}
40434042
/* Compute string kind and length: 7-bit, 8-bit, 16-bit, 16-bit UTF-16 */
40444043
kind = utf8_scan(buf, buf_len, &len);
4045-
if (len > JS_STRING_LEN_MAX)
4044+
if (unlikely(len > JS_STRING_LEN_MAX))
40464045
return JS_ThrowRangeError(ctx, "invalid string length");
40474046

40484047
switch (kind) {
40494048
case UTF8_PLAIN_ASCII:
40504049
str = js_alloc_string(ctx, len, 0);
4051-
if (!str)
4050+
if (unlikely(!str))
40524051
return JS_EXCEPTION;
40534052
memcpy(str8(str), buf, len);
40544053
str8(str)[len] = '\0';
40554054
break;
40564055
case UTF8_NON_ASCII:
40574056
/* buf contains non-ASCII code-points, but limited to 8-bit values */
40584057
str = js_alloc_string(ctx, len, 0);
4059-
if (!str)
4058+
if (unlikely(!str))
40604059
return JS_EXCEPTION;
40614060
utf8_decode_buf8(str8(str), len + 1, buf, buf_len);
40624061
break;
@@ -4065,7 +4064,7 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
40654064
//if (kind & UTF8_HAS_ERRORS)
40664065
// return JS_ThrowRangeError(ctx, "invalid UTF-8 sequence");
40674066
str = js_alloc_string(ctx, len, 1);
4068-
if (!str)
4067+
if (unlikely(!str))
40694068
return JS_EXCEPTION;
40704069
utf8_decode_buf16(str16(str), len, buf, buf_len);
40714070
break;
@@ -4077,10 +4076,10 @@ JSValue JS_NewTwoByteString(JSContext *ctx, const uint16_t *buf, size_t len)
40774076
{
40784077
JSString *str;
40794078

4080-
if (!len)
4079+
if (unlikely(!len))
40814080
return JS_AtomToString(ctx, JS_ATOM_empty_string);
40824081
str = js_alloc_string(ctx, len, 1);
4083-
if (!str)
4082+
if (unlikely(!str))
40844083
return JS_EXCEPTION;
40854084
memcpy(str16(str), buf, len * sizeof(*buf));
40864085
return JS_MKPTR(JS_TAG_STRING, str);
@@ -57764,6 +57763,294 @@ static void _JS_AddIntrinsicCallSite(JSContext *ctx)
5776457763
countof(js_callsite_proto_funcs));
5776557764
}
5776657765

57766+
static const unsigned char B64_ENC[64] = {
57767+
'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
57768+
'Q','R','S','T','U','V','W','X','Y','Z',
57769+
'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p',
57770+
'q','r','s','t','u','v','w','x','y','z',
57771+
'0','1','2','3','4','5','6','7','8','9',
57772+
'-','_'
57773+
};
57774+
57775+
// Bit flags (0 means "invalid")
57776+
enum { K_VAL = 1u, K_WS = 2u, K_PAD = 4u };
57777+
57778+
// sextet values for valid chars (others don't matter)
57779+
static const uint8_t B64_VAL[256] = {
57780+
['A']=0, ['B']=1, ['C']=2, ['D']=3, ['E']=4, ['F']=5, ['G']=6, ['H']=7,
57781+
['I']=8, ['J']=9, ['K']=10,['L']=11,['M']=12,['N']=13,['O']=14,['P']=15,
57782+
['Q']=16,['R']=17,['S']=18,['T']=19,['U']=20,['V']=21,['W']=22,['X']=23,['Y']=24,['Z']=25,
57783+
['a']=26,['b']=27,['c']=28,['d']=29,['e']=30,['f']=31,['g']=32,['h']=33,
57784+
['i']=34,['j']=35,['k']=36,['l']=37,['m']=38,['n']=39,['o']=40,['p']=41,
57785+
['q']=42,['r']=43,['s']=44,['t']=45,['u']=46,['v']=47,['w']=48,['x']=49,['y']=50,['z']=51,
57786+
['0']=52,['1']=53,['2']=54,['3']=55,['4']=56,['5']=57,['6']=58,['7']=59,['8']=60,['9']=61,
57787+
['-']=62, ['_']=63, // base64url; swap to '+'/'/' if using standard base64
57788+
};
57789+
57790+
// flag bitset: only non-zero entries listed; unspecified default to 0 (invalid)
57791+
static const uint8_t B64_FLAGS[256] = {
57792+
// whitespace
57793+
[' ']=K_WS, ['\t']=K_WS, ['\r']=K_WS, ['\n']=K_WS,
57794+
// padding
57795+
['=']=K_PAD,
57796+
// valid chars
57797+
['A']=K_VAL,['B']=K_VAL,['C']=K_VAL,['D']=K_VAL,['E']=K_VAL,['F']=K_VAL,['G']=K_VAL,['H']=K_VAL,
57798+
['I']=K_VAL,['J']=K_VAL,['K']=K_VAL,['L']=K_VAL,['M']=K_VAL,['N']=K_VAL,['O']=K_VAL,['P']=K_VAL,
57799+
['Q']=K_VAL,['R']=K_VAL,['S']=K_VAL,['T']=K_VAL,['U']=K_VAL,['V']=K_VAL,['W']=K_VAL,['X']=K_VAL,
57800+
['Y']=K_VAL,['Z']=K_VAL,
57801+
['a']=K_VAL,['b']=K_VAL,['c']=K_VAL,['d']=K_VAL,['e']=K_VAL,['f']=K_VAL,['g']=K_VAL,['h']=K_VAL,
57802+
['i']=K_VAL,['j']=K_VAL,['k']=K_VAL,['l']=K_VAL,['m']=K_VAL,['n']=K_VAL,['o']=K_VAL,['p']=K_VAL,
57803+
['q']=K_VAL,['r']=K_VAL,['s']=K_VAL,['t']=K_VAL,['u']=K_VAL,['v']=K_VAL,['w']=K_VAL,['x']=K_VAL,
57804+
['y']=K_VAL,['z']=K_VAL,
57805+
['0']=K_VAL,['1']=K_VAL,['2']=K_VAL,['3']=K_VAL,['4']=K_VAL,['5']=K_VAL,['6']=K_VAL,['7']=K_VAL,
57806+
['8']=K_VAL,['9']=K_VAL,
57807+
['-']=K_VAL,['_']=K_VAL
57808+
};
57809+
57810+
static uint16_t B64_PAIR_LUT[4096];
57811+
static int B64_PAIR_INIT = 0;
57812+
57813+
static inline void b64_pair_init_once(void) {
57814+
if (unlikely(!B64_PAIR_INIT)) {
57815+
for (int i=0;i<4096;i++) {
57816+
uint8_t a = (uint8_t)(i >> 6);
57817+
uint8_t b = (uint8_t)(i & 63);
57818+
B64_PAIR_LUT[i] = (uint16_t)(B64_ENC[a] | (B64_ENC[b] << 8));
57819+
}
57820+
B64_PAIR_INIT = 1;
57821+
}
57822+
}
57823+
57824+
static inline size_t b64_encode(const uint8_t *src, size_t len, char *dst) {
57825+
b64_pair_init_once();
57826+
size_t i=0, j=0;
57827+
size_t main = (len/3)*3;
57828+
for (; i<main; i+=3, j+=4) {
57829+
uint32_t v = (src[i]<<16)|(src[i+1]<<8)|src[i+2];
57830+
uint16_t p0 = B64_PAIR_LUT[(v>>12)&0xFFF];
57831+
uint16_t p1 = B64_PAIR_LUT[v & 0xFFF];
57832+
memcpy(dst+j, &p0, 2);
57833+
memcpy(dst+j+2, &p1, 2);
57834+
}
57835+
size_t rem = len-i;
57836+
if (unlikely(rem==1)) {
57837+
uint32_t v = src[i]<<16;
57838+
dst[j++] = B64_ENC[(v>>18)&63];
57839+
dst[j++] = B64_ENC[(v>>12)&63];
57840+
dst[j++] = '=';
57841+
dst[j++] = '=';
57842+
} else if (unlikely(rem==2)) {
57843+
uint32_t v = (src[i]<<16)|(src[i+1]<<8);
57844+
dst[j++] = B64_ENC[(v>>18)&63];
57845+
dst[j++] = B64_ENC[(v>>12)&63];
57846+
dst[j++] = B64_ENC[(v>>6)&63];
57847+
dst[j++] = '=';
57848+
}
57849+
return j;
57850+
}
57851+
static inline size_t
57852+
b64_decode(const char *src, size_t len, uint8_t *dst, int *err)
57853+
{
57854+
uint32_t acc = 0;
57855+
int bits = 0;
57856+
size_t j = 0;
57857+
int seen_pad = 0;
57858+
57859+
if (unlikely(err)) *err = 0;
57860+
57861+
for (size_t i=0; i<len; i++) {
57862+
unsigned ch = (unsigned char)src[i];
57863+
uint8_t flag = B64_FLAGS[ch];
57864+
57865+
if (likely(flag & K_VAL)) {
57866+
// normal sextet
57867+
if (unlikely(seen_pad)) { if (err) *err = 1; return 0; }
57868+
acc = (acc << 6) | B64_VAL[ch];
57869+
bits += 6;
57870+
if (bits >= 8) {
57871+
bits -= 8;
57872+
dst[j++] = (uint8_t)((acc >> bits) & 0xFF);
57873+
}
57874+
} else if (flag & K_WS) {
57875+
// whitespace -> skip
57876+
continue;
57877+
} else if (flag & K_PAD) {
57878+
// '=' padding
57879+
seen_pad = 1;
57880+
// After '=', only ws or '=' is valid
57881+
// Validate remaining input
57882+
for (size_t k=i+1; k<len; k++) {
57883+
unsigned ch2 = (unsigned char)src[k];
57884+
uint8_t f2 = B64_FLAGS[ch2];
57885+
if (f2 & K_WS) continue; // ws
57886+
if (ch2!='=') { if (err) *err = 1; return 0; }
57887+
}
57888+
break;
57889+
} else {
57890+
// invalid
57891+
if (err) *err = 1;
57892+
return 0;
57893+
}
57894+
}
57895+
57896+
// Leftover bits are only valid if 0–2 '=' pads handled it
57897+
if (unlikely(bits >= 6)) {
57898+
if (err) *err = 1;
57899+
return 0;
57900+
}
57901+
57902+
return j;
57903+
}
57904+
57905+
static JSValue js_btoa(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv)
57906+
{
57907+
JSValue val = likely(JS_IsString(argv[0])) ? JS_DupValue(ctx, argv[0])
57908+
: JS_ToString(ctx, argv[0]);
57909+
if (unlikely(JS_IsException(val)))
57910+
return JS_EXCEPTION;
57911+
57912+
JSString *s = JS_VALUE_GET_STRING(val);
57913+
size_t len = (size_t)s->len;
57914+
57915+
const uint8_t *in8 = NULL;
57916+
uint8_t *tmp = NULL;
57917+
57918+
if (likely(!s->is_wide_char)) {
57919+
in8 = (const uint8_t *)str8(s);
57920+
} else {
57921+
const uint16_t *src = str16(s);
57922+
tmp = js_malloc(ctx, len ? len : 1);
57923+
if (unlikely(!tmp)) {
57924+
JS_FreeValue(ctx, val);
57925+
return JS_ThrowOutOfMemory(ctx);
57926+
}
57927+
for (size_t i = 0; i < len; i++) {
57928+
uint32_t c = src[i];
57929+
if (unlikely(c > 0xFF)) {
57930+
js_free(ctx, tmp);
57931+
JS_FreeValue(ctx, val);
57932+
return JS_ThrowTypeError(ctx, "character out of range (>255) at %zu", i);
57933+
}
57934+
tmp[i] = (uint8_t)c;
57935+
}
57936+
in8 = tmp;
57937+
}
57938+
57939+
if (unlikely(len > (SIZE_MAX - 2) / 3)) {
57940+
if (tmp) js_free(ctx, tmp);
57941+
JS_FreeValue(ctx, val);
57942+
return JS_ThrowRangeError(ctx, "input too large");
57943+
}
57944+
size_t out_len = 4 * ((len + 2) / 3);
57945+
if (unlikely(out_len > JS_STRING_LEN_MAX)) {
57946+
if (tmp) js_free(ctx, tmp);
57947+
JS_FreeValue(ctx, val);
57948+
return JS_ThrowRangeError(ctx, "output too large");
57949+
}
57950+
57951+
JSString *ostr = js_alloc_string(ctx, out_len, /*is_wide=*/0);
57952+
if (unlikely(!ostr)) {
57953+
if (tmp) js_free(ctx, tmp);
57954+
JS_FreeValue(ctx, val);
57955+
return JS_EXCEPTION;
57956+
}
57957+
57958+
b64_encode(in8, len, (char *)str8(ostr));
57959+
ostr->len = out_len; // ensure length matches
57960+
57961+
if (tmp) js_free(ctx, tmp);
57962+
JS_FreeValue(ctx, val);
57963+
return JS_MKPTR(JS_TAG_STRING, ostr);
57964+
}
57965+
57966+
static JSValue js_atob(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv)
57967+
{
57968+
JSValue val = likely(JS_IsString(argv[0])) ? JS_DupValue(ctx, argv[0])
57969+
: JS_ToString(ctx, argv[0]);
57970+
if (unlikely(JS_IsException(val)))
57971+
return JS_EXCEPTION;
57972+
57973+
JSString *s = JS_VALUE_GET_STRING(val);
57974+
size_t slen = (size_t)s->len;
57975+
57976+
const uint8_t *in;
57977+
uint8_t *tmp = NULL;
57978+
57979+
if (likely(!s->is_wide_char)) {
57980+
const uint8_t *p = (const uint8_t *)str8(s);
57981+
// validate ASCII fast path
57982+
for (size_t i = 0; i < slen; i++) {
57983+
if (unlikely(p[i] & 0x80)) {
57984+
JS_FreeValue(ctx, val);
57985+
return JS_ThrowTypeError(ctx, "non-ASCII input");
57986+
}
57987+
}
57988+
in = p;
57989+
} else {
57990+
const uint16_t *src = str16(s);
57991+
tmp = js_malloc(ctx, slen ? slen : 1);
57992+
if (unlikely(!tmp)) {
57993+
JS_FreeValue(ctx, val);
57994+
return JS_ThrowOutOfMemory(ctx);
57995+
}
57996+
for (size_t i = 0; i < slen; i++) {
57997+
if (unlikely(src[i] > 0x7F)) {
57998+
js_free(ctx, tmp);
57999+
JS_FreeValue(ctx, val);
58000+
return JS_ThrowTypeError(ctx, "non-ASCII input at %zu", i);
58001+
}
58002+
tmp[i] = (uint8_t)src[i];
58003+
}
58004+
in = tmp;
58005+
}
58006+
58007+
// Max decoded size: slen*3/4
58008+
if (unlikely(slen > (SIZE_MAX / 3) * 4)) {
58009+
if (tmp) js_free(ctx, tmp);
58010+
JS_FreeValue(ctx, val);
58011+
return JS_ThrowRangeError(ctx, "input too large");
58012+
}
58013+
size_t out_cap = (slen / 4) * 3 + 3; // safe bound
58014+
if (unlikely(out_cap > JS_STRING_LEN_MAX)) {
58015+
if (tmp) js_free(ctx, tmp);
58016+
JS_FreeValue(ctx, val);
58017+
return JS_ThrowRangeError(ctx, "output too large");
58018+
}
58019+
58020+
JSString *ostr = js_alloc_string(ctx, out_cap, /*is_wide=*/0);
58021+
if (unlikely(!ostr)) {
58022+
if (tmp) js_free(ctx, tmp);
58023+
JS_FreeValue(ctx, val);
58024+
return JS_EXCEPTION;
58025+
}
58026+
58027+
uint8_t *dst = (uint8_t *)str8(ostr);
58028+
int err = 0;
58029+
size_t out_len = b64_decode((const char *)in, slen, dst, &err);
58030+
58031+
if (unlikely(tmp)) js_free(ctx, tmp);
58032+
JS_FreeValue(ctx, val);
58033+
58034+
if (unlikely(err)) {
58035+
js_free(ctx, ostr);
58036+
return JS_ThrowTypeError(ctx, "invalid base64 input");
58037+
}
58038+
58039+
ostr->len = out_len;
58040+
return JS_MKPTR(JS_TAG_STRING, ostr);
58041+
}
58042+
58043+
static const JSCFunctionListEntry js_base64_funcs[] = {
58044+
JS_CFUNC_DEF("btoa", 1, js_btoa),
58045+
JS_CFUNC_DEF("atob", 1, js_atob)};
58046+
58047+
void JS_AddIntrinsicBase64(JSContext *ctx)
58048+
{
58049+
JSValue global = JS_GetGlobalObject(ctx);
58050+
JS_SetPropertyFunctionList(ctx, global, js_base64_funcs, sizeof(js_base64_funcs) / sizeof(js_base64_funcs[0]));
58051+
JS_FreeValue(ctx, global);
58052+
}
58053+
5776758054
bool JS_DetectModule(const char *input, size_t input_len)
5776858055
{
5776958056
#ifndef QJS_DISABLE_PARSER

quickjs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,7 @@ JS_EXTERN void JS_AddIntrinsicTypedArrays(JSContext *ctx);
484484
JS_EXTERN void JS_AddIntrinsicPromise(JSContext *ctx);
485485
JS_EXTERN void JS_AddIntrinsicBigInt(JSContext *ctx);
486486
JS_EXTERN void JS_AddIntrinsicWeakRef(JSContext *ctx);
487+
JS_EXTERN void JS_AddIntrinsicBase64(JSContext *ctx);
487488
JS_EXTERN void JS_AddPerformance(JSContext *ctx);
488489

489490
/* for equality comparisons and sameness */
@@ -801,6 +802,10 @@ static inline JSValue JS_NewString(JSContext *ctx, const char *str) {
801802
// that is the responsibility of the caller
802803
JS_EXTERN JSValue JS_NewTwoByteString(JSContext *ctx, const uint16_t *buf,
803804
size_t len);
805+
JS_EXTERN int JS_StringToLatin1Bytes(JSContext *ctx, JSValueConst val_in,
806+
uint8_t *out, size_t out_cap,
807+
size_t *out_len);
808+
804809
JS_EXTERN JSValue JS_NewAtomString(JSContext *ctx, const char *str);
805810
JS_EXTERN JSValue JS_ToString(JSContext *ctx, JSValueConst val);
806811
JS_EXTERN JSValue JS_ToPropertyKey(JSContext *ctx, JSValueConst val);

0 commit comments

Comments
 (0)