Skip to content

Commit e133c1e

Browse files
Lucassaghul
andcommitted
add btoa()/atob() builtins
- Introduce global btoa() and atob() functions
- Standard base64 alphabet (RFC 4648)
- Decoder implements forgiving-base64-decode (WHATWG Infra spec)
- Tolerant of whitespace, validates padding per spec
- JS_AddIntrinsicAToB() ensures DOMException is registered

Co-authored-by: Saúl Ibarra Corretgé <s@saghul.net>
1 parent 01bce21 commit e133c1e

File tree

3 files changed

+409
-10
lines changed

3 files changed

+409
-10
lines changed

quickjs.c

Lines changed: 314 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2500,7 +2500,6 @@ JSContext *JS_NewContextRaw(JSRuntime *rt)
25002500
JSContext *JS_NewContext(JSRuntime *rt)
25012501
{
25022502
JSContext *ctx;
2503-
25042503
ctx = JS_NewContextRaw(rt);
25052504
if (!ctx)
25062505
return NULL;
@@ -2515,7 +2514,7 @@ JSContext *JS_NewContext(JSRuntime *rt)
25152514
JS_AddIntrinsicTypedArrays(ctx) ||
25162515
JS_AddIntrinsicPromise(ctx) ||
25172516
JS_AddIntrinsicWeakRef(ctx) ||
2518-
JS_AddIntrinsicDOMException(ctx) ||
2517+
JS_AddIntrinsicAToB(ctx) ||
25192518
JS_AddPerformance(ctx)) {
25202519
JS_FreeContext(ctx);
25212520
return NULL;
@@ -4329,26 +4328,26 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
43294328
size_t len;
43304329
int kind;
43314330

4332-
if (buf_len <= 0) {
4331+
if (unlikely(buf_len <= 0))
43334332
return js_empty_string(ctx->rt);
4334-
}
4333+
43354334
/* Compute string kind and length: 7-bit, 8-bit, 16-bit, 16-bit UTF-16 */
43364335
kind = utf8_scan(buf, buf_len, &len);
4337-
if (len > JS_STRING_LEN_MAX)
4336+
if (unlikely(len > JS_STRING_LEN_MAX))
43384337
return JS_ThrowRangeError(ctx, "invalid string length");
43394338

43404339
switch (kind) {
43414340
case UTF8_PLAIN_ASCII:
43424341
str = js_alloc_string(ctx, len, 0);
4343-
if (!str)
4342+
if (unlikely(!str))
43444343
return JS_EXCEPTION;
43454344
memcpy(str8(str), buf, len);
43464345
str8(str)[len] = '\0';
43474346
break;
43484347
case UTF8_NON_ASCII:
43494348
/* buf contains non-ASCII code-points, but limited to 8-bit values */
43504349
str = js_alloc_string(ctx, len, 0);
4351-
if (!str)
4350+
if (unlikely(!str))
43524351
return JS_EXCEPTION;
43534352
utf8_decode_buf8(str8(str), len + 1, buf, buf_len);
43544353
break;
@@ -4357,7 +4356,7 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
43574356
//if (kind & UTF8_HAS_ERRORS)
43584357
// return JS_ThrowRangeError(ctx, "invalid UTF-8 sequence");
43594358
str = js_alloc_string(ctx, len, 1);
4360-
if (!str)
4359+
if (unlikely(!str))
43614360
return JS_EXCEPTION;
43624361
utf8_decode_buf16(str16(str), len, buf, buf_len);
43634362
break;
@@ -4369,10 +4368,11 @@ JSValue JS_NewStringUTF16(JSContext *ctx, const uint16_t *buf, size_t len)
43694368
{
43704369
JSString *str;
43714370

4372-
if (!len)
4371+
if (unlikely(!len))
43734372
return js_empty_string(ctx->rt);
4373+
43744374
str = js_alloc_string(ctx, len, 1);
4375-
if (!str)
4375+
if (unlikely(!str))
43764376
return JS_EXCEPTION;
43774377
memcpy(str16(str), buf, len * sizeof(*buf));
43784378
return JS_MKPTR(JS_TAG_STRING, str);
@@ -60797,6 +60797,310 @@ int JS_AddIntrinsicDOMException(JSContext *ctx)
6079760797
ctx->class_proto[JS_CLASS_DOM_EXCEPTION] = proto;
6079860798
return 0;
6079960799
}
60800+
/* base64 */
60801+
60802+
/* Encoding table: 6-bit value -> character of the standard base64
   alphabet (RFC 4648, section 4). */
static const unsigned char b64_enc[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z',
    'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p',
    'q','r','s','t','u','v','w','x','y','z',
    '0','1','2','3','4','5','6','7','8','9',
    '+','/'
};

/* Character classes stored in b64_flags[]:
   K_VAL = base64 alphabet character, K_WS = whitespace skipped by the
   decoder, K_PAD = the '=' padding character. */
enum { K_VAL = 1u, K_WS = 2u, K_PAD = 4u };

/* Decoding table: character -> 6-bit value. Entries for characters
   outside the alphabet are 0, so a lookup is only meaningful when
   b64_flags[] has K_VAL set for the same character. */
static const uint8_t b64_val[256] = {
    ['A']=0, ['B']=1, ['C']=2, ['D']=3, ['E']=4, ['F']=5, ['G']=6, ['H']=7,
    ['I']=8, ['J']=9, ['K']=10,['L']=11,['M']=12,['N']=13,['O']=14,['P']=15,
    ['Q']=16,['R']=17,['S']=18,['T']=19,['U']=20,['V']=21,['W']=22,['X']=23,['Y']=24,['Z']=25,
    ['a']=26,['b']=27,['c']=28,['d']=29,['e']=30,['f']=31,['g']=32,['h']=33,
    ['i']=34,['j']=35,['k']=36,['l']=37,['m']=38,['n']=39,['o']=40,['p']=41,
    ['q']=42,['r']=43,['s']=44,['t']=45,['u']=46,['v']=47,['w']=48,['x']=49,['y']=50,['z']=51,
    ['0']=52,['1']=53,['2']=54,['3']=55,['4']=56,['5']=57,['6']=58,['7']=59,['8']=60,['9']=61,
    ['+']=62, ['/']=63,
};

/* Classification table: character -> K_* flag (0 for anything else). */
static const char b64_flags[256] = {
    /* ASCII whitespace as defined by the WHATWG Infra spec:
       TAB, LF, FF, CR and SPACE. FF (U+000C) must be included,
       otherwise forgiving-base64-decode rejects valid input. */
    ['\t']=K_WS, ['\n']=K_WS, ['\f']=K_WS, ['\r']=K_WS, [' ']=K_WS,
    ['=']=K_PAD,
    ['A']=K_VAL,['B']=K_VAL,['C']=K_VAL,['D']=K_VAL,['E']=K_VAL,['F']=K_VAL,['G']=K_VAL,['H']=K_VAL,
    ['I']=K_VAL,['J']=K_VAL,['K']=K_VAL,['L']=K_VAL,['M']=K_VAL,['N']=K_VAL,['O']=K_VAL,['P']=K_VAL,
    ['Q']=K_VAL,['R']=K_VAL,['S']=K_VAL,['T']=K_VAL,['U']=K_VAL,['V']=K_VAL,['W']=K_VAL,['X']=K_VAL,
    ['Y']=K_VAL,['Z']=K_VAL,
    ['a']=K_VAL,['b']=K_VAL,['c']=K_VAL,['d']=K_VAL,['e']=K_VAL,['f']=K_VAL,['g']=K_VAL,['h']=K_VAL,
    ['i']=K_VAL,['j']=K_VAL,['k']=K_VAL,['l']=K_VAL,['m']=K_VAL,['n']=K_VAL,['o']=K_VAL,['p']=K_VAL,
    ['q']=K_VAL,['r']=K_VAL,['s']=K_VAL,['t']=K_VAL,['u']=K_VAL,['v']=K_VAL,['w']=K_VAL,['x']=K_VAL,
    ['y']=K_VAL,['z']=K_VAL,
    ['0']=K_VAL,['1']=K_VAL,['2']=K_VAL,['3']=K_VAL,['4']=K_VAL,['5']=K_VAL,['6']=K_VAL,['7']=K_VAL,
    ['8']=K_VAL,['9']=K_VAL,
    ['+']=K_VAL,['/']=K_VAL,
};

/* Encode `len` bytes from `src` as padded base64 into `dst`.
   `dst` must have room for 4 * ((len + 2) / 3) characters; no NUL
   terminator is written. Returns the number of characters written. */
static size_t b64_encode(const uint8_t *src, size_t len, char *dst)
{
    size_t i = 0, j = 0;
    size_t main_len = (len / 3) * 3;
    size_t rem;
    uint32_t v;

    /* full 3-byte groups -> 4 characters each */
    for (; i < main_len; i += 3, j += 4) {
        v = ((uint32_t)src[i] << 16)
          | ((uint32_t)src[i + 1] << 8)
          |  (uint32_t)src[i + 2];
        dst[j + 0] = b64_enc[(v >> 18) & 63];
        dst[j + 1] = b64_enc[(v >> 12) & 63];
        dst[j + 2] = b64_enc[(v >> 6) & 63];
        dst[j + 3] = b64_enc[v & 63];
    }

    /* trailing 1 or 2 bytes: emit the significant characters and pad
       the group with '=' up to 4 characters */
    rem = len - i;
    if (rem == 1) {
        v = (uint32_t)src[i] << 16;
        dst[j++] = b64_enc[(v >> 18) & 63];
        dst[j++] = b64_enc[(v >> 12) & 63];
        dst[j++] = '=';
        dst[j++] = '=';
    } else if (rem == 2) {
        v = ((uint32_t)src[i] << 16) | ((uint32_t)src[i + 1] << 8);
        dst[j++] = b64_enc[(v >> 18) & 63];
        dst[j++] = b64_enc[(v >> 12) & 63];
        dst[j++] = b64_enc[(v >> 6) & 63];
        dst[j++] = '=';
    }
    return j;
}

/* Implements https://infra.spec.whatwg.org/#forgiving-base64-decode

   Decodes `len` characters from `src` into `dst`. Whitespace is
   ignored everywhere; one or two trailing '=' are accepted only when
   the whitespace-free length is a multiple of 4. `dst` must have room
   for 3 bytes per 4 input characters, rounded up.

   Returns the number of bytes written. On invalid input *err is set
   to 1 and 0 is returned (some bytes may already have been stored in
   `dst`); otherwise *err is set to 0. */
static size_t
b64_decode(const char *src, size_t len, uint8_t *dst, int *err)
{
    size_t nws, pad, data_len, i, j;
    uint32_t acc;
    int bits;
    unsigned ch;

    *err = 0;

    /* count non-whitespace characters */
    nws = 0;
    for (i = 0; i < len; i++) {
        if (!(b64_flags[(unsigned char)src[i]] & K_WS))
            nws++;
    }

    /* count trailing '=' (skipping whitespace from the end) */
    pad = 0;
    i = len;
    while (i > 0) {
        ch = (unsigned char)src[--i];
        if (b64_flags[ch] & K_WS)
            continue;
        if (ch == '=')
            pad++;
        else
            break;
    }

    /* strip 1-2 trailing '=' only when total non-ws length is a multiple of 4 */
    if (pad > 0) {
        if (nws % 4 != 0 || pad > 2) {
            *err = 1;
            return 0;
        }
        data_len = nws - pad;
    } else {
        data_len = nws;
    }

    /* a single base64 char (6 bits) can't produce a byte */
    if (data_len % 4 == 1) {
        *err = 1;
        return 0;
    }

    /* decode: process exactly data_len non-ws characters; anything
       that is neither whitespace nor in the alphabet (including a
       '=' before the stripped padding) is an error */
    acc = 0;
    bits = 0;
    j = 0;
    nws = 0;
    for (i = 0; i < len && nws < data_len; i++) {
        ch = (unsigned char)src[i];
        if (b64_flags[ch] & K_WS)
            continue;
        if (!(b64_flags[ch] & K_VAL)) {
            *err = 1;
            return 0;
        }
        acc = (acc << 6) | b64_val[ch];
        bits += 6;
        nws++;
        if (bits >= 8) {
            bits -= 8;
            dst[j++] = (uint8_t)((acc >> bits) & 0xFF);
        }
    }

    /* leftover bits (4 or 2) of an incomplete final group are dropped */
    return j;
}
60944+
60945+
/* btoa(data): convert argv[0] to a string and return its base64
   encoding as a new 8-bit string.

   Every code unit of the input must be <= 0xFF; otherwise an
   "InvalidCharacterError" DOMException is thrown. A RangeError is
   thrown when the input or the encoded result would be too large. */
static JSValue js_btoa(JSContext *ctx, JSValueConst this_val,
                       int argc, JSValueConst *argv)
{
    JSValue str_val, result;
    JSString *input, *out;
    uint8_t *latin1 = NULL;     /* narrowed copy, only for wide strings */
    const uint8_t *bytes;
    size_t in_len, enc_len, produced, k;

    str_val = JS_ToString(ctx, argv[0]);
    if (unlikely(JS_IsException(str_val)))
        return JS_EXCEPTION;

    input = JS_VALUE_GET_STRING(str_val);
    in_len = (size_t)input->len;

    if (unlikely(input->is_wide_char)) {
        /* 16-bit storage: narrow into a scratch buffer, rejecting any
           code unit that does not fit in one byte */
        const uint16_t *units = str16(input);

        latin1 = js_malloc(ctx, in_len ? in_len : 1);
        if (unlikely(!latin1)) {
            result = JS_ThrowOutOfMemory(ctx);
            goto done;
        }
        for (k = 0; k < in_len; k++) {
            uint32_t unit = units[k];

            if (unlikely(unit > 0xFF)) {
                result = JS_ThrowDOMException(ctx, "InvalidCharacterError",
                    "String contains an invalid character");
                goto done;
            }
            latin1[k] = (uint8_t)unit;
        }
        bytes = latin1;
    } else {
        /* 8-bit storage can be encoded in place */
        bytes = (const uint8_t *)str8(input);
    }

    /* guard the size computation below against overflow */
    if (unlikely(in_len > (SIZE_MAX - 2) / 3)) {
        result = JS_ThrowRangeError(ctx, "input too large");
        goto done;
    }
    enc_len = ((in_len + 2) / 3) * 4;
    if (unlikely(enc_len > JS_STRING_LEN_MAX)) {
        result = JS_ThrowRangeError(ctx, "output too large");
        goto done;
    }

    out = js_alloc_string(ctx, enc_len, 0);
    if (unlikely(!out)) {
        result = JS_EXCEPTION;
        goto done;
    }

    produced = b64_encode(bytes, in_len, (char *)str8(out));
    str8(out)[produced] = '\0';
    out->len = enc_len;
    result = JS_MKPTR(JS_TAG_STRING, out);

done:
    /* shared exit for success and failure: release temporaries */
    if (latin1)
        js_free(ctx, latin1);
    JS_FreeValue(ctx, str_val);
    return result;
}
61008+
61009+
/* atob(data): convert argv[0] to a string, decode it with
   b64_decode() and return the decoded bytes as a new 8-bit string.

   Any code unit above 0x7F, or any input b64_decode() rejects, yields
   an "InvalidCharacterError" DOMException. A RangeError is thrown
   when the input or the output buffer would be too large. */
static JSValue js_atob(JSContext *ctx, JSValueConst this_val,
                       int argc, JSValueConst *argv)
{
    JSValue str_val, result;
    JSString *input, *out;
    uint8_t *narrowed = NULL;   /* ASCII copy, only for wide strings */
    const uint8_t *bytes;
    size_t in_len, capacity, decoded, k;
    int bad;

    str_val = JS_ToString(ctx, argv[0]);
    if (unlikely(JS_IsException(str_val)))
        return JS_EXCEPTION;

    input = JS_VALUE_GET_STRING(str_val);
    in_len = (size_t)input->len;

    if (unlikely(input->is_wide_char)) {
        /* 16-bit storage: copy down to bytes, bailing out on the first
           non-ASCII code unit */
        const uint16_t *units = str16(input);

        narrowed = js_malloc(ctx, in_len ? in_len : 1);
        if (unlikely(!narrowed)) {
            result = JS_ThrowOutOfMemory(ctx);
            goto done;
        }
        for (k = 0; k < in_len; k++) {
            if (unlikely(units[k] > 0x7F))
                goto invalid;
            narrowed[k] = (uint8_t)units[k];
        }
        bytes = narrowed;
    } else {
        /* 8-bit storage: usable in place once checked to be ASCII */
        bytes = (const uint8_t *)str8(input);
        for (k = 0; k < in_len; k++) {
            if (unlikely(bytes[k] >= 0x80))
                goto invalid;
        }
    }

    if (unlikely(in_len > (SIZE_MAX / 3) * 4)) {
        result = JS_ThrowRangeError(ctx, "input too large");
        goto done;
    }
    /* 3 bytes per 4 input characters, plus room for a partial group */
    capacity = 3 * (in_len / 4) + 3;
    if (unlikely(capacity > JS_STRING_LEN_MAX)) {
        result = JS_ThrowRangeError(ctx, "output too large");
        goto done;
    }

    out = js_alloc_string(ctx, capacity, 0);
    if (unlikely(!out)) {
        result = JS_EXCEPTION;
        goto done;
    }

    bad = 0;
    decoded = b64_decode((const char *)bytes, in_len, (uint8_t *)str8(out), &bad);
    if (unlikely(bad)) {
        js_free_string(ctx->rt, out);
        goto invalid;
    }

    /* the string was allocated at full capacity; trim it to the
       number of bytes actually decoded */
    str8(out)[decoded] = '\0';
    out->len = decoded;
    result = JS_MKPTR(JS_TAG_STRING, out);
    goto done;

invalid:
    result = JS_ThrowDOMException(ctx, "InvalidCharacterError",
        "The string to be decoded is not correctly encoded");
done:
    /* shared exit for success and failure: release temporaries */
    if (narrowed)
        js_free(ctx, narrowed);
    JS_FreeValue(ctx, str_val);
    return result;
}
61088+
61089+
static const JSCFunctionListEntry js_base64_funcs[] = {
61090+
JS_CFUNC_DEF("btoa", 1, js_btoa),
61091+
JS_CFUNC_DEF("atob", 1, js_atob),
61092+
};
61093+
61094+
int JS_AddIntrinsicAToB(JSContext *ctx)
61095+
{
61096+
if (!JS_IsRegisteredClass(ctx->rt, JS_CLASS_DOM_EXCEPTION)) {
61097+
if (JS_AddIntrinsicDOMException(ctx))
61098+
return -1;
61099+
}
61100+
JS_SetPropertyFunctionList(ctx, ctx->global_obj,
61101+
js_base64_funcs, countof(js_base64_funcs));
61102+
return 0;
61103+
}
6080061104

6080161105
bool JS_DetectModule(const char *input, size_t input_len)
6080261106
{

quickjs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,7 @@ JS_EXTERN int JS_AddIntrinsicBigInt(JSContext *ctx);
560560
JS_EXTERN int JS_AddIntrinsicWeakRef(JSContext *ctx);
561561
JS_EXTERN int JS_AddPerformance(JSContext *ctx);
562562
JS_EXTERN int JS_AddIntrinsicDOMException(JSContext *ctx);
563+
JS_EXTERN int JS_AddIntrinsicAToB(JSContext *ctx);
563564

564565
/* for equality comparisons and sameness */
565566
JS_EXTERN int JS_IsEqual(JSContext *ctx, JSValueConst op1, JSValueConst op2);

0 commit comments

Comments
 (0)