Skip to content

Commit 5c3077e

Browse files
authored
Implement RegExp serialization (#153)
JS_WriteObject() and JS_ReadObject() now support RegExp objects.
1 parent a6e73ca commit 5c3077e

File tree

5 files changed

+139
-0
lines changed

5 files changed

+139
-0
lines changed

cutils.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,14 @@ static inline uint64_t bswap64(uint64_t v)
230230
((v & ((uint64_t)0xff << (0 * 8))) << (7 * 8));
231231
}
232232

233+
/* Reverse, in place, the byte order of the 16-bit value stored at tab. */
static inline void inplace_bswap16(uint8_t *tab)
{
    uint16_t v = get_u16(tab);

    put_u16(tab, bswap16(v));
}
236+
237+
/* Reverse, in place, the byte order of the 32-bit value stored at tab. */
static inline void inplace_bswap32(uint8_t *tab)
{
    uint32_t v = get_u32(tab);

    put_u32(tab, bswap32(v));
}
240+
233241
/* XXX: should take an extra argument to pass slack information to the caller */
234242
typedef void *DynBufReallocFunc(void *opaque, void *ptr, size_t size);
235243

libregexp.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2557,6 +2557,71 @@ const char *lre_get_groupnames(const uint8_t *bc_buf)
25572557
return (const char *)(bc_buf + 7 + re_bytecode_len);
25582558
}
25592559

2560+
/*
 * Reverse the byte order of every multi-byte field of a compiled regexp,
 * in place: the 32-bit bytecode length in the header and the 16/32-bit
 * operands of each opcode.
 *
 * buf/len          the compiled regexp (header + bytecode + group names)
 * is_byte_swapped  TRUE when buf is currently in the *opposite* byte order
 *                  to the host, i.e. length/count fields read from it must
 *                  be swapped before they can be used to walk the buffer.
 *
 * Calls abort() when the buffer is shorter than the header, when the
 * bytecode length exceeds the buffer, when an instruction would extend past
 * the end of the bytecode, or when an opcode has an unexpected size.
 */
void lre_byte_swap(uint8_t *buf, size_t len, BOOL is_byte_swapped)
{
    uint8_t *p, *pe;
    uint32_t n, r, nw;

    p = buf;
    if (len < RE_HEADER_LEN)
        abort();

    // format is:
    //  <header>
    //  <bytecode>
    //  <capture group name 1>
    //  <capture group name 2>
    //  etc.
    n = get_u32(&p[3]); // bytecode size
    inplace_bswap32(&p[3]);
    if (is_byte_swapped)
        n = bswap32(n);
    if (n > len - RE_HEADER_LEN)
        abort();

    p = &buf[RE_HEADER_LEN];
    pe = &p[n];

    while (p < pe) {
        n = reopcode_info[*p].size;
        // never touch bytes beyond the end of the bytecode
        if (n > (size_t)(pe - p))
            abort();
        switch (n) {
        case 1:
        case 2:
            break;
        case 3:
            switch (*p) {
            case REOP_save_reset: // has two 8 bit arguments
                break;
            case REOP_range32: // variable length
                // the operand is the number of (low, high) uint32_t pairs;
                // read it in host order before it gets swapped below
                nw = get_u16(&p[1]);
                if (is_byte_swapped)
                    nw = bswap16(nw);
                r = 3 + 8 * nw;
                if (r > (size_t)(pe - p))
                    abort();
                for (; n < r; n += 4)
                    inplace_bswap32(&p[n]);
                goto doswap16;
            case REOP_range: // variable length
                // the operand is the number of (low, high) uint16_t pairs
                nw = get_u16(&p[1]);
                if (is_byte_swapped)
                    nw = bswap16(nw);
                r = 3 + 4 * nw;
                if (r > (size_t)(pe - p))
                    abort();
                for (; n < r; n += 2)
                    inplace_bswap16(&p[n]);
                goto doswap16;
            default:
            doswap16:
                inplace_bswap16(&p[1]);
                break;
            }
            break;
        case 5:
            inplace_bswap32(&p[1]);
            break;
        case 17:
            assert(*p == REOP_simple_greedy_quant);
            inplace_bswap32(&p[1]);
            inplace_bswap32(&p[5]);
            inplace_bswap32(&p[9]);
            inplace_bswap32(&p[13]);
            break;
        default:
            abort();
        }
        // for the variable length opcodes n now covers the range data too
        p = &p[n];
    }
}
2624+
25602625
#ifdef TEST
25612626

25622627
BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size)

libregexp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ int lre_exec(uint8_t **capture,
5353
int lre_parse_escape(const uint8_t **pp, int allow_utf16);
5454
LRE_BOOL lre_is_space(int c);
5555

56+
/* Byte-swap, in place, the multi-byte fields (header bytecode length and
   opcode operands) of the compiled regexp in buf[0..len-1]. Pass a true
   is_byte_swapped when the lengths stored in buf are currently in the
   opposite byte order to the host. Calls abort() on a malformed buffer. */
void lre_byte_swap(uint8_t *buf, size_t len, BOOL is_byte_swapped);
57+
5658
/* must be provided by the user */
5759
LRE_BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size);
5860
void *lre_realloc(void *opaque, void *ptr, size_t size);

quickjs.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31659,6 +31659,7 @@ typedef enum BCTagEnum {
3165931659
BC_TAG_TYPED_ARRAY,
3166031660
BC_TAG_ARRAY_BUFFER,
3166131661
BC_TAG_SHARED_ARRAY_BUFFER,
31662+
BC_TAG_REGEXP,
3166231663
BC_TAG_DATE,
3166331664
BC_TAG_OBJECT_VALUE,
3166431665
BC_TAG_OBJECT_REFERENCE,
@@ -32272,6 +32273,24 @@ static int JS_WriteSharedArrayBuffer(BCWriterState *s, JSValueConst obj)
3227232273
return 0;
3227332274
}
3227432275

32276+
/*
 * Serialize a RegExp as two strings: its pattern, then its compiled
 * bytecode. On a big-endian host the bytecode is byte-swapped in place just
 * for the duration of the write and restored afterwards, so the bytes
 * emitted match what a little-endian host would emit.
 *
 * Always returns 0.
 */
static int JS_WriteRegExp(BCWriterState *s, JSRegExp regexp)
{
    JSString *bc = regexp.bytecode;
    const int need_swap = is_be();

    /* compiled bytecode is stored as an 8-bit string */
    assert(!bc->is_wide_char);

    JS_WriteString(s, regexp.pattern);

    if (!need_swap) {
        JS_WriteString(s, bc);
        return 0;
    }

    /* host order -> serialized order, write, then back to host order */
    lre_byte_swap(bc->u.str8, bc->len, /*is_byte_swapped*/FALSE);
    JS_WriteString(s, bc);
    lre_byte_swap(bc->u.str8, bc->len, /*is_byte_swapped*/TRUE);
    return 0;
}
32293+
3227532294
static int JS_WriteObjectRec(BCWriterState *s, JSValueConst obj)
3227632295
{
3227732296
uint32_t tag;
@@ -32360,6 +32379,10 @@ static int JS_WriteObjectRec(BCWriterState *s, JSValueConst obj)
3236032379
goto invalid_tag;
3236132380
ret = JS_WriteSharedArrayBuffer(s, obj);
3236232381
break;
32382+
case JS_CLASS_REGEXP:
32383+
bc_put_u8(s, BC_TAG_REGEXP);
32384+
ret = JS_WriteRegExp(s, p->u.regexp);
32385+
break;
3236332386
case JS_CLASS_DATE:
3236432387
bc_put_u8(s, BC_TAG_DATE);
3236532388
ret = JS_WriteObjectRec(s, p->u.object_data);
@@ -33357,6 +33380,31 @@ static JSValue JS_ReadSharedArrayBuffer(BCReaderState *s)
3335733380
return JS_EXCEPTION;
3335833381
}
3335933382

33383+
/*
 * Deserialize a RegExp written by JS_WriteRegExp(): read the pattern string
 * and the bytecode string, convert the bytecode to host byte order on
 * big-endian hosts, and hand both to js_regexp_constructor_internal().
 *
 * Returns JS_EXCEPTION when either string cannot be read; the pattern is
 * released if the bytecode read fails.
 */
static JSValue JS_ReadRegExp(BCReaderState *s)
{
    JSContext *ctx = s->ctx;
    JSString *pattern, *bc;
    JSValue pattern_val, bc_val;

    pattern = JS_ReadString(s);
    if (!pattern)
        return JS_EXCEPTION;

    bc = JS_ReadString(s);
    if (!bc) {
        js_free_string(ctx->rt, pattern);
        return JS_EXCEPTION;
    }

    /* NOTE(review): bc comes from the serialized input; a wide string here
       trips this assert (or is silently accepted with NDEBUG) -- consider
       rejecting it with a regular exception instead. */
    assert(!bc->is_wide_char);
    if (is_be())
        lre_byte_swap(bc->u.str8, bc->len, /*is_byte_swapped*/TRUE);

    pattern_val = JS_MKPTR(JS_TAG_STRING, pattern);
    bc_val = JS_MKPTR(JS_TAG_STRING, bc);
    return js_regexp_constructor_internal(ctx, JS_UNDEFINED,
                                          pattern_val, bc_val);
}
33407+
3336033408
static JSValue JS_ReadDate(BCReaderState *s)
3336133409
{
3336233410
JSContext *ctx = s->ctx;
@@ -33484,6 +33532,9 @@ static JSValue JS_ReadObjectRec(BCReaderState *s)
3348433532
goto invalid_tag;
3348533533
obj = JS_ReadSharedArrayBuffer(s);
3348633534
break;
33535+
case BC_TAG_REGEXP:
33536+
obj = JS_ReadRegExp(s);
33537+
break;
3348733538
case BC_TAG_DATE:
3348833539
obj = JS_ReadDate(s);
3348933540
break;

tests/test_bjson.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,18 @@ function bjson_test_reference()
143143
}
144144
}
145145

146+
/* Round-trip RegExp objects through bjson.write() / bjson.read(),
   including flags and a non-ASCII named capture group. */
function bjson_test_regexp()
{
    var serialized, restored, match;

    bjson_test(/xyzzy/);
    bjson_test(/xyzzy/digu);

    /* the group name must survive serialization */
    serialized = bjson.write(/(?<𝓓𝓸𝓰>dog)/);
    restored = bjson.read(serialized, 0, serialized.byteLength);
    match = "sup dog".match(restored);
    assert(match.groups["𝓓𝓸𝓰"], "dog");
}
157+
146158
function bjson_test_all()
147159
{
148160
var obj;
@@ -171,6 +183,7 @@ function bjson_test_all()
171183
}
172184

173185
bjson_test_reference();
186+
bjson_test_regexp();
174187
}
175188

176189
bjson_test_all();

0 commit comments

Comments
 (0)