Skip to content

Commit d1960d1

Browse files
authored
Implement RegExp 'd' flag (#86)
1 parent e2bc644 commit d1960d1

File tree

5 files changed

+152
-62
lines changed

5 files changed

+152
-62
lines changed

libregexp.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2582,25 +2582,26 @@ void *lre_realloc(void *opaque, void *ptr, size_t size)
25822582

25832583
int main(int argc, char **argv)
25842584
{
2585-
int len, ret, i;
2585+
int len, flags, ret, i;
25862586
uint8_t *bc;
25872587
char error_msg[64];
25882588
uint8_t *capture[CAPTURE_COUNT_MAX * 2];
25892589
const char *input;
25902590
int input_len, capture_count;
25912591

2592-
if (argc < 3) {
2593-
printf("usage: %s regexp input\n", argv[0]);
2592+
if (argc < 4) {
2593+
printf("usage: %s regexp flags input\n", argv[0]);
25942594
exit(1);
25952595
}
2596+
flags = atoi(argv[2]);
25962597
bc = lre_compile(&len, error_msg, sizeof(error_msg), argv[1],
2597-
strlen(argv[1]), 0, NULL);
2598+
strlen(argv[1]), flags, NULL);
25982599
if (!bc) {
25992600
fprintf(stderr, "error: %s\n", error_msg);
26002601
exit(1);
26012602
}
26022603

2603-
input = argv[2];
2604+
input = argv[3];
26042605
input_len = strlen(input);
26052606

26062607
ret = lre_exec(capture, bc, (uint8_t *)input, 0, input_len, 0, NULL);

libregexp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#define LRE_FLAG_DOTALL (1 << 3)
3737
#define LRE_FLAG_UTF16 (1 << 4)
3838
#define LRE_FLAG_STICKY (1 << 5)
39+
#define LRE_FLAG_INDICES (1 << 6) /* RegExp 'd' flag. Unused by libregexp, just recorded. */
3940

4041
#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */
4142

quickjs-atom.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ DEF(revoke, "revoke")
166166
DEF(async, "async")
167167
DEF(exec, "exec")
168168
DEF(groups, "groups")
169+
DEF(indices, "indices")
169170
DEF(status, "status")
170171
DEF(reason, "reason")
171172
DEF(globalThis, "globalThis")

quickjs.c

Lines changed: 143 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,11 @@ static const JSClassExoticMethods js_proxy_exotic_methods;
11511151
static const JSClassExoticMethods js_module_ns_exotic_methods;
11521152
static JSClassID js_class_id_alloc = JS_CLASS_INIT_COUNT;
11531153

1154+
static JSValue js_int32(int32_t v)
1155+
{
1156+
return JS_MKVAL(JS_TAG_INT, v);
1157+
}
1158+
11541159
static void js_trigger_gc(JSRuntime *rt, size_t size)
11551160
{
11561161
BOOL force_gc;
@@ -39831,6 +39836,9 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValueConst pattern,
3983139836
/* XXX: re_flags = LRE_FLAG_OCTAL unless strict mode? */
3983239837
for (i = 0; i < len; i++) {
3983339838
switch(str[i]) {
39839+
case 'd':
39840+
mask = LRE_FLAG_INDICES;
39841+
break;
3983439842
case 'g':
3983539843
mask = LRE_FLAG_GLOBAL;
3983639844
break;
@@ -40153,6 +40161,11 @@ static JSValue js_regexp_get_flags(JSContext *ctx, JSValueConst this_val)
4015340161
if (JS_VALUE_GET_TAG(this_val) != JS_TAG_OBJECT)
4015440162
return JS_ThrowTypeErrorNotAnObject(ctx);
4015540163

40164+
res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "hasIndices"));
40165+
if (res < 0)
40166+
goto exception;
40167+
if (res)
40168+
*p++ = 'd';
4015640169
res = JS_ToBoolFree(ctx, JS_GetProperty(ctx, this_val, JS_ATOM_global));
4015740170
if (res < 0)
4015840171
goto exception;
@@ -40232,53 +40245,56 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
4023240245
{
4023340246
JSRegExp *re = js_get_regexp(ctx, this_val, TRUE);
4023440247
JSString *str;
40235-
JSValue str_val, obj, val, groups = JS_UNDEFINED;
40248+
JSValue t, ret, str_val, obj, val, groups;
40249+
JSValue indices, indices_groups;
4023640250
uint8_t *re_bytecode;
40237-
int ret;
4023840251
uint8_t **capture, *str_buf;
40239-
int capture_count, shift, i, re_flags;
40252+
int rc, capture_count, shift, i, re_flags;
4024040253
int64_t last_index;
4024140254
const char *group_name_ptr;
4024240255

4024340256
if (!re)
4024440257
return JS_EXCEPTION;
40258+
4024540259
str_val = JS_ToString(ctx, argv[0]);
4024640260
if (JS_IsException(str_val))
40247-
return str_val;
40248-
val = JS_GetProperty(ctx, this_val, JS_ATOM_lastIndex);
40249-
if (JS_IsException(val) ||
40250-
JS_ToLengthFree(ctx, &last_index, val)) {
40251-
JS_FreeValue(ctx, str_val);
4025240261
return JS_EXCEPTION;
40253-
}
40262+
40263+
ret = JS_EXCEPTION;
40264+
obj = JS_NULL;
40265+
groups = JS_UNDEFINED;
40266+
indices = JS_UNDEFINED;
40267+
indices_groups = JS_UNDEFINED;
40268+
capture = NULL;
40269+
40270+
val = JS_GetProperty(ctx, this_val, JS_ATOM_lastIndex);
40271+
if (JS_IsException(val) || JS_ToLengthFree(ctx, &last_index, val))
40272+
goto fail;
40273+
4025440274
re_bytecode = re->bytecode->u.str8;
4025540275
re_flags = lre_get_flags(re_bytecode);
4025640276
if ((re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY)) == 0) {
4025740277
last_index = 0;
4025840278
}
4025940279
str = JS_VALUE_GET_STRING(str_val);
4026040280
capture_count = lre_get_capture_count(re_bytecode);
40261-
capture = NULL;
4026240281
if (capture_count > 0) {
4026340282
capture = js_malloc(ctx, sizeof(capture[0]) * capture_count * 2);
40264-
if (!capture) {
40265-
JS_FreeValue(ctx, str_val);
40266-
return JS_EXCEPTION;
40267-
}
40283+
if (!capture)
40284+
goto fail;
4026840285
}
4026940286
shift = str->is_wide_char;
4027040287
str_buf = str->u.str8;
4027140288
if (last_index > str->len) {
40272-
ret = 2;
40289+
rc = 2;
4027340290
} else {
40274-
ret = lre_exec(capture, re_bytecode,
40275-
str_buf, last_index, str->len,
40276-
shift, ctx);
40291+
rc = lre_exec(capture, re_bytecode,
40292+
str_buf, last_index, str->len,
40293+
shift, ctx);
4027740294
}
40278-
obj = JS_NULL;
40279-
if (ret != 1) {
40280-
if (ret >= 0) {
40281-
if (ret == 2 || (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY))) {
40295+
if (rc != 1) {
40296+
if (rc >= 0) {
40297+
if (rc == 2 || (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY))) {
4028240298
if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex,
4028340299
JS_NewInt32(ctx, 0)) < 0)
4028440300
goto fail;
@@ -40287,7 +40303,6 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
4028740303
JS_ThrowInternalError(ctx, "out of memory in regexp execution");
4028840304
goto fail;
4028940305
}
40290-
JS_FreeValue(ctx, str_val);
4029140306
} else {
4029240307
int prop_flags;
4029340308
if (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY)) {
@@ -40305,52 +40320,123 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
4030540320
if (JS_IsException(groups))
4030640321
goto fail;
4030740322
}
40308-
40309-
for(i = 0; i < capture_count; i++) {
40310-
int start, end;
40311-
JSValue val;
40312-
if (capture[2 * i] == NULL ||
40313-
capture[2 * i + 1] == NULL) {
40314-
val = JS_UNDEFINED;
40315-
} else {
40316-
start = (capture[2 * i] - str_buf) >> shift;
40317-
end = (capture[2 * i + 1] - str_buf) >> shift;
40318-
val = js_sub_string(ctx, str, start, end);
40319-
if (JS_IsException(val))
40323+
if (re_flags & LRE_FLAG_INDICES) {
40324+
indices = JS_NewArray(ctx);
40325+
if (JS_IsException(indices))
40326+
goto fail;
40327+
if (group_name_ptr) {
40328+
indices_groups = JS_NewObjectProto(ctx, JS_NULL);
40329+
if (JS_IsException(indices_groups))
4032040330
goto fail;
4032140331
}
40332+
}
40333+
40334+
for(i = 0; i < capture_count; i++) {
40335+
const char *name = NULL;
40336+
uint8_t **match = &capture[2 * i];
40337+
int start = -1;
40338+
int end = -1;
40339+
4032240340
if (group_name_ptr && i > 0) {
40323-
if (*group_name_ptr) {
40324-
if (JS_DefinePropertyValueStr(ctx, groups, group_name_ptr,
40325-
JS_DupValue(ctx, val),
40326-
prop_flags) < 0) {
40341+
if (*group_name_ptr) name = group_name_ptr;
40342+
group_name_ptr += strlen(group_name_ptr) + 1;
40343+
}
40344+
40345+
if (match[0] && match[1]) {
40346+
start = (match[0] - str_buf) >> shift;
40347+
end = (match[1] - str_buf) >> shift;
40348+
}
40349+
40350+
if (!JS_IsUndefined(indices)) {
40351+
JSValue val = JS_UNDEFINED;
40352+
if (start != -1) {
40353+
val = JS_NewArray(ctx);
40354+
if (JS_IsException(val))
40355+
goto fail;
40356+
if (JS_DefinePropertyValueUint32(ctx, val, 0,
40357+
js_int32(start),
40358+
prop_flags) < 0) {
40359+
JS_FreeValue(ctx, val);
40360+
goto fail;
40361+
}
40362+
if (JS_DefinePropertyValueUint32(ctx, val, 1,
40363+
js_int32(end),
40364+
prop_flags) < 0) {
4032740365
JS_FreeValue(ctx, val);
4032840366
goto fail;
4032940367
}
4033040368
}
40331-
group_name_ptr += strlen(group_name_ptr) + 1;
40369+
if (name && !JS_IsUndefined(indices_groups)) {
40370+
val = JS_DupValue(ctx, val);
40371+
if (JS_DefinePropertyValueStr(ctx, indices_groups,
40372+
name, val, prop_flags) < 0) {
40373+
JS_FreeValue(ctx, val);
40374+
goto fail;
40375+
}
40376+
}
40377+
if (JS_DefinePropertyValueUint32(ctx, indices, i, val,
40378+
prop_flags) < 0) {
40379+
goto fail;
40380+
}
40381+
}
40382+
40383+
JSValue val = JS_UNDEFINED;
40384+
if (start != -1) {
40385+
val = js_sub_string(ctx, str, start, end);
40386+
if (JS_IsException(val))
40387+
goto fail;
40388+
}
40389+
40390+
if (name) {
40391+
if (JS_DefinePropertyValueStr(ctx, groups, name,
40392+
JS_DupValue(ctx, val),
40393+
prop_flags) < 0) {
40394+
JS_FreeValue(ctx, val);
40395+
goto fail;
40396+
}
4033240397
}
40398+
4033340399
if (JS_DefinePropertyValueUint32(ctx, obj, i, val, prop_flags) < 0)
4033440400
goto fail;
4033540401
}
40402+
40403+
t = groups, groups = JS_UNDEFINED;
4033640404
if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_groups,
40337-
groups, prop_flags) < 0)
40405+
t, prop_flags) < 0) {
4033840406
goto fail;
40339-
if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_index,
40340-
JS_NewInt32(ctx, (capture[0] - str_buf) >> shift), prop_flags) < 0)
40407+
}
40408+
40409+
t = js_int32((capture[0] - str_buf) >> shift);
40410+
if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_index, t, prop_flags) < 0)
4034140411
goto fail;
40342-
if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_input, str_val, prop_flags) < 0)
40343-
goto fail1;
40412+
40413+
t = str_val, str_val = JS_UNDEFINED;
40414+
if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_input, t, prop_flags) < 0)
40415+
goto fail;
40416+
40417+
if (!JS_IsUndefined(indices)) {
40418+
t = indices_groups, indices_groups = JS_UNDEFINED;
40419+
if (JS_DefinePropertyValue(ctx, indices, JS_ATOM_groups,
40420+
t, prop_flags) < 0) {
40421+
goto fail;
40422+
}
40423+
t = indices, indices = JS_UNDEFINED;
40424+
if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_indices,
40425+
t, prop_flags) < 0) {
40426+
goto fail;
40427+
}
40428+
}
4034440429
}
40345-
js_free(ctx, capture);
40346-
return obj;
40430+
ret = obj;
40431+
obj = JS_UNDEFINED;
4034740432
fail:
40348-
JS_FreeValue(ctx, groups);
40433+
JS_FreeValue(ctx, indices_groups);
40434+
JS_FreeValue(ctx, indices);
4034940435
JS_FreeValue(ctx, str_val);
40350-
fail1:
40436+
JS_FreeValue(ctx, groups);
4035140437
JS_FreeValue(ctx, obj);
4035240438
js_free(ctx, capture);
40353-
return JS_EXCEPTION;
40439+
return ret;
4035440440
}
4035540441

4035640442
/* delete portions of a string that match a given regex */
@@ -41185,12 +41271,13 @@ static const JSCFunctionListEntry js_regexp_funcs[] = {
4118541271
static const JSCFunctionListEntry js_regexp_proto_funcs[] = {
4118641272
JS_CGETSET_DEF("flags", js_regexp_get_flags, NULL ),
4118741273
JS_CGETSET_DEF("source", js_regexp_get_source, NULL ),
41188-
JS_CGETSET_MAGIC_DEF("global", js_regexp_get_flag, NULL, 1 ),
41189-
JS_CGETSET_MAGIC_DEF("ignoreCase", js_regexp_get_flag, NULL, 2 ),
41190-
JS_CGETSET_MAGIC_DEF("multiline", js_regexp_get_flag, NULL, 4 ),
41191-
JS_CGETSET_MAGIC_DEF("dotAll", js_regexp_get_flag, NULL, 8 ),
41192-
JS_CGETSET_MAGIC_DEF("unicode", js_regexp_get_flag, NULL, 16 ),
41193-
JS_CGETSET_MAGIC_DEF("sticky", js_regexp_get_flag, NULL, 32 ),
41274+
JS_CGETSET_MAGIC_DEF("global", js_regexp_get_flag, NULL, LRE_FLAG_GLOBAL ),
41275+
JS_CGETSET_MAGIC_DEF("ignoreCase", js_regexp_get_flag, NULL, LRE_FLAG_IGNORECASE ),
41276+
JS_CGETSET_MAGIC_DEF("multiline", js_regexp_get_flag, NULL, LRE_FLAG_MULTILINE ),
41277+
JS_CGETSET_MAGIC_DEF("dotAll", js_regexp_get_flag, NULL, LRE_FLAG_DOTALL ),
41278+
JS_CGETSET_MAGIC_DEF("unicode", js_regexp_get_flag, NULL, LRE_FLAG_UTF16 ),
41279+
JS_CGETSET_MAGIC_DEF("sticky", js_regexp_get_flag, NULL, LRE_FLAG_STICKY ),
41280+
JS_CGETSET_MAGIC_DEF("hasIndices", js_regexp_get_flag, NULL, LRE_FLAG_INDICES ),
4119441281
JS_CFUNC_DEF("exec", 1, js_regexp_exec ),
4119541282
JS_CFUNC_DEF("compile", 2, js_regexp_compile ),
4119641283
JS_CFUNC_DEF("test", 1, js_regexp_test ),

test262.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ Reflect.setPrototypeOf
154154
regexp-dotall
155155
regexp-duplicate-named-groups=skip
156156
regexp-lookbehind
157-
regexp-match-indices=skip
157+
regexp-match-indices
158158
regexp-named-groups
159159
regexp-unicode-property-escapes
160160
regexp-v-flag=skip

0 commit comments

Comments
 (0)