Skip to content

Commit c3e8299

Browse files
authored
Merge pull request #1950 from pocke/dedup-empty-arrays-and-hashes
Dedup empty arrays in parsing
2 parents 86dd467 + ff7c2c0 commit c3e8299

File tree

6 files changed

+116
-48
lines changed

6 files changed

+116
-48
lines changed

Gemfile.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ GEM
7171
psych (4.0.6)
7272
stringio
7373
public_suffix (6.0.1)
74-
raap (0.8.0)
74+
raap (1.0.0)
7575
rbs (~> 3.0)
7676
timeout (~> 0.4)
7777
racc (1.8.1)

ext/rbs_extension/parser.c

Lines changed: 63 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,14 @@ typedef struct {
5252
VALUE rest_keywords;
5353
} method_params;
5454

55+
/* Shared empty Ruby array used as the initial value of array-valued parse
 * results. It is created frozen and registered with the GC in
 * rbs__init_parser(), so it must be swapped for a fresh array (see
 * melt_array) before anything is pushed onto it. */
static VALUE EMPTY_ARRAY;
56+
57+
/* Make `*array` safe to append to.
 *
 * If `*array` is the shared EMPTY_ARRAY object, it is replaced in place with
 * a newly allocated array from rb_ary_new(); any other value is left
 * untouched. Callers in this file invoke it immediately before rb_ary_push().
 *
 * @param array  Pointer to the VALUE slot to check and possibly overwrite.
 */
static void inline melt_array(VALUE *array) {
58+
if (*array == EMPTY_ARRAY) {
59+
*array = rb_ary_new();
60+
}
61+
}
62+
5563
/* Report whether `params->required_positionals` is nil.
 *
 * NOTE(review): presumably a nil `required_positionals` is how an untyped
 * parameter list is marked; the code that sets it to nil is not visible in
 * this view -- confirm against the caller.
 *
 * @param params  Method parameter record to inspect.
 * @return true when `required_positionals` is nil, false otherwise.
 */
static bool rbs_is_untyped_params(method_params *params) {
5664
return NIL_P(params->required_positionals);
5765
}
@@ -137,7 +145,7 @@ void parser_advance_no_gap(parserstate *state) {
137145
*/
138146
VALUE parse_type_name(parserstate *state, TypeNameKind kind, range *rg) {
139147
VALUE absolute = Qfalse;
140-
VALUE path = rb_ary_new();
148+
VALUE path = EMPTY_ARRAY;
141149
VALUE namespace;
142150

143151
if (rg) {
@@ -155,6 +163,7 @@ VALUE parse_type_name(parserstate *state, TypeNameKind kind, range *rg) {
155163
&& state->current_token.range.end.byte_pos == state->next_token.range.start.byte_pos
156164
&& state->next_token.range.end.byte_pos == state->next_token2.range.start.byte_pos
157165
) {
166+
melt_array(&path);
158167
rb_ary_push(path, ID2SYM(INTERN_TOKEN(state, state->current_token)));
159168

160169
parser_advance(state);
@@ -211,9 +220,10 @@ VALUE parse_type_name(parserstate *state, TypeNameKind kind, range *rg) {
211220
type_list ::= {} type `,` ... <`,`> eol
212221
| {} type `,` ... `,` <type> eol
213222
*/
214-
static VALUE parse_type_list(parserstate *state, enum TokenType eol, VALUE types) {
223+
static void parse_type_list(parserstate *state, enum TokenType eol, VALUE *types) {
215224
while (true) {
216-
rb_ary_push(types, parse_type(state));
225+
melt_array(types);
226+
rb_ary_push(*types, parse_type(state));
217227

218228
if (state->next_token.type == pCOMMA) {
219229
parser_advance(state);
@@ -233,8 +243,6 @@ static VALUE parse_type_list(parserstate *state, enum TokenType eol, VALUE types
233243
}
234244
}
235245
}
236-
237-
return types;
238246
}
239247

240248
static bool is_keyword_token(enum TokenType type) {
@@ -435,6 +443,7 @@ static void parse_params(parserstate *state, method_params *params) {
435443
}
436444

437445
param = parse_function_param(state);
446+
melt_array(&params->required_positionals);
438447
rb_ary_push(params->required_positionals, param);
439448

440449
break;
@@ -460,6 +469,7 @@ static void parse_params(parserstate *state, method_params *params) {
460469
}
461470

462471
param = parse_function_param(state);
472+
melt_array(&params->optional_positionals);
463473
rb_ary_push(params->optional_positionals, param);
464474

465475
break;
@@ -503,6 +513,7 @@ static void parse_params(parserstate *state, method_params *params) {
503513
}
504514

505515
param = parse_function_param(state);
516+
melt_array(&params->trailing_positionals);
506517
rb_ary_push(params->trailing_positionals, param);
507518

508519
break;
@@ -593,10 +604,10 @@ static VALUE parse_optional(parserstate *state) {
593604
}
594605

595606
static void initialize_method_params(method_params *params){
596-
params->required_positionals = rb_ary_new();
597-
params->optional_positionals = rb_ary_new();
607+
params->required_positionals = EMPTY_ARRAY;
608+
params->optional_positionals = EMPTY_ARRAY;
598609
params->rest_positionals = Qnil;
599-
params->trailing_positionals = rb_ary_new();
610+
params->trailing_positionals = EMPTY_ARRAY;
600611
params->required_keywords = rb_hash_new();
601612
params->optional_keywords = rb_hash_new();
602613
params->rest_keywords = Qnil;
@@ -863,7 +874,7 @@ static VALUE parse_instance_type(parserstate *state, bool parse_alias) {
863874
}
864875

865876
VALUE typename = parse_type_name(state, expected_kind, &name_range);
866-
VALUE types = rb_ary_new();
877+
VALUE types = EMPTY_ARRAY;
867878

868879
TypeNameKind kind;
869880
if (state->current_token.type == tUIDENT) {
@@ -879,7 +890,7 @@ static VALUE parse_instance_type(parserstate *state, bool parse_alias) {
879890
if (state->next_token.type == pLBRACKET) {
880891
parser_advance(state);
881892
args_range.start = state->current_token.range.start;
882-
parse_type_list(state, pRBRACKET, types);
893+
parse_type_list(state, pRBRACKET, &types);
883894
parser_advance_assert(state, pRBRACKET);
884895
args_range.end = state->current_token.range.end;
885896
} else {
@@ -1018,17 +1029,17 @@ static VALUE parse_simple(parserstate *state) {
10181029
case pLBRACKET: {
10191030
range rg;
10201031
rg.start = state->current_token.range.start;
1021-
VALUE types = rb_ary_new();
1032+
VALUE types = EMPTY_ARRAY;
10221033
if (state->next_token.type != pRBRACKET) {
1023-
parse_type_list(state, pRBRACKET, types);
1034+
parse_type_list(state, pRBRACKET, &types);
10241035
}
10251036
parser_advance_assert(state, pRBRACKET);
10261037
rg.end = state->current_token.range.end;
10271038

10281039
return rbs_tuple(types, rbs_new_location(state->buffer, rg));
10291040
}
10301041
case pAREF_OPR: {
1031-
return rbs_tuple(rb_ary_new(), rbs_new_location(state->buffer, state->current_token.range));
1042+
return rbs_tuple(EMPTY_ARRAY, rbs_new_location(state->buffer, state->current_token.range));
10321043
}
10331044
case pLBRACE: {
10341045
position start = state->current_token.range.start;
@@ -1113,7 +1124,7 @@ VALUE parse_type(parserstate *state) {
11131124
type_param ::= tUIDENT (module_type_params == false)
11141125
*/
11151126
VALUE parse_type_params(parserstate *state, range *rg, bool module_type_params) {
1116-
VALUE params = rb_ary_new();
1127+
VALUE params = EMPTY_ARRAY;
11171128

11181129
if (state->next_token.type == pLBRACKET) {
11191130
parser_advance(state);
@@ -1189,6 +1200,7 @@ VALUE parse_type_params(parserstate *state, range *rg, bool module_type_params)
11891200
rbs_loc_add_optional_child(loc, rb_intern("upper_bound"), upper_bound_range);
11901201

11911202
VALUE param = rbs_ast_type_param(name, variance, unchecked, upper_bound, location);
1203+
melt_array(&params);
11921204
rb_ary_push(params, param);
11931205

11941206
if (state->next_token.type == pCOMMA) {
@@ -1428,7 +1440,7 @@ VALUE parse_annotation(parserstate *state) {
14281440
annotations ::= {} annotation ... <annotation>
14291441
| {<>}
14301442
*/
1431-
void parse_annotations(parserstate *state, VALUE annotations, position *annot_pos) {
1443+
void parse_annotations(parserstate *state, VALUE *annotations, position *annot_pos) {
14321444
*annot_pos = NullPosition;
14331445

14341446
while (true) {
@@ -1439,7 +1451,8 @@ void parse_annotations(parserstate *state, VALUE annotations, position *annot_po
14391451
*annot_pos = state->current_token.range.start;
14401452
}
14411453

1442-
rb_ary_push(annotations, parse_annotation(state));
1454+
melt_array(annotations);
1455+
rb_ary_push(*annotations, parse_annotation(state));
14431456
} else {
14441457
break;
14451458
}
@@ -1623,11 +1636,11 @@ VALUE parse_member_def(parserstate *state, bool instance_only, bool accept_overl
16231636

16241637
bool loop = true;
16251638
while (loop) {
1626-
VALUE annotations = rb_ary_new();
1639+
VALUE annotations = EMPTY_ARRAY;
16271640
position overload_annot_pos = NullPosition;
16281641

16291642
if (state->next_token.type == tANNOTATION) {
1630-
parse_annotations(state, annotations, &overload_annot_pos);
1643+
parse_annotations(state, &annotations, &overload_annot_pos);
16311644
}
16321645

16331646
switch (state->next_token.type) {
@@ -1718,7 +1731,7 @@ VALUE parse_member_def(parserstate *state, bool instance_only, bool accept_overl
17181731
*
17191732
* @param kind
17201733
* */
1721-
void class_instance_name(parserstate *state, TypeNameKind kind, VALUE *name, VALUE args, range *name_range, range *args_range) {
1734+
void class_instance_name(parserstate *state, TypeNameKind kind, VALUE *name, VALUE *args, range *name_range, range *args_range) {
17221735
parser_advance(state);
17231736

17241737
*name = parse_type_name(state, kind, name_range);
@@ -1785,11 +1798,11 @@ VALUE parse_mixin_member(parserstate *state, bool from_interface, position comme
17851798
parser_push_typevar_table(state, reset_typevar_scope);
17861799

17871800
VALUE name;
1788-
VALUE args = rb_ary_new();
1801+
VALUE args = EMPTY_ARRAY;
17891802
class_instance_name(
17901803
state,
17911804
from_interface ? INTERFACE_NAME : (INTERFACE_NAME | CLASS_NAME),
1792-
&name, args, &name_range, &args_range
1805+
&name, &args, &name_range, &args_range
17931806
);
17941807

17951808
parser_pop_typevar_table(state);
@@ -2141,13 +2154,13 @@ VALUE parse_attribute_member(parserstate *state, position comment_pos, VALUE ann
21412154
| alias_member (instance only)
21422155
*/
21432156
VALUE parse_interface_members(parserstate *state) {
2144-
VALUE members = rb_ary_new();
2157+
VALUE members = EMPTY_ARRAY;
21452158

21462159
while (state->next_token.type != kEND) {
2147-
VALUE annotations = rb_ary_new();
2160+
VALUE annotations = EMPTY_ARRAY;
21482161
position annot_pos = NullPosition;
21492162

2150-
parse_annotations(state, annotations, &annot_pos);
2163+
parse_annotations(state, &annotations, &annot_pos);
21512164

21522165
parser_advance(state);
21532166

@@ -2175,6 +2188,7 @@ VALUE parse_interface_members(parserstate *state) {
21752188
);
21762189
}
21772190

2191+
melt_array(&members);
21782192
rb_ary_push(members, member);
21792193
}
21802194

@@ -2231,7 +2245,7 @@ VALUE parse_interface_decl(parserstate *state, position comment_pos, VALUE annot
22312245
module_self_type ::= <module_name>
22322246
| module_name `[` type_list <`]`>
22332247
*/
2234-
void parse_module_self_types(parserstate *state, VALUE array) {
2248+
void parse_module_self_types(parserstate *state, VALUE *array) {
22352249
while (true) {
22362250
range self_range;
22372251
range name_range;
@@ -2244,11 +2258,11 @@ void parse_module_self_types(parserstate *state, VALUE array) {
22442258
VALUE module_name = parse_type_name(state, CLASS_NAME | INTERFACE_NAME, &name_range);
22452259
self_range.end = name_range.end;
22462260

2247-
VALUE args = rb_ary_new();
2261+
VALUE args = EMPTY_ARRAY;
22482262
if (state->next_token.type == pLBRACKET) {
22492263
parser_advance(state);
22502264
args_range.start = state->current_token.range.start;
2251-
parse_type_list(state, pRBRACKET, args);
2265+
parse_type_list(state, pRBRACKET, &args);
22522266
parser_advance(state);
22532267
self_range.end = args_range.end = state->current_token.range.end;
22542268
}
@@ -2260,7 +2274,8 @@ void parse_module_self_types(parserstate *state, VALUE array) {
22602274
rbs_loc_add_optional_child(loc, rb_intern("args"), args_range);
22612275

22622276
VALUE self_type = rbs_ast_decl_module_self(module_name, args, location);
2263-
rb_ary_push(array, self_type);
2277+
melt_array(array);
2278+
rb_ary_push(*array, self_type);
22642279

22652280
if (state->next_token.type == pCOMMA) {
22662281
parser_advance(state);
@@ -2284,14 +2299,14 @@ VALUE parse_nested_decl(parserstate *state, const char *nested_in, position anno
22842299
| `private`
22852300
*/
22862301
VALUE parse_module_members(parserstate *state) {
2287-
VALUE members = rb_ary_new();
2302+
VALUE members = EMPTY_ARRAY;
22882303

22892304
while (state->next_token.type != kEND) {
22902305
VALUE member;
2291-
VALUE annotations = rb_ary_new();
2306+
VALUE annotations = EMPTY_ARRAY;
22922307
position annot_pos = NullPosition;
22932308

2294-
parse_annotations(state, annotations, &annot_pos);
2309+
parse_annotations(state, &annotations, &annot_pos);
22952310

22962311
parser_advance(state);
22972312

@@ -2349,6 +2364,7 @@ VALUE parse_module_members(parserstate *state) {
23492364
break;
23502365
}
23512366

2367+
melt_array(&members);
23522368
rb_ary_push(members, member);
23532369
}
23542370

@@ -2371,13 +2387,13 @@ VALUE parse_module_decl0(parserstate *state, range keyword_range, VALUE module_n
23712387
decl_range.start = keyword_range.start;
23722388

23732389
VALUE type_params = parse_type_params(state, &type_params_range, true);
2374-
VALUE self_types = rb_ary_new();
2390+
VALUE self_types = EMPTY_ARRAY;
23752391

23762392
if (state->next_token.type == pCOLON) {
23772393
parser_advance(state);
23782394
colon_range = state->current_token.range;
23792395
self_types_range.start = state->next_token.range.start;
2380-
parse_module_self_types(state, self_types);
2396+
parse_module_self_types(state, &self_types);
23812397
self_types_range.end = state->current_token.range.end;
23822398
} else {
23832399
colon_range = NULL_RANGE;
@@ -2472,8 +2488,8 @@ VALUE parse_class_decl_super(parserstate *state, range *lt_range) {
24722488
*lt_range = state->current_token.range;
24732489
super_range.start = state->next_token.range.start;
24742490

2475-
args = rb_ary_new();
2476-
class_instance_name(state, CLASS_NAME, &name, args, &name_range, &args_range);
2491+
args = EMPTY_ARRAY;
2492+
class_instance_name(state, CLASS_NAME, &name, &args, &name_range, &args_range);
24772493

24782494
super_range.end = state->current_token.range.end;
24792495

@@ -2626,10 +2642,10 @@ VALUE parse_nested_decl(parserstate *state, const char *nested_in, position anno
26262642
}
26272643

26282644
VALUE parse_decl(parserstate *state) {
2629-
VALUE annotations = rb_ary_new();
2645+
VALUE annotations = EMPTY_ARRAY;
26302646
position annot_pos = NullPosition;
26312647

2632-
parse_annotations(state, annotations, &annot_pos);
2648+
parse_annotations(state, &annotations, &annot_pos);
26332649

26342650
parser_advance(state);
26352651
switch (state->current_token.type) {
@@ -2670,10 +2686,11 @@ VALUE parse_namespace(parserstate *state, range *rg) {
26702686
parser_advance(state);
26712687
}
26722688

2673-
VALUE path = rb_ary_new();
2689+
VALUE path = EMPTY_ARRAY;
26742690

26752691
while (true) {
26762692
if (state->next_token.type == tUIDENT && state->next_token2.type == pCOLON2) {
2693+
melt_array(&path);
26772694
rb_ary_push(path, ID2SYM(INTERN_TOKEN(state, state->next_token)));
26782695
if (null_position_p(rg->start)) {
26792696
rg->start = state->next_token.range.start;
@@ -2813,14 +2830,16 @@ VALUE parse_use_directive(parserstate *state) {
28132830
}
28142831

28152832
VALUE parse_signature(parserstate *state) {
2816-
VALUE dirs = rb_ary_new();
2817-
VALUE decls = rb_ary_new();
2833+
VALUE dirs = EMPTY_ARRAY;
2834+
VALUE decls = EMPTY_ARRAY;
28182835

28192836
while (state->next_token.type == kUSE) {
2837+
melt_array(&dirs);
28202838
rb_ary_push(dirs, parse_use_directive(state));
28212839
}
28222840

28232841
while (state->next_token.type != pEOF) {
2842+
melt_array(&decls);
28242843
rb_ary_push(decls, parse_decl(state));
28252844
}
28262845

@@ -2943,6 +2962,10 @@ rbsparser_lex(VALUE self, VALUE buffer, VALUE end_pos) {
29432962
void rbs__init_parser(void) {
29442963
RBS_Parser = rb_define_class_under(RBS, "Parser", rb_cObject);
29452964
rb_gc_register_mark_object(RBS_Parser);
2965+
VALUE empty_array = rb_obj_freeze(rb_ary_new());
2966+
rb_gc_register_mark_object(empty_array);
2967+
EMPTY_ARRAY = empty_array;
2968+
29462969
rb_define_singleton_method(RBS_Parser, "_parse_type", rbsparser_parse_type, 5);
29472970
rb_define_singleton_method(RBS_Parser, "_parse_method_type", rbsparser_parse_method_type, 5);
29482971
rb_define_singleton_method(RBS_Parser, "_parse_signature", rbsparser_parse_signature, 2);

0 commit comments

Comments
 (0)