Skip to content

Commit 5d3d5d7

Browse files
committed
[MERGE] Shrink RExC_state_t struct
This converts a bunch of I32 fields in the struct to booleans, and reorders the fields to eliminate holes, taking the size of the struct down from 386 bytes with 4 bytes of holes to 344 bytes with no holes on a 64-bit Linux build.
2 parents b6ec6bc + 1a0ea8b commit 5d3d5d7

File tree

2 files changed

+44
-43
lines changed

2 files changed

+44
-43
lines changed

regcomp.c

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
595595
*plen_p = d - dst;
596596
*pat_p = (char*) dst;
597597
SAVEFREEPV(*pat_p);
598-
RExC_orig_utf8 = RExC_utf8 = 1;
598+
RExC_orig_utf8 = RExC_utf8 = true;
599599
}
600600

601601

@@ -1602,7 +1602,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
16021602
}
16031603

16041604
/* ignore the utf8ness if the pattern is 0 length */
1605-
RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
1605+
RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? false : cBOOL(SvUTF8(pat));
16061606
RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
16071607

16081608

@@ -1637,7 +1637,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
16371637

16381638
if ( old_re
16391639
&& !recompile
1640-
&& cBOOL(RX_UTF8(old_re)) == cBOOL(RExC_utf8)
1640+
&& cBOOL(RX_UTF8(old_re)) == RExC_utf8
16411641
&& ( RX_COMPFLAGS(old_re) == ( orig_rx_flags & RXf_PMf_FLAGCOPYMASK ) )
16421642
&& RX_PRELEN(old_re) == plen
16431643
&& memEQ(RX_PRECOMP(old_re), exp, plen)
@@ -1669,7 +1669,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
16691669
/* Set to use unicode semantics if the pattern is in utf8 and has the
16701670
* 'depends' charset specified, as it means unicode when utf8 */
16711671
set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
1672-
RExC_uni_semantics = 1;
1672+
RExC_uni_semantics = true;
16731673
}
16741674

16751675
RExC_pm_flags = pm_flags;
@@ -1688,14 +1688,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
16881688
}
16891689
assert(!pRExC_state->runtime_code_qr);
16901690

1691-
RExC_sawback = 0;
1691+
RExC_sawback = false;
16921692

16931693
RExC_seen = 0;
16941694
RExC_maxlen = 0;
1695-
RExC_in_lookaround = 0;
1695+
RExC_in_lookaround = false;
16961696
RExC_seen_zerolen = *exp == '^' ? -1 : 0;
1697-
RExC_recode_x_to_native = 0;
1698-
RExC_in_multi_char_class = 0;
1697+
RExC_recode_x_to_native = false;
1698+
RExC_in_multi_char_class = false;
16991699

17001700
RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = RExC_precomp = exp;
17011701
RExC_precomp_end = RExC_end = exp + plen;
@@ -2910,7 +2910,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
29102910
RExC_rxi->data->data[num]=(void*)sv_dat;
29112911
SvREFCNT_inc_simple_void_NN(sv_dat);
29122912
}
2913-
RExC_sawback = 1;
2913+
RExC_sawback = true;
29142914
ret = reg2node(pRExC_state,
29152915
((! FOLD)
29162916
? REFN
@@ -2945,7 +2945,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
29452945
* If the construct is empty generates a NOTHING op and returns its
29462946
* regnode_offset, which the caller would then return to its caller.
29472947
*
2948-
* If the construct is not empty increments RExC_in_lookaround, and turns
2948+
* If the construct is not empty sets RExC_in_lookaround, and turns
29492949
* on any flags provided in RExC_seen, and then returns 0 to signify
29502950
* that parsing should continue.
29512951
*
@@ -2976,7 +2976,7 @@ S_reg_la_NOTHING(pTHX_ RExC_state_t *pRExC_state, U32 flags,
29762976
}
29772977

29782978
RExC_seen |= flags;
2979-
RExC_in_lookaround++;
2979+
RExC_in_lookaround = true;
29802980
return 0; /* keep parsing! */
29812981
}
29822982

@@ -2993,7 +2993,7 @@ S_reg_la_NOTHING(pTHX_ RExC_state_t *pRExC_state, U32 flags,
29932993
* If the construct is empty generates an OPFAIL op and returns its
29942994
* regnode_offset which the caller should then return to its caller.
29952995
*
2996-
* If the construct is not empty increments RExC_in_lookaround, and also
2996+
* If the construct is not empty sets RExC_in_lookaround, and also
29972997
* increments RExC_seen_zerolen, and turns on the flags provided in
29982998
* RExC_seen, and then returns 0 to signify that parsing should continue.
29992999
*
@@ -3026,7 +3026,7 @@ S_reg_la_OPFAIL(pTHX_ RExC_state_t *pRExC_state, U32 flags,
30263026
* does not match ever. */
30273027
RExC_seen_zerolen++;
30283028
RExC_seen |= flags;
3029-
RExC_in_lookaround++;
3029+
RExC_in_lookaround = true;
30303030
return 0; /* keep parsing! */
30313031
}
30323032

@@ -3105,7 +3105,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
31053105
I32 after_freeze = 0;
31063106
I32 num; /* numeric backreferences */
31073107
SV * max_open; /* Max number of unclosed parens */
3108-
I32 was_in_lookaround = RExC_in_lookaround;
3108+
bool was_in_lookaround = RExC_in_lookaround;
31093109
I32 fake_eval = 0; /* matches paren */
31103110

31113111
/* The difference between the following variables can be seen with *
@@ -3427,7 +3427,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
34273427
}
34283428

34293429
RExC_seen_zerolen++;
3430-
RExC_in_lookaround++;
3430+
RExC_in_lookaround = true;
34313431
RExC_seen |= seen_flag_set;
34323432

34333433
RExC_parse_set(start_arg);
@@ -5453,7 +5453,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
54535453

54545454
/* The values are Unicode, and therefore have to be converted to native
54555455
* on a non-Unicode (meaning non-ASCII) platform. */
5456-
SET_recode_x_to_native(1);
5456+
SET_recode_x_to_native(true);
54575457
}
54585458

54595459
/* Here, we have the string the name evaluates to, ready to be parsed,
@@ -5479,7 +5479,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
54795479
RExC_start = save_start;
54805480
RExC_parse_set(endbrace);
54815481
RExC_end = orig_end;
5482-
SET_recode_x_to_native(0);
5482+
SET_recode_x_to_native(false);
54835483

54845484
SvREFCNT_dec_NN(substitute_parse);
54855485

@@ -5901,7 +5901,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
59015901
RExC_seen_d_op = true;
59025902
}
59035903
else if (op == BOUNDL) {
5904-
RExC_contains_locale = 1;
5904+
RExC_contains_locale = true;
59055905
}
59065906

59075907
if (invert) {
@@ -6203,7 +6203,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
62036203
REQUIRE_PARENS_PASS;
62046204
}
62056205
}
6206-
RExC_sawback = 1;
6206+
RExC_sawback = true;
62076207
ret = reg2node(pRExC_state,
62086208
((! FOLD)
62096209
? REF
@@ -6770,7 +6770,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
67706770
* existing node, so can start a new node with this one */
67716771
if (! len) {
67726772
node_type = EXACTFL;
6773-
RExC_contains_locale = 1;
6773+
RExC_contains_locale = true;
67746774
}
67756775
else if (node_type == EXACT) {
67766776
p = oldp;
@@ -10822,7 +10822,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1082210822
RExC_parse_set( RExC_start );
1082310823
RExC_copy_start_in_constructed = RExC_start + constructed_prefix_len;
1082410824
RExC_end = RExC_parse + len;
10825-
RExC_in_multi_char_class = 1;
10825+
RExC_in_multi_char_class = true;
1082610826

1082710827
ret = reg(pRExC_state, 1, &reg_flags, depth+1);
1082810828

@@ -10832,7 +10832,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1083210832
RExC_parse_set(save_parse);
1083310833
RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = save_start;
1083410834
RExC_end = save_end;
10835-
RExC_in_multi_char_class = 0;
10835+
RExC_in_multi_char_class = false;
1083610836
SvREFCNT_dec_NN(multi_char_matches);
1083710837
SvREFCNT_dec(properties);
1083810838
SvREFCNT_dec(cp_list);
@@ -11245,7 +11245,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1124511245
}
1124611246

1124711247
if (anyof_flags & ANYOF_LOCALE_FLAGS) {
11248-
RExC_contains_locale = 1;
11248+
RExC_contains_locale = true;
1124911249
}
1125011250

1125111251
if (optimizable) {

regcomp_internal.h

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,16 @@ struct RExC_state_t {
5353
regnode *emit_start; /* Start of emitted-code area */
5454
regnode_offset emit; /* Code-emit pointer */
5555
I32 naughty; /* How bad is this pattern? */
56-
I32 sawback; /* Did we see \1, ...? */
56+
bool sawback; /* Did we see \1, ...? */
57+
58+
bool utf8; /* whether the pattern is utf8 or not */
59+
bool orig_utf8; /* whether the pattern was originally in utf8 */
60+
/* XXX use this for future optimisation of case
61+
* where pattern must be upgraded to utf8. */
62+
bool uni_semantics; /* If a d charset modifier should use unicode
63+
rules, even if the pattern is not in
64+
utf8 */
65+
5766
SSize_t size; /* Number of regnode equivalents in
5867
pattern */
5968
Size_t sets_depth; /* Counts recursion depth of already-
@@ -127,37 +136,29 @@ struct RExC_state_t {
127136
accept */
128137
I32 seen_zerolen;
129138
regnode *end_op; /* END node in program */
130-
I32 utf8; /* whether the pattern is utf8 or not */
131-
I32 orig_utf8; /* whether the pattern was originally in utf8 */
132-
/* XXX use this for future optimisation of case
133-
* where pattern must be upgraded to utf8. */
134-
I32 uni_semantics; /* If a d charset modifier should use unicode
135-
rules, even if the pattern is not in
136-
utf8 */
137-
139+
bool in_lookaround;
140+
bool contains_locale;
141+
bool recode_x_to_native;
142+
bool in_multi_char_class;
138143
I32 recurse_count; /* Number of recurse regops we have generated */
139144
regnode **recurse; /* Recurse regops */
140145
U8 *study_chunk_recursed; /* bitmap of which subs we have moved
141146
through */
142147
U32 study_chunk_recursed_bytes; /* bytes in bitmap */
143-
I32 in_lookaround;
144-
I32 contains_locale;
145-
I32 recode_x_to_native;
146-
I32 in_multi_char_class;
147148
int code_index; /* next code_blocks[] slot */
148149
struct reg_code_blocks *code_blocks;/* positions of literal (?{})
149150
within pattern */
150151
SSize_t maxlen; /* minimum possible number of chars in string to match */
151152
scan_frame *frame_head;
152153
scan_frame *frame_last;
153154
U32 frame_count;
154-
AV *warn_text;
155-
HV *unlexed_names;
156-
SV *runtime_code_qr; /* qr with the runtime code blocks */
157155
bool seen_d_op;
158156
bool strict;
159157
bool study_started;
160158
bool in_script_run;
159+
AV *warn_text;
160+
HV *unlexed_names;
161+
SV *runtime_code_qr; /* qr with the runtime code blocks */
161162
bool use_BRANCHJ;
162163
bool sWARN_EXPERIMENTAL__VLB;
163164
bool sWARN_EXPERIMENTAL__REGEX_SETS;
@@ -170,12 +171,12 @@ struct RExC_state_t {
170171
* See GH Issue #21558 and also ba6e2c38aafc23cf114f3ba0d0ff3baead34328b
171172
*/
172173
#if defined(DEBUGGING) || !defined(USE_DYNAMIC_LOADING)
173-
const char *lastparse;
174174
I32 lastnum;
175-
U32 study_chunk_recursed_count;
175+
const char *lastparse;
176176
AV *paren_name_list; /* idx -> name */
177177
SV *mysv1;
178178
SV *mysv2;
179+
U32 study_chunk_recursed_count;
179180
#endif
180181
};
181182

@@ -497,7 +498,7 @@ struct RExC_state_t {
497498
STMT_START { \
498499
if (DEPENDS_SEMANTICS) { \
499500
set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET); \
500-
RExC_uni_semantics = 1; \
501+
RExC_uni_semantics = true; \
501502
if (RExC_seen_d_op && LIKELY(! IN_PARENS_PASS)) { \
502503
/* No need to restart the parse if we haven't seen \
503504
* anything that differs between /u and /d, and no need \
@@ -740,7 +741,7 @@ static const scan_data_t zero_scan_data = {
740741

741742

742743

743-
#define UTF cBOOL(RExC_utf8)
744+
#define UTF RExC_utf8
744745

745746
/* The enums for all these are ordered so things work out correctly */
746747
#define LOC (get_regex_charset(RExC_flags) == REGEX_LOCALE_CHARSET)

0 commit comments

Comments
 (0)