Skip to content

Commit ea16201

Browse files
authored
De-macro the pcre2test code (#797)
The current style in pcre2test.c is rather hard to work with. Every call to a PCRE2 function, however small, has to be wrapped in many layers of complicated macros such as "SET", "CASTVAR", and others. At the cost of a modest increase in binary size, I would like to split out roughly half of the logic in pcre2test.c into a separate file that can be included multiple times.
1 parent dd0697f commit ea16201

File tree

4 files changed

+2216
-14677
lines changed

4 files changed

+2216
-14677
lines changed

src/pcre2_internal.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ there will be many irrelevant consequential errors. */
7474
PCRE2_CODE_UNIT_WIDTH != 16 && \
7575
PCRE2_CODE_UNIT_WIDTH != 32))
7676
#error PCRE2_CODE_UNIT_WIDTH must be defined as 8, 16, or 32.
77-
#include <AbandonCompile>
7877
#endif
7978

8079

src/pcre2_intmodedep.h

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,16 @@ to have access to the hidden structures at all supported widths.
4747
4848
Some of the mode-dependent macros are required at different widths for
4949
different parts of the pcre2test code (in particular, the included
50-
pcre2_printint_inc.h file). We undefine them here so that they can be re-defined for
51-
multiple inclusions. Not all of these are used in pcre2test, but it's easier
52-
just to undefine them all. */
50+
pcre2_printint_inc.h file). We undefine them here so that they can be re-defined
51+
for multiple inclusions. Not all of these are used in pcre2test, but it's easier
52+
just to undefine them all.
53+
54+
You can also include pcre2_intmodedep.h with PCRE2_CODE_UNIT_WIDTH defined to
55+
zero in order to simply clear the previous macros. */
56+
57+
#ifndef PCRE2_CODE_UNIT_WIDTH
58+
#error PCRE2_CODE_UNIT_WIDTH must be defined
59+
#endif
5360

5461
#undef ACROSSCHAR
5562
#undef BACKCHAR
@@ -81,9 +88,14 @@ just to undefine them all. */
8188
#undef PUTINC
8289
#undef TABLE_GET
8390

91+
/*************************************************
92+
* MACROS *
93+
*************************************************/
8494

95+
/* Macros may be undefined and re-defined if the same file handles multiple
96+
bit-widths. */
8597

86-
/* -------------------------- MACROS ----------------------------- */
98+
#if PCRE2_CODE_UNIT_WIDTH != 0
8799

88100
/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
89101
(always stored in big-endian order in 8-bit mode) by default. These are used,
@@ -202,7 +214,7 @@ arithmetic results in a signed value. Hence the cast. */
202214
#define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
203215
#define PUT2(a,n,d) a[n] = (d) >> 8, a[(n)+1] = (d) & 255
204216

205-
#else /* Code units are 16 or 32 bits */
217+
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
206218
#define IMM2_SIZE 1
207219
#define GET2(a,n) a[n]
208220
#define PUT2(a,n,d) a[n] = d
@@ -227,7 +239,7 @@ check is needed before accessing these tables. */
227239
#define CHMAX_255(c) TRUE
228240
#endif /* SUPPORT_UNICODE */
229241

230-
#else /* Code units are 16 or 32 bits */
242+
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
231243
#define CHMAX_255(c) ((c) <= 255u)
232244
#define MAX_255(c) ((c) <= 255u)
233245
#define MAX_MARK ((1u << 16) - 1)
@@ -474,7 +486,7 @@ code. */
474486

475487
/* ------------------- 32-bit support ------------------ */
476488

477-
#else
489+
#elif PCRE2_CODE_UNIT_WIDTH == 32
478490

479491
/* These are trivial for the 32-bit library, since all UTF-32 characters fit
480492
into one PCRE2_UCHAR unit. */
@@ -555,6 +567,32 @@ These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */
555567
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
556568
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
557569

570+
#endif /* PCRE2_CODE_UNIT_WIDTH != 0 */
571+
572+
573+
574+
/*************************************************
575+
* STRUCTURES *
576+
*************************************************/
577+
578+
/* We need a more complex include guard than usual, because the file can be
579+
included once for each bit-width to define the various structures. */
580+
581+
#if PCRE2_CODE_UNIT_WIDTH == 8 && !defined PCRE2_INTMODEDEP_IDEMPOTENT_GUARD_8
582+
#define PCRE2_INTMODEDEP_IDEMPOTENT_GUARD_8
583+
#define PCRE2_INTMODEDEP_CAN_DEFINE
584+
#endif
585+
#if PCRE2_CODE_UNIT_WIDTH == 16 && !defined PCRE2_INTMODEDEP_IDEMPOTENT_GUARD_16
586+
#define PCRE2_INTMODEDEP_IDEMPOTENT_GUARD_16
587+
#define PCRE2_INTMODEDEP_CAN_DEFINE
588+
#endif
589+
#if PCRE2_CODE_UNIT_WIDTH == 32 && !defined PCRE2_INTMODEDEP_IDEMPOTENT_GUARD_32
590+
#define PCRE2_INTMODEDEP_IDEMPOTENT_GUARD_32
591+
#define PCRE2_INTMODEDEP_CAN_DEFINE
592+
#endif
593+
594+
#ifdef PCRE2_INTMODEDEP_CAN_DEFINE
595+
#undef PCRE2_INTMODEDEP_CAN_DEFINE
558596

559597
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
560598

@@ -998,4 +1036,6 @@ typedef struct dfa_match_block {
9981036

9991037
#endif /* PCRE2_PCRE2TEST */
10001038

1039+
#endif /* PCRE2_INTMODEDEP_CAN_DEFINE */
1040+
10011041
/* End of pcre2_intmodedep.h */

0 commit comments

Comments
 (0)