Skip to content

Commit 8d8736e

Browse files
authored
Address three minor TODO comments in the test code
1 parent 0ee6302 commit 8d8736e

File tree

8 files changed

+262
-114
lines changed

8 files changed

+262
-114
lines changed

maint/GenerateUcd.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -695,7 +695,6 @@ def write_bitsets(list, item_size):
695695
found = 1
696696

697697
# Add new characters to an existing set
698-
# TODO: make sure the data doesn't overflow a list[]
699698

700699
if found:
701700
found = 0

src/pcre2_convert.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,12 +1119,16 @@ pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
11191119
pcre2_convert_context *ccontext)
11201120
{
11211121
int rc;
1122+
PCRE2_UCHAR null_str[1] = { 0xcd };
11221123
PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
11231124
PCRE2_UCHAR *use_buffer = dummy_buffer;
11241125
PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
11251126
BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
11261127
uint32_t pattype = options & TYPE_OPTIONS;
11271128

1129+
if (pattern == NULL && plength == 0)
1130+
pattern = null_str;
1131+
11281132
if (pattern == NULL || bufflenptr == NULL)
11291133
{
11301134
if (bufflenptr != NULL) *bufflenptr = 0; /* Error offset */

src/pcre2test.c

Lines changed: 50 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,8 @@ claim to be C99 don't support it (hence DISABLE_PERCENT_ZT). */
214214
#endif
215215

216216
#define CFORE_UNSET UINT32_MAX /* Unset value for startend/cfail/cerror fields */
217-
#define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type field */
217+
#define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type/convert_length fields */
218+
#define MOD_STR_UNSET UINT8_MAX /* Sentinel length for unset string options */
218219
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
219220
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
220221
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
@@ -517,7 +518,7 @@ enum { MOD_CTC, /* Applies to a compile context */
517518
MOD_OPT, /* Is an option bit */
518519
MOD_OPTMZ, /* Is an optimization directive */
519520
MOD_SIZ, /* Is a PCRE2_SIZE value */
520-
MOD_STR }; /* Is a string */
521+
MOD_STR }; /* Is a string; Pascal-encoded with length in first byte */
521522

522523
/* Control bits. Some apply to compiling, some to matching, but some can be set
523524
either on a pattern or a data line, so they must all be distinct. There are now
@@ -616,15 +617,15 @@ different things in the two cases. */
616617

617618
/* Structures for holding modifier information for patterns and subject strings
618619
(data). Fields containing modifiers that can be set either for a pattern or a
619-
subject must be at the start and in the same order in both cases so that the
620-
same offset in the big table below works for both. */
620+
subject (MOD_PD[P]/MOD_PND) must be at the start and in the same order in both
621+
structures so that the same offset in the big table below works for both. */
621622

622623
typedef struct patctl { /* Structure for pattern modifiers. */
623624
uint32_t options; /* Must be in same position as datctl */
624625
uint32_t control; /* Must be in same position as datctl */
625626
uint32_t control2; /* Must be in same position as datctl */
626627
uint32_t jitstack; /* Must be in same position as datctl */
627-
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
628+
uint8_t replacement[1+REPLACE_MODSIZE]; /* So must this */
628629
uint32_t substitute_skip; /* Must be in same position as datctl */
629630
uint32_t substitute_stop; /* Must be in same position as datctl */
630631
uint32_t jit;
@@ -635,7 +636,7 @@ typedef struct patctl { /* Structure for pattern modifiers. */
635636
uint32_t convert_glob_escape;
636637
uint32_t convert_glob_separator;
637638
int32_t regerror_buffsize;
638-
uint8_t locale[LOCALESIZE];
639+
uint8_t locale[1+LOCALESIZE];
639640
} patctl;
640641

641642
#define MAXCPYGET 10
@@ -646,10 +647,10 @@ typedef struct datctl { /* Structure for data line modifiers. */
646647
uint32_t control; /* Must be in same position as patctl */
647648
uint32_t control2; /* Must be in same position as patctl */
648649
uint32_t jitstack; /* Must be in same position as patctl */
649-
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
650+
uint8_t replacement[1+REPLACE_MODSIZE]; /* So must this */
650651
uint32_t substitute_skip; /* Must be in same position as patctl */
651652
uint32_t substitute_stop; /* Must be in same position as patctl */
652-
uint8_t substitute_subject[SUBSTITUTE_SUBJECT_MODSIZE];
653+
uint8_t substitute_subject[1+SUBSTITUTE_SUBJECT_MODSIZE];
653654
uint32_t startend[2];
654655
uint32_t cerror[2];
655656
uint32_t cfail[2];
@@ -662,6 +663,31 @@ typedef struct datctl { /* Structure for data line modifiers. */
662663
uint8_t get_names[LENCPYGET];
663664
} datctl;
664665

666+
/* Helper functions to reset the structures: zero every field, then fill in the default values and "unset" sentinels. */
667+
668+
static void patctl_zero(patctl *p)
669+
{
670+
memset(p, 0, sizeof(patctl));
671+
p->replacement[0] = MOD_STR_UNSET;
672+
p->convert_type = CONVERT_UNSET;
673+
p->convert_length = CONVERT_UNSET;
674+
p->regerror_buffsize = -1;
675+
p->locale[0] = MOD_STR_UNSET;
676+
}
677+
678+
static void datctl_zero(datctl *d)
679+
{
680+
memset(d, 0, sizeof(datctl));
681+
d->replacement[0] = MOD_STR_UNSET;
682+
d->substitute_subject[0] = MOD_STR_UNSET;
683+
d->oveccount = DEFAULT_OVECCOUNT;
684+
d->copy_numbers[0] = -1;
685+
d->get_numbers[0] = -1;
686+
d->startend[0] = d->startend[1] = CFORE_UNSET;
687+
d->cerror[0] = d->cerror[1] = CFORE_UNSET;
688+
d->cfail[0] = d->cfail[1] = CFORE_UNSET;
689+
}
690+
665691
/* Ids for which context to modify. */
666692

667693
enum { CTX_PAT, /* Active pattern context */
@@ -675,9 +701,18 @@ enum { CTX_PAT, /* Active pattern context */
675701
#define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
676702
#define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
677703
#define PO(name) offsetof(patctl, name)
678-
#define PD(name) PO(name)
679704
#define DO(name) offsetof(datctl, name)
680705

706+
/* Validate that the offsets for the shared fields do indeed match. */
707+
708+
STATIC_ASSERT(PO(options) == DO(options), options_mismatch);
709+
STATIC_ASSERT(PO(control) == DO(control), control_mismatch);
710+
STATIC_ASSERT(PO(control2) == DO(control2), control2_mismatch);
711+
STATIC_ASSERT(PO(jitstack) == DO(jitstack), jitstack_mismatch);
712+
STATIC_ASSERT(PO(replacement) == DO(replacement), replacement_mismatch);
713+
STATIC_ASSERT(PO(substitute_skip) == DO(substitute_skip), substitute_skip_mismatch);
714+
STATIC_ASSERT(PO(substitute_stop) == DO(substitute_stop), substitute_stop_mismatch);
715+
681716
/* Table of all long-form modifiers. Must be in collating sequence of modifier
682717
name because it is searched by binary chop. */
683718

@@ -706,7 +741,7 @@ static modstruct modlist[] = {
706741
{ "alt_extended_class", MOD_PAT, MOD_OPT, PCRE2_ALT_EXTENDED_CLASS, PO(options) },
707742
{ "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
708743
{ "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
709-
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
744+
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PO(options) },
710745
{ "ascii_all", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_ALL, CO(extra_options) },
711746
{ "ascii_bsd", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSD, CO(extra_options) },
712747
{ "ascii_bss", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSS, CO(extra_options) },
@@ -746,7 +781,7 @@ static modstruct modlist[] = {
746781
{ "dotstar_anchor", MOD_CTC, MOD_OPTMZ, PCRE2_DOTSTAR_ANCHOR, 0 },
747782
{ "dotstar_anchor_off", MOD_CTC, MOD_OPTMZ, PCRE2_DOTSTAR_ANCHOR_OFF, 0 },
748783
{ "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
749-
{ "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
784+
{ "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PO(options) },
750785
{ "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
751786
{ "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
752787
{ "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
@@ -779,7 +814,7 @@ static modstruct modlist[] = {
779814
{ "max_pattern_compiled_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_compiled_length) },
780815
{ "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
781816
{ "max_varlookbehind", MOD_CTC, MOD_INT, 0, CO(max_varlookbehind) },
782-
{ "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
817+
{ "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PO(control) },
783818
{ "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
784819
{ "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
785820
{ "never_callout", MOD_CTC, MOD_OPT, PCRE2_EXTRA_NEVER_CALLOUT, CO(extra_options) },
@@ -792,7 +827,7 @@ static modstruct modlist[] = {
792827
{ "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
793828
{ "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) },
794829
{ "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
795-
{ "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
830+
{ "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PO(options) },
796831
{ "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
797832
{ "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
798833
{ "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
@@ -3529,24 +3564,6 @@ char *arg_subject = NULL;
35293564
char *arg_pattern = NULL;
35303565
char *arg_error = NULL;
35313566

3532-
/* The offsets to the options and control bits fields of the pattern and data
3533-
control blocks must be the same so that common options and controls such as
3534-
"anchored" or "memory" can work for either of them from a single table entry.
3535-
We cannot test this till runtime because "offsetof" does not work in the
3536-
preprocessor. */
3537-
3538-
// TODO This comment above is not correct: we can test it at compile time,
3539-
// although it is true that it's not possible using the preprocessor. Use our
3540-
// new STATIC_ASSERT macro.
3541-
3542-
if (PO(options) != DO(options) || PO(control) != DO(control) ||
3543-
PO(control2) != DO(control2))
3544-
{
3545-
fprintf(stderr, "** Coding error: "
3546-
"options and control offsets for pattern and data must be the same.\n");
3547-
return 1;
3548-
}
3549-
35503567
/* Get buffers from malloc() so that valgrind will check their misuse when
35513568
debugging. They grow automatically when very long lines are read. The 16-
35523569
and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
@@ -3567,17 +3584,8 @@ _setmode( _fileno( stdout ), _O_BINARY );
35673584

35683585
locale_name[0] = 0;
35693586

3570-
memset(&def_patctl, 0, sizeof(patctl));
3571-
def_patctl.convert_type = CONVERT_UNSET;
3572-
def_patctl.regerror_buffsize = -1;
3573-
3574-
memset(&def_datctl, 0, sizeof(datctl));
3575-
def_datctl.oveccount = DEFAULT_OVECCOUNT;
3576-
def_datctl.copy_numbers[0] = -1;
3577-
def_datctl.get_numbers[0] = -1;
3578-
def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
3579-
def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
3580-
def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
3587+
patctl_zero(&def_patctl);
3588+
datctl_zero(&def_datctl);
35813589

35823590
/* Scan command line options. */
35833591

0 commit comments

Comments
 (0)