@@ -214,7 +214,8 @@ claim to be C99 don't support it (hence DISABLE_PERCENT_ZT). */
214214#endif
215215
216216#define CFORE_UNSET UINT32_MAX /* Unset value for startend/cfail/cerror fields */
217- #define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type field */
217+ #define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type/convert_length fields */
218+ #define MOD_STR_UNSET UINT8_MAX /* Sentinel length for unset string options */
218219#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
219220#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
220221#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
@@ -517,7 +518,7 @@ enum { MOD_CTC, /* Applies to a compile context */
517518 MOD_OPT , /* Is an option bit */
518519 MOD_OPTMZ , /* Is an optimization directive */
519520 MOD_SIZ , /* Is a PCRE2_SIZE value */
520- MOD_STR }; /* Is a string */
521+ MOD_STR }; /* Is a string; Pascal-encoded with length in first byte */
521522
522523/* Control bits. Some apply to compiling, some to matching, but some can be set
523524either on a pattern or a data line, so they must all be distinct. There are now
@@ -616,15 +617,15 @@ different things in the two cases. */
616617
617618/* Structures for holding modifier information for patterns and subject strings
618619(data). Fields containing modifiers that can be set either for a pattern or a
619- subject must be at the start and in the same order in both cases so that the
620- same offset in the big table below works for both. */
620+ subject (MOD_PD[P]/MOD_PND) must be at the start and in the same order in both
621+ structures so that the same offset in the big table below works for both. */
621622
622623typedef struct patctl { /* Structure for pattern modifiers. */
623624 uint32_t options ; /* Must be in same position as datctl */
624625 uint32_t control ; /* Must be in same position as datctl */
625626 uint32_t control2 ; /* Must be in same position as datctl */
626627 uint32_t jitstack ; /* Must be in same position as datctl */
627- uint8_t replacement [REPLACE_MODSIZE ]; /* So must this */
628+ uint8_t replacement [1 + REPLACE_MODSIZE ]; /* So must this */
628629 uint32_t substitute_skip ; /* Must be in same position as datctl */
629630 uint32_t substitute_stop ; /* Must be in same position as datctl */
630631 uint32_t jit ;
@@ -635,7 +636,7 @@ typedef struct patctl { /* Structure for pattern modifiers. */
635636 uint32_t convert_glob_escape ;
636637 uint32_t convert_glob_separator ;
637638 int32_t regerror_buffsize ;
638- uint8_t locale [LOCALESIZE ];
639+ uint8_t locale [1 + LOCALESIZE ];
639640} patctl ;
640641
641642#define MAXCPYGET 10
@@ -646,10 +647,10 @@ typedef struct datctl { /* Structure for data line modifiers. */
646647 uint32_t control ; /* Must be in same position as patctl */
647648 uint32_t control2 ; /* Must be in same position as patctl */
648649 uint32_t jitstack ; /* Must be in same position as patctl */
649- uint8_t replacement [REPLACE_MODSIZE ]; /* So must this */
650+ uint8_t replacement [1 + REPLACE_MODSIZE ]; /* So must this */
650651 uint32_t substitute_skip ; /* Must be in same position as patctl */
651652 uint32_t substitute_stop ; /* Must be in same position as patctl */
652- uint8_t substitute_subject [SUBSTITUTE_SUBJECT_MODSIZE ];
653+ uint8_t substitute_subject [1 + SUBSTITUTE_SUBJECT_MODSIZE ];
653654 uint32_t startend [2 ];
654655 uint32_t cerror [2 ];
655656 uint32_t cfail [2 ];
@@ -662,6 +663,31 @@ typedef struct datctl { /* Structure for data line modifiers. */
662663 uint8_t get_names [LENCPYGET ];
663664} datctl ;
664665
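666+ /* Helper functions to reset the structures to their default state: every field is zeroed, then the fields whose "unset" or default value is non-zero are filled in. */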
667+
668+ static void patctl_zero (patctl * p ) /* Reset *p to the default pattern-modifier state; p must point to a valid patctl. */
669+ {
670+ memset (p , 0 , sizeof (patctl )); /* Clear every field first; only non-zero defaults are set below. */
671+ p -> replacement [0 ] = MOD_STR_UNSET ; /* Length byte holds the "unset" sentinel: no replacement string given. */
672+ p -> convert_type = CONVERT_UNSET ; /* Marked as unset rather than left at 0. */
673+ p -> convert_length = CONVERT_UNSET ; /* Marked as unset rather than left at 0. */
674+ p -> regerror_buffsize = -1 ; /* NOTE(review): -1 presumably means "not specified" - confirm at the use site. */
675+ p -> locale [0 ] = MOD_STR_UNSET ; /* Length byte holds the "unset" sentinel: no locale string given. */
676+ }
677+
678+ static void datctl_zero (datctl * d ) /* Reset *d to the default data-line-modifier state; d must point to a valid datctl. */
679+ {
680+ memset (d , 0 , sizeof (datctl )); /* Clear every field first; only non-zero defaults are set below. */
681+ d -> replacement [0 ] = MOD_STR_UNSET ; /* Length byte holds the "unset" sentinel: no replacement string given. */
682+ d -> substitute_subject [0 ] = MOD_STR_UNSET ; /* Length byte holds the "unset" sentinel: no substitute subject given. */
683+ d -> oveccount = DEFAULT_OVECCOUNT ; /* Start from the default ovector count. */
684+ d -> copy_numbers [0 ] = -1 ; /* NOTE(review): -1 in slot 0 presumably marks an empty list - confirm at the use site. */
685+ d -> get_numbers [0 ] = -1 ; /* NOTE(review): same convention as copy_numbers, presumably - confirm. */
686+ d -> startend [0 ] = d -> startend [1 ] = CFORE_UNSET ; /* Both entries marked as unset. */
687+ d -> cerror [0 ] = d -> cerror [1 ] = CFORE_UNSET ; /* Both entries marked as unset. */
688+ d -> cfail [0 ] = d -> cfail [1 ] = CFORE_UNSET ; /* Both entries marked as unset. */
689+ }
690+
665691/* Ids for which context to modify. */
666692
667693enum { CTX_PAT , /* Active pattern context */
@@ -675,9 +701,18 @@ enum { CTX_PAT, /* Active pattern context */
675701#define CO (name ) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
676702#define MO (name ) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
677703#define PO (name ) offsetof(patctl, name)
678- #define PD (name ) PO(name)
679704#define DO (name ) offsetof(datctl, name)
680705
706+ /* Validate that the offsets for the shared fields do indeed match. */
707+
708+ STATIC_ASSERT (PO (options ) == DO (options ), options_mismatch ); /* Each line compares a field's offset in patctl (PO) with its offset in datctl (DO). */
709+ STATIC_ASSERT (PO (control ) == DO (control ), control_mismatch );
710+ STATIC_ASSERT (PO (control2 ) == DO (control2 ), control2_mismatch );
711+ STATIC_ASSERT (PO (jitstack ) == DO (jitstack ), jitstack_mismatch );
712+ STATIC_ASSERT (PO (replacement ) == DO (replacement ), replacement_mismatch );
713+ STATIC_ASSERT (PO (substitute_skip ) == DO (substitute_skip ), substitute_skip_mismatch );
714+ STATIC_ASSERT (PO (substitute_stop ) == DO (substitute_stop ), substitute_stop_mismatch ); /* NOTE(review): STATIC_ASSERT is defined elsewhere; presumably a false condition fails the build - confirm. */
715+
681716/* Table of all long-form modifiers. Must be in collating sequence of modifier
682717name because it is searched by binary chop. */
683718
@@ -706,7 +741,7 @@ static modstruct modlist[] = {
706741 { "alt_extended_class" , MOD_PAT , MOD_OPT , PCRE2_ALT_EXTENDED_CLASS , PO (options ) },
707742 { "alt_verbnames" , MOD_PAT , MOD_OPT , PCRE2_ALT_VERBNAMES , PO (options ) },
708743 { "altglobal" , MOD_PND , MOD_CTL , CTL_ALTGLOBAL , PO (control ) },
709- { "anchored" , MOD_PD , MOD_OPT , PCRE2_ANCHORED , PD (options ) },
744+ { "anchored" , MOD_PD , MOD_OPT , PCRE2_ANCHORED , PO (options ) },
710745 { "ascii_all" , MOD_CTC , MOD_OPT , PCRE2_EXTRA_ASCII_ALL , CO (extra_options ) },
711746 { "ascii_bsd" , MOD_CTC , MOD_OPT , PCRE2_EXTRA_ASCII_BSD , CO (extra_options ) },
712747 { "ascii_bss" , MOD_CTC , MOD_OPT , PCRE2_EXTRA_ASCII_BSS , CO (extra_options ) },
@@ -746,7 +781,7 @@ static modstruct modlist[] = {
746781 { "dotstar_anchor" , MOD_CTC , MOD_OPTMZ , PCRE2_DOTSTAR_ANCHOR , 0 },
747782 { "dotstar_anchor_off" , MOD_CTC , MOD_OPTMZ , PCRE2_DOTSTAR_ANCHOR_OFF , 0 },
748783 { "dupnames" , MOD_PATP , MOD_OPT , PCRE2_DUPNAMES , PO (options ) },
749- { "endanchored" , MOD_PD , MOD_OPT , PCRE2_ENDANCHORED , PD (options ) },
784+ { "endanchored" , MOD_PD , MOD_OPT , PCRE2_ENDANCHORED , PO (options ) },
750785 { "escaped_cr_is_lf" , MOD_CTC , MOD_OPT , PCRE2_EXTRA_ESCAPED_CR_IS_LF , CO (extra_options ) },
751786 { "expand" , MOD_PAT , MOD_CTL , CTL_EXPAND , PO (control ) },
752787 { "extended" , MOD_PATP , MOD_OPT , PCRE2_EXTENDED , PO (options ) },
@@ -779,7 +814,7 @@ static modstruct modlist[] = {
779814 { "max_pattern_compiled_length" , MOD_CTC , MOD_SIZ , 0 , CO (max_pattern_compiled_length ) },
780815 { "max_pattern_length" , MOD_CTC , MOD_SIZ , 0 , CO (max_pattern_length ) },
781816 { "max_varlookbehind" , MOD_CTC , MOD_INT , 0 , CO (max_varlookbehind ) },
782- { "memory" , MOD_PD , MOD_CTL , CTL_MEMORY , PD (control ) },
817+ { "memory" , MOD_PD , MOD_CTL , CTL_MEMORY , PO (control ) },
783818 { "multiline" , MOD_PATP , MOD_OPT , PCRE2_MULTILINE , PO (options ) },
784819 { "never_backslash_c" , MOD_PAT , MOD_OPT , PCRE2_NEVER_BACKSLASH_C , PO (options ) },
785820 { "never_callout" , MOD_CTC , MOD_OPT , PCRE2_EXTRA_NEVER_CALLOUT , CO (extra_options ) },
@@ -792,7 +827,7 @@ static modstruct modlist[] = {
792827 { "no_dotstar_anchor" , MOD_PAT , MOD_OPT , PCRE2_NO_DOTSTAR_ANCHOR , PO (options ) },
793828 { "no_jit" , MOD_DATP , MOD_OPT , PCRE2_NO_JIT , DO (options ) },
794829 { "no_start_optimize" , MOD_PATP , MOD_OPT , PCRE2_NO_START_OPTIMIZE , PO (options ) },
795- { "no_utf_check" , MOD_PD , MOD_OPT , PCRE2_NO_UTF_CHECK , PD (options ) },
830+ { "no_utf_check" , MOD_PD , MOD_OPT , PCRE2_NO_UTF_CHECK , PO (options ) },
796831 { "notbol" , MOD_DAT , MOD_OPT , PCRE2_NOTBOL , DO (options ) },
797832 { "notempty" , MOD_DAT , MOD_OPT , PCRE2_NOTEMPTY , DO (options ) },
798833 { "notempty_atstart" , MOD_DAT , MOD_OPT , PCRE2_NOTEMPTY_ATSTART , DO (options ) },
@@ -3529,24 +3564,6 @@ char *arg_subject = NULL;
35293564char * arg_pattern = NULL ;
35303565char * arg_error = NULL ;
35313566
3532- /* The offsets to the options and control bits fields of the pattern and data
3533- control blocks must be the same so that common options and controls such as
3534- "anchored" or "memory" can work for either of them from a single table entry.
3535- We cannot test this till runtime because "offsetof" does not work in the
3536- preprocessor. */
3537-
3538- // TODO This comment above is not correct: we can test it at compile time,
3539- // although it is true that it's not possible using the preprocessor. Use our
3540- // new STATIC_ASSERT macro.
3541-
3542- if (PO (options ) != DO (options ) || PO (control ) != DO (control ) ||
3543- PO (control2 ) != DO (control2 ))
3544- {
3545- fprintf (stderr , "** Coding error: "
3546- "options and control offsets for pattern and data must be the same.\n" );
3547- return 1 ;
3548- }
3549-
35503567/* Get buffers from malloc() so that valgrind will check their misuse when
35513568debugging. They grow automatically when very long lines are read. The 16-
35523569and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
@@ -3567,17 +3584,8 @@ _setmode( _fileno( stdout ), _O_BINARY );
35673584
35683585locale_name [0 ] = 0 ;
35693586
3570- memset (& def_patctl , 0 , sizeof (patctl ));
3571- def_patctl .convert_type = CONVERT_UNSET ;
3572- def_patctl .regerror_buffsize = -1 ;
3573-
3574- memset (& def_datctl , 0 , sizeof (datctl ));
3575- def_datctl .oveccount = DEFAULT_OVECCOUNT ;
3576- def_datctl .copy_numbers [0 ] = -1 ;
3577- def_datctl .get_numbers [0 ] = -1 ;
3578- def_datctl .startend [0 ] = def_datctl .startend [1 ] = CFORE_UNSET ;
3579- def_datctl .cerror [0 ] = def_datctl .cerror [1 ] = CFORE_UNSET ;
3580- def_datctl .cfail [0 ] = def_datctl .cfail [1 ] = CFORE_UNSET ;
3587+ patctl_zero (& def_patctl );
3588+ datctl_zero (& def_datctl );
35813589
35823590/* Scan command line options. */
35833591
0 commit comments