@@ -14,6 +14,7 @@ Written by Philip Hazel, October 2016
14
14
#include <stdlib.h>
15
15
#include <string.h>
16
16
17
+ #include "config.h"
17
18
#define PCRE2_CODE_UNIT_WIDTH 8
18
19
#include "pcre2.h"
19
20
@@ -36,6 +37,148 @@ Written by Philip Hazel, October 2016
36
37
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
37
38
PCRE2_PARTIAL_SOFT)
38
39
40
+ static void print_compile_options (FILE * stream , uint32_t compile_options )
41
+ {
42
+ fprintf (stream , "Compile options %.8x never_backslash_c" , compile_options );
43
+ fprintf (stream , "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n" ,
44
+ ((compile_options & PCRE2_ALT_BSUX ) != 0 )? ",alt_bsux" : "" ,
45
+ ((compile_options & PCRE2_ALT_CIRCUMFLEX ) != 0 )? ",alt_circumflex" : "" ,
46
+ ((compile_options & PCRE2_ALT_VERBNAMES ) != 0 )? ",alt_verbnames" : "" ,
47
+ ((compile_options & PCRE2_ALLOW_EMPTY_CLASS ) != 0 )? ",allow_empty_class" : "" ,
48
+ ((compile_options & PCRE2_ANCHORED ) != 0 )? ",anchored" : "" ,
49
+ ((compile_options & PCRE2_AUTO_CALLOUT ) != 0 )? ",auto_callout" : "" ,
50
+ ((compile_options & PCRE2_CASELESS ) != 0 )? ",caseless" : "" ,
51
+ ((compile_options & PCRE2_DOLLAR_ENDONLY ) != 0 )? ",dollar_endonly" : "" ,
52
+ ((compile_options & PCRE2_DOTALL ) != 0 )? ",dotall" : "" ,
53
+ ((compile_options & PCRE2_DUPNAMES ) != 0 )? ",dupnames" : "" ,
54
+ ((compile_options & PCRE2_ENDANCHORED ) != 0 )? ",endanchored" : "" ,
55
+ ((compile_options & PCRE2_EXTENDED ) != 0 )? ",extended" : "" ,
56
+ ((compile_options & PCRE2_FIRSTLINE ) != 0 )? ",firstline" : "" ,
57
+ ((compile_options & PCRE2_MATCH_UNSET_BACKREF ) != 0 )? ",match_unset_backref" : "" ,
58
+ ((compile_options & PCRE2_MULTILINE ) != 0 )? ",multiline" : "" ,
59
+ ((compile_options & PCRE2_NEVER_UCP ) != 0 )? ",never_ucp" : "" ,
60
+ ((compile_options & PCRE2_NEVER_UTF ) != 0 )? ",never_utf" : "" ,
61
+ ((compile_options & PCRE2_NO_AUTO_CAPTURE ) != 0 )? ",no_auto_capture" : "" ,
62
+ ((compile_options & PCRE2_NO_AUTO_POSSESS ) != 0 )? ",no_auto_possess" : "" ,
63
+ ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR ) != 0 )? ",no_dotstar_anchor" : "" ,
64
+ ((compile_options & PCRE2_NO_UTF_CHECK ) != 0 )? ",no_utf_check" : "" ,
65
+ ((compile_options & PCRE2_NO_START_OPTIMIZE ) != 0 )? ",no_start_optimize" : "" ,
66
+ ((compile_options & PCRE2_UCP ) != 0 )? ",ucp" : "" ,
67
+ ((compile_options & PCRE2_UNGREEDY ) != 0 )? ",ungreedy" : "" ,
68
+ ((compile_options & PCRE2_USE_OFFSET_LIMIT ) != 0 )? ",use_offset_limit" : "" ,
69
+ ((compile_options & PCRE2_UTF ) != 0 )? ",utf" : "" );
70
+ }
71
+
72
+ static void print_match_options (FILE * stream , uint32_t match_options )
73
+ {
74
+ fprintf (stream , "Match options %.8x" , match_options );
75
+ fprintf (stream , "%s%s%s%s%s%s%s%s%s\n" ,
76
+ ((match_options & PCRE2_ANCHORED ) != 0 )? ",anchored" : "" ,
77
+ ((match_options & PCRE2_ENDANCHORED ) != 0 )? ",endanchored" : "" ,
78
+ ((match_options & PCRE2_NO_UTF_CHECK ) != 0 )? ",no_utf_check" : "" ,
79
+ ((match_options & PCRE2_NOTBOL ) != 0 )? ",notbol" : "" ,
80
+ ((match_options & PCRE2_NOTEMPTY ) != 0 )? ",notempty" : "" ,
81
+ ((match_options & PCRE2_NOTEMPTY_ATSTART ) != 0 )? ",notempty_atstart" : "" ,
82
+ ((match_options & PCRE2_NOTEOL ) != 0 )? ",noteol" : "" ,
83
+ ((match_options & PCRE2_PARTIAL_HARD ) != 0 )? ",partial_hard" : "" ,
84
+ ((match_options & PCRE2_PARTIAL_SOFT ) != 0 )? ",partial_soft" : "" );
85
+ }
86
+
87
+ static void dump_matches (FILE * stream , pcre2_match_data * match_data , pcre2_match_context * match_context )
88
+ {
89
+ PCRE2_UCHAR error_buf [256 ];
90
+ int errorcode ;
91
+ uint32_t ovector_count = pcre2_get_ovector_count (match_data );
92
+
93
+ for (uint32_t ovector = ovector_count ; ovector < ovector_count ; ovector ++ )
94
+ {
95
+ PCRE2_UCHAR * bufferptr = NULL ;
96
+ PCRE2_SIZE bufflen = 0 ;
97
+
98
+ errorcode = pcre2_substring_get_bynumber (match_data , ovector , & bufferptr , & bufflen );
99
+
100
+ if (errorcode >= 0 )
101
+ {
102
+ fprintf (stream , "Match %d (hex encoded): " , ovector );
103
+ for (PCRE2_SIZE i = 0 ; i < bufflen ; i ++ )
104
+ {
105
+ fprintf (stderr , "%02x" , bufferptr [i ]);
106
+ }
107
+ fprintf (stderr , "\n" );
108
+ }
109
+ else
110
+ {
111
+ pcre2_get_error_message (errorcode , error_buf , 256 );
112
+ fprintf (stream , "Match %d failed: %s\n" , ovector , error_buf );
113
+ }
114
+ }
115
+ }
116
+
117
/* This function describes the current test case being evaluated, then aborts.

Everything is written to stderr, in this order:
  - the name of the check that failed (task);
  - the input bytes as hex (data, size);
  - the compile and match option summaries;
  - for the non-JIT'd run: its error message if errorcode is negative,
    otherwise a dump of match_data when that pointer is not NULL;
  - the same for the JIT'd run, using errorcode_jit and match_data_jit.

The function then calls abort() and so never returns to its caller.

Arguments:
  task             short description of the comparison that failed
  data, size       the input bytes and their length
  compile_options  options that were passed when compiling
  match_options    options that were passed when matching
  errorcode        result code of the non-JIT'd operation
  match_data       match data of the non-JIT'd operation, or NULL
  errorcode_jit    result code of the JIT'd operation
  match_data_jit   match data of the JIT'd operation, or NULL
  match_context    passed through to dump_matches()
*/

#ifdef SUPPORT_JIT
static void describe_failure(
  const char *task,
  const unsigned char *data,
  size_t size,
  uint32_t compile_options,
  uint32_t match_options,
  int errorcode,
  pcre2_match_data *match_data,
  int errorcode_jit,
  pcre2_match_data *match_data_jit,
  pcre2_match_context *match_context
) {
PCRE2_UCHAR buffer[256];
const PCRE2_SIZE buffer_len = sizeof(buffer) / sizeof(buffer[0]);

fprintf(stderr, "Encountered failure while performing %s; context:\n", task);

fprintf(stderr, "Pattern/sample string (hex encoded): ");
for (size_t i = 0; i < size; i++)
  {
  fprintf(stderr, "%02x", data[i]);
  }
fprintf(stderr, "\n");

print_compile_options(stderr, compile_options);
print_match_options(stderr, match_options);

if (errorcode < 0)
  {
  pcre2_get_error_message(errorcode, buffer, buffer_len);
  fprintf(stderr, "Non-JIT'd operation emitted an error: %s\n", buffer);
  }
else
  {
  fprintf(stderr, "Non-JIT'd operation did not emit an error.\n");
  if (match_data != NULL)
    {
    /* The ovector count is unsigned, so print it with %u rather than %d. */
    fprintf(stderr, "%u matches discovered by non-JIT'd regex:\n",
      (unsigned int)pcre2_get_ovector_count(match_data));
    dump_matches(stderr, match_data, match_context);
    fprintf(stderr, "\n");
    }
  }

if (errorcode_jit < 0)
  {
  pcre2_get_error_message(errorcode_jit, buffer, buffer_len);
  fprintf(stderr, "JIT'd operation emitted an error: %s\n", buffer);
  }
else
  {
  fprintf(stderr, "JIT'd operation did not emit an error.\n");
  if (match_data_jit != NULL)
    {
    fprintf(stderr, "%u matches discovered by JIT'd regex:\n",
      (unsigned int)pcre2_get_ovector_count(match_data_jit));
    dump_matches(stderr, match_data_jit, match_context);
    fprintf(stderr, "\n");
    }
  }

abort();
}
#endif
181
+
39
182
/* This is the callout function. Its only purpose is to halt matching if there
40
183
are more than 100 callouts, as one way of stopping too much time being spent on
41
184
fruitless matches. The callout data is a pointer to the counter. */
@@ -110,34 +253,7 @@ for (i = 0; i < 2; i++)
110
253
pcre2_code * code ;
111
254
112
255
#ifdef STANDALONE
113
- printf ("Compile options %.8x never_backslash_c" , compile_options );
114
- printf ("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n" ,
115
- ((compile_options & PCRE2_ALT_BSUX ) != 0 )? ",alt_bsux" : "" ,
116
- ((compile_options & PCRE2_ALT_CIRCUMFLEX ) != 0 )? ",alt_circumflex" : "" ,
117
- ((compile_options & PCRE2_ALT_VERBNAMES ) != 0 )? ",alt_verbnames" : "" ,
118
- ((compile_options & PCRE2_ALLOW_EMPTY_CLASS ) != 0 )? ",allow_empty_class" : "" ,
119
- ((compile_options & PCRE2_ANCHORED ) != 0 )? ",anchored" : "" ,
120
- ((compile_options & PCRE2_AUTO_CALLOUT ) != 0 )? ",auto_callout" : "" ,
121
- ((compile_options & PCRE2_CASELESS ) != 0 )? ",caseless" : "" ,
122
- ((compile_options & PCRE2_DOLLAR_ENDONLY ) != 0 )? ",dollar_endonly" : "" ,
123
- ((compile_options & PCRE2_DOTALL ) != 0 )? ",dotall" : "" ,
124
- ((compile_options & PCRE2_DUPNAMES ) != 0 )? ",dupnames" : "" ,
125
- ((compile_options & PCRE2_ENDANCHORED ) != 0 )? ",endanchored" : "" ,
126
- ((compile_options & PCRE2_EXTENDED ) != 0 )? ",extended" : "" ,
127
- ((compile_options & PCRE2_FIRSTLINE ) != 0 )? ",firstline" : "" ,
128
- ((compile_options & PCRE2_MATCH_UNSET_BACKREF ) != 0 )? ",match_unset_backref" : "" ,
129
- ((compile_options & PCRE2_MULTILINE ) != 0 )? ",multiline" : "" ,
130
- ((compile_options & PCRE2_NEVER_UCP ) != 0 )? ",never_ucp" : "" ,
131
- ((compile_options & PCRE2_NEVER_UTF ) != 0 )? ",never_utf" : "" ,
132
- ((compile_options & PCRE2_NO_AUTO_CAPTURE ) != 0 )? ",no_auto_capture" : "" ,
133
- ((compile_options & PCRE2_NO_AUTO_POSSESS ) != 0 )? ",no_auto_possess" : "" ,
134
- ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR ) != 0 )? ",no_dotstar_anchor" : "" ,
135
- ((compile_options & PCRE2_NO_UTF_CHECK ) != 0 )? ",no_utf_check" : "" ,
136
- ((compile_options & PCRE2_NO_START_OPTIMIZE ) != 0 )? ",no_start_optimize" : "" ,
137
- ((compile_options & PCRE2_UCP ) != 0 )? ",ucp" : "" ,
138
- ((compile_options & PCRE2_UNGREEDY ) != 0 )? ",ungreedy" : "" ,
139
- ((compile_options & PCRE2_USE_OFFSET_LIMIT ) != 0 )? ",use_offset_limit" : "" ,
140
- ((compile_options & PCRE2_UTF ) != 0 )? ",utf" : "" );
256
+ print_compile_options (stdout , compile_options );
141
257
#endif
142
258
143
259
code = pcre2_compile ((PCRE2_SPTR )data , (PCRE2_SIZE )size , compile_options ,
@@ -169,7 +285,7 @@ for (i = 0; i < 2; i++)
169
285
#endif
170
286
{
171
287
#ifdef STANDALONE
172
- printf ( "** Failed to create match data block\n" );
288
+ fprintf ( stderr , "** Failed to create match data block\n" );
173
289
#endif
174
290
abort ();
175
291
}
@@ -181,7 +297,7 @@ for (i = 0; i < 2; i++)
181
297
if (match_context == NULL )
182
298
{
183
299
#ifdef STANDALONE
184
- printf ( "** Failed to create match context block\n" );
300
+ fprintf ( stderr , "** Failed to create match context block\n" );
185
301
#endif
186
302
abort ();
187
303
}
@@ -195,18 +311,7 @@ for (i = 0; i < 2; i++)
195
311
for (j = 0 ; j < 2 ; j ++ )
196
312
{
197
313
#ifdef STANDALONE
198
- printf ("Match options %.8x" , match_options );
199
- printf ("%s%s%s%s%s%s%s%s%s%s\n" ,
200
- ((match_options & PCRE2_ANCHORED ) != 0 )? ",anchored" : "" ,
201
- ((match_options & PCRE2_ENDANCHORED ) != 0 )? ",endanchored" : "" ,
202
- ((match_options & PCRE2_NO_JIT ) != 0 )? ",no_jit" : "" ,
203
- ((match_options & PCRE2_NO_UTF_CHECK ) != 0 )? ",no_utf_check" : "" ,
204
- ((match_options & PCRE2_NOTBOL ) != 0 )? ",notbol" : "" ,
205
- ((match_options & PCRE2_NOTEMPTY ) != 0 )? ",notempty" : "" ,
206
- ((match_options & PCRE2_NOTEMPTY_ATSTART ) != 0 )? ",notempty_atstart" : "" ,
207
- ((match_options & PCRE2_NOTEOL ) != 0 )? ",noteol" : "" ,
208
- ((match_options & PCRE2_PARTIAL_HARD ) != 0 )? ",partial_hard" : "" ,
209
- ((match_options & PCRE2_PARTIAL_SOFT ) != 0 )? ",partial_soft" : "" );
314
+ print_match_options (stdout , match_options );
210
315
#endif
211
316
212
317
callout_count = 0 ;
@@ -231,16 +336,14 @@ for (i = 0; i < 2; i++)
231
336
232
337
if (errorcode_jit != errorcode )
233
338
{
234
- printf ("JIT errorcode %d did not match original errorcode %d\n" , errorcode_jit , errorcode );
235
- abort ();
339
+ describe_failure ("match errorcode comparison" , data , size , compile_options , match_options , errorcode , match_data , errorcode_jit , match_data_jit , match_context );
236
340
}
237
341
238
342
ovector_count = pcre2_get_ovector_count (match_data );
239
343
240
344
if (ovector_count != pcre2_get_ovector_count (match_data_jit ))
241
345
{
242
- puts ("JIT ovector count did not match original" );
243
- abort ();
346
+ describe_failure ("ovector count comparison" , data , size , compile_options , match_options , errorcode , match_data , errorcode_jit , match_data_jit , match_context );
244
347
}
245
348
246
349
for (uint32_t ovector = 0 ; ovector < ovector_count ; ovector ++ )
@@ -256,22 +359,19 @@ for (i = 0; i < 2; i++)
256
359
257
360
if (errorcode != errorcode_jit )
258
361
{
259
- printf ("when extracting substring, JIT errorcode %d did not match original %d\n" , errorcode_jit , errorcode );
260
- abort ();
362
+ describe_failure ("ovector entry errorcode comparison" , data , size , compile_options , match_options , errorcode , match_data , errorcode_jit , match_data_jit , match_context );
261
363
}
262
364
263
365
if (errorcode >= 0 )
264
366
{
265
367
if (bufflen != bufflen_jit )
266
368
{
267
- printf ("when extracting substring, JIT buffer length %zu did not match original %zu\n" , bufflen_jit , bufflen );
268
- abort ();
369
+ describe_failure ("ovector entry length comparison" , data , size , compile_options , match_options , errorcode , match_data , errorcode_jit , match_data_jit , match_context );
269
370
}
270
371
271
372
if (memcmp (bufferptr , bufferptr_jit , bufflen ) != 0 )
272
373
{
273
- puts ("when extracting substring, JIT buffer contents did not match original" );
274
- abort ();
374
+ describe_failure ("ovector entry content comparison" , data , size , compile_options , match_options , errorcode , match_data , errorcode_jit , match_data_jit , match_context );
275
375
}
276
376
}
277
377
0 commit comments