Skip to content

Commit 77dbf7c

Browse files
committed
Use [^...] rather than '[...] (neg)' to print NCLASS
1 parent 833ab6d commit 77dbf7c

File tree

11 files changed

+138
-60
lines changed

11 files changed

+138
-60
lines changed

src/pcre2_printint.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -469,8 +469,8 @@ int i;
469469

470470
fprintf(f, "%s[", before);
471471

472-
/* Negative XCLASS has an inverted map whereas the original opcodes have
473-
already done the inversion. */
472+
/* Negative XCLASS and NCLASS both have a bitmap indicating which characters
473+
are accepted. For clarity we print this inverted and prefixed by "^". */
474474
invertmap = FALSE;
475475
if (*code == OP_XCLASS)
476476
{
@@ -486,12 +486,18 @@ if (*code == OP_XCLASS)
486486
else /* CLASS or NCLASS */
487487
{
488488
printmap = TRUE;
489+
if (*code == OP_NCLASS)
490+
{
491+
invertmap = TRUE;
492+
fprintf(f, "^");
493+
}
489494
ccode = code + 1;
490495
}
491496

492497
/* Print a bit map */
493498
if (printmap)
494499
{
500+
BOOL first = TRUE;
495501
uint8_t inverted_map[32];
496502
const uint8_t *map = (const uint8_t *)ccode;
497503
if (invertmap)
@@ -507,13 +513,15 @@ if (printmap)
507513
int j;
508514
for (j = i+1; j < 256; j++)
509515
if ((map[j/8] & (1u << (j&7))) == 0) break;
510-
if (i == '-' || i == ']') fprintf(f, "\\");
516+
if (i == '-' || i == '\\' || i == ']' || (first && i == '^'))
517+
fprintf(f, "\\");
511518
if (PRINTABLE(i)) fprintf(f, "%c", i);
512519
else fprintf(f, "\\x%02x", i);
520+
first = FALSE;
513521
if (--j > i)
514522
{
515523
if (j != i + 1) fprintf(f, "-");
516-
if (j == '-' || j == ']') fprintf(f, "\\");
524+
if (j == '-' || i == '\\' || i == ']') fprintf(f, "\\");
517525
if (PRINTABLE(j)) fprintf(f, "%c", j);
518526
else fprintf(f, "\\x%02x", j);
519527
}
@@ -580,7 +588,7 @@ if (*code == OP_XCLASS)
580588
}
581589

582590
/* Close the class. Negation is shown by the leading "^", so no suffix is needed. */
583-
fprintf(f, "]%s%s", (*code == OP_NCLASS)? " (neg)" : "", after);
591+
fprintf(f, "]%s", after);
584592
}
585593

586594

src/pcre2test.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ to hold them as 32-bit code units. */
242242
enum { PR_OK, PR_SKIP, PR_ABEND };
243243

244244
/* The macro PRINTABLE determines whether to print an output character as-is or
245-
as a hex value when showing compiled patterns. is We use it in cases when the
245+
as a hex value when showing compiled patterns. We use it in cases when the
246246
locale has not been explicitly changed, so as to get consistent output from
247247
systems that differ in their output from isprint() even in the "C" locale. */
248248

testdata/testinput2

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7228,4 +7228,18 @@ a)"xI
72287228

72297229
/[\E/
72307230

7231+
/[\^z]/B
7232+
7233+
/[ \^]/B
7234+
7235+
/[\\z]/B
7236+
7237+
/[0-z]/B
7238+
7239+
/[0\-z]/B
7240+
7241+
/[]z]/B
7242+
7243+
/[ \]]/B
7244+
72317245
# End of testinput2

testdata/testoutput10

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ Subject length lower bound = 1
461461
/[^ab\xC0-\xF0]/IB,utf
462462
------------------------------------------------------------------
463463
Bra
464-
[\x00-`c-\xbf\xf1-\xff] (neg)
464+
[^ab\xc0-\xf0]
465465
Ket
466466
End
467467
------------------------------------------------------------------
@@ -1559,7 +1559,7 @@ Subject length lower bound = 1
15591559
------------------------------------------------------------------
15601560
Bra
15611561
^
1562-
[\x00-`c-\xff] (neg)
1562+
[^ab]
15631563
Ket
15641564
End
15651565
------------------------------------------------------------------

testdata/testoutput11-16

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -474,25 +474,25 @@ MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789AB
474474
/[^\x00-a]{12,}[^b-\xff]*/B
475475
------------------------------------------------------------------
476476
Bra
477-
[b-\xff] (neg){12,}
478-
[\x00-a] (neg)*+
477+
[^\x00-a]{12,}
478+
[^b-\xff]*+
479479
Ket
480480
End
481481
------------------------------------------------------------------
482482

483483
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
484484
------------------------------------------------------------------
485485
Bra
486-
[\x00-\x08\x0e-\x1f!-\xff] (neg)*
486+
[^\x09-\x0d ]*
487487
\s*
488488

489489
[0-9A-Z_a-z]++
490490
\W+
491491

492-
[\x00-/:-\xff] (neg)*?
492+
[^0-9]*?
493493
\d
494494
0
495-
[\x00-/:-@[-^`{-\xff] (neg){4,6}?
495+
[^0-9A-Z_a-z]{4,6}?
496496
\w*
497497
A
498498
Ket

testdata/testoutput11-32

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -474,25 +474,25 @@ MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789AB
474474
/[^\x00-a]{12,}[^b-\xff]*/B
475475
------------------------------------------------------------------
476476
Bra
477-
[b-\xff] (neg){12,}
478-
[\x00-a] (neg)*+
477+
[^\x00-a]{12,}
478+
[^b-\xff]*+
479479
Ket
480480
End
481481
------------------------------------------------------------------
482482

483483
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
484484
------------------------------------------------------------------
485485
Bra
486-
[\x00-\x08\x0e-\x1f!-\xff] (neg)*
486+
[^\x09-\x0d ]*
487487
\s*
488488

489489
[0-9A-Z_a-z]++
490490
\W+
491491

492-
[\x00-/:-\xff] (neg)*?
492+
[^0-9]*?
493493
\d
494494
0
495-
[\x00-/:-@[-^`{-\xff] (neg){4,6}?
495+
[^0-9A-Z_a-z]{4,6}?
496496
\w*
497497
A
498498
Ket

testdata/testoutput12-16

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ Subject length lower bound = 1
241241
/[^ab\xC0-\xF0]/IB,utf
242242
------------------------------------------------------------------
243243
Bra
244-
[\x00-`c-\xbf\xf1-\xff] (neg)
244+
[^ab\xc0-\xf0]
245245
Ket
246246
End
247247
------------------------------------------------------------------
@@ -1377,7 +1377,7 @@ Subject length lower bound = 2
13771377
/[\W\pL]/B
13781378
------------------------------------------------------------------
13791379
Bra
1380-
[\x00-/:-^`-\xff] (neg)
1380+
[^0-9_]
13811381
Ket
13821382
End
13831383
------------------------------------------------------------------
@@ -1394,7 +1394,7 @@ No match
13941394
/[\s[:^ascii:]]/B,ucp
13951395
------------------------------------------------------------------
13961396
Bra
1397-
[\x09-\x0d \x80-\xff] (neg)
1397+
[^\x00-\x08\x0e-\x1f!-\x7f]
13981398
Ket
13991399
End
14001400
------------------------------------------------------------------
@@ -1423,7 +1423,7 @@ Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowe
14231423
------------------------------------------------------------------
14241424
Bra
14251425
^
1426-
[\x00-`c-\xff] (neg)
1426+
[^ab]
14271427
Ket
14281428
End
14291429
------------------------------------------------------------------

testdata/testoutput12-32

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ Subject length lower bound = 1
236236
/[^ab\xC0-\xF0]/IB,utf
237237
------------------------------------------------------------------
238238
Bra
239-
[\x00-`c-\xbf\xf1-\xff] (neg)
239+
[^ab\xc0-\xf0]
240240
Ket
241241
End
242242
------------------------------------------------------------------
@@ -1371,7 +1371,7 @@ Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defin
13711371
/[\W\pL]/B
13721372
------------------------------------------------------------------
13731373
Bra
1374-
[\x00-/:-^`-\xff] (neg)
1374+
[^0-9_]
13751375
Ket
13761376
End
13771377
------------------------------------------------------------------
@@ -1388,7 +1388,7 @@ No match
13881388
/[\s[:^ascii:]]/B,ucp
13891389
------------------------------------------------------------------
13901390
Bra
1391-
[\x09-\x0d \x80-\xff] (neg)
1391+
[^\x00-\x08\x0e-\x1f!-\x7f]
13921392
Ket
13931393
End
13941394
------------------------------------------------------------------
@@ -1420,7 +1420,7 @@ Subject length lower bound = 1
14201420
------------------------------------------------------------------
14211421
Bra
14221422
^
1423-
[\x00-`c-\xff] (neg)
1423+
[^ab]
14241424
Ket
14251425
End
14261426
------------------------------------------------------------------

0 commit comments

Comments
 (0)