Skip to content

Commit a3fa1f1

Browse files
committed
Use [^...] rather than '[...] (neg)' to print NCLASS
1 parent 833ab6d commit a3fa1f1

16 files changed

+272
-202
lines changed

src/pcre2_printint.c

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -463,38 +463,35 @@ static void
463463
print_class(FILE *f, PCRE2_SPTR code, const uint8_t *char_lists_end, BOOL utf,
464464
const char *before, const char *after)
465465
{
466-
BOOL printmap, invertmap;
466+
BOOL printmap, negated;
467467
PCRE2_SPTR ccode;
468468
int i;
469469

470-
fprintf(f, "%s[", before);
471-
472-
/* Negative XCLASS has an inverted map whereas the original opcodes have
473-
already done the inversion. */
474-
invertmap = FALSE;
470+
/* Negative XCLASS and NCLASS both have a bitmap indicating which characters
471+
are accepted. For clarity we print this inverted and prefixed by "^". */
475472
if (*code == OP_XCLASS)
476473
{
477474
ccode = code + LINK_SIZE + 1;
478475
printmap = (*ccode & XCL_MAP) != 0;
479-
if ((*ccode & XCL_NOT) != 0)
480-
{
481-
invertmap = TRUE;
482-
fprintf(f, "^");
483-
}
476+
negated = (*ccode & XCL_NOT) != 0;
484477
ccode++;
485478
}
486479
else /* CLASS or NCLASS */
487480
{
488481
printmap = TRUE;
482+
negated = *code == OP_NCLASS;
489483
ccode = code + 1;
490484
}
491485

486+
fprintf(f, "%s[%s", before, negated? "^" : "");
487+
492488
/* Print a bit map */
493489
if (printmap)
494490
{
491+
BOOL first = TRUE;
495492
uint8_t inverted_map[32];
496493
const uint8_t *map = (const uint8_t *)ccode;
497-
if (invertmap)
494+
if (negated)
498495
{
499496
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
500497
for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
@@ -507,13 +504,15 @@ if (printmap)
507504
int j;
508505
for (j = i+1; j < 256; j++)
509506
if ((map[j/8] & (1u << (j&7))) == 0) break;
510-
if (i == '-' || i == ']') fprintf(f, "\\");
507+
if (i == '-' || i == '\\' || i == ']' || (first && i == '^'))
508+
fprintf(f, "\\");
511509
if (PRINTABLE(i)) fprintf(f, "%c", i);
512510
else fprintf(f, "\\x%02x", i);
511+
first = FALSE;
513512
if (--j > i)
514513
{
515514
if (j != i + 1) fprintf(f, "-");
516-
if (j == '-' || j == ']') fprintf(f, "\\");
515+
if (j == '-' || j == '\\' || j == ']') fprintf(f, "\\");
517516
if (PRINTABLE(j)) fprintf(f, "%c", j);
518517
else fprintf(f, "\\x%02x", j);
519518
}
@@ -580,7 +579,7 @@ if (*code == OP_XCLASS)
580579
}
581580

582581
/* Close the class. Negation is not flagged here; a negated class is shown by the "^" printed right after the opening "[". */
583-
fprintf(f, "]%s%s", (*code == OP_NCLASS)? " (neg)" : "", after);
582+
fprintf(f, "]%s", after);
584583
}
585584

586585

@@ -842,7 +841,7 @@ for(;;)
842841
case OP_NOT:
843842
fprintf(f, " %s [^", flag);
844843
extra = print_char(f, code + 1, utf);
845-
fprintf(f, "]");
844+
fprintf(f, "] (not)");
846845
break;
847846

848847
case OP_NOTSTARI:
@@ -868,7 +867,7 @@ for(;;)
868867
case OP_NOTPOSQUERY:
869868
fprintf(f, " %s [^", flag);
870869
extra = print_char(f, code + 1, utf);
871-
fprintf(f, "]%s", OP_names[*code]);
870+
fprintf(f, "]%s (not)", OP_names[*code]);
872871
break;
873872

874873
case OP_NOTEXACTI:
@@ -890,6 +889,7 @@ for(;;)
890889
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
891890
else
892891
if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
892+
fprintf(f, " (not)");
893893
break;
894894

895895
case OP_RECURSE:

src/pcre2test.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ to hold them as 32-bit code units. */
242242
enum { PR_OK, PR_SKIP, PR_ABEND };
243243

244244
/* The macro PRINTABLE determines whether to print an output character as-is or
245-
as a hex value when showing compiled patterns. is We use it in cases when the
245+
as a hex value when showing compiled patterns. We use it in cases when the
246246
locale has not been explicitly changed, so as to get consistent output from
247247
systems that differ in their output from isprint() even in the "C" locale. */
248248

testdata/testinput2

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7228,4 +7228,18 @@ a)"xI
72287228

72297229
/[\E/
72307230

7231+
/[\^z]/B
7232+
7233+
/[ \^]/B
7234+
7235+
/[\\z]/B
7236+
7237+
/[0-z]/B
7238+
7239+
/[0\-z]/B
7240+
7241+
/[]z]/B
7242+
7243+
/[ \]]/B
7244+
72317245
# End of testinput2

testdata/testoutput10

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ Subject length lower bound = 1
461461
/[^ab\xC0-\xF0]/IB,utf
462462
------------------------------------------------------------------
463463
Bra
464-
[\x00-`c-\xbf\xf1-\xff] (neg)
464+
[^ab\xc0-\xf0]
465465
Ket
466466
End
467467
------------------------------------------------------------------
@@ -604,7 +604,7 @@ Subject length lower bound = 3
604604
/[^\x{c4}]/IB
605605
------------------------------------------------------------------
606606
Bra
607-
[^\x{c4}]
607+
[^\x{c4}] (not)
608608
Ket
609609
End
610610
------------------------------------------------------------------
@@ -648,7 +648,7 @@ Subject length lower bound = 1
648648
/[^\xff]/IB,utf
649649
------------------------------------------------------------------
650650
Bra
651-
[^\x{ff}]
651+
[^\x{ff}] (not)
652652
Ket
653653
End
654654
------------------------------------------------------------------
@@ -898,7 +898,7 @@ Subject length lower bound = 2
898898
/[^\x{c4}]/IB,utf
899899
------------------------------------------------------------------
900900
Bra
901-
[^\x{c4}]
901+
[^\x{c4}] (not)
902902
Ket
903903
End
904904
------------------------------------------------------------------
@@ -1159,7 +1159,7 @@ Subject length lower bound = 17
11591159
/[^ⱥ]/Bi,utf
11601160
------------------------------------------------------------------
11611161
Bra
1162-
/i [^\x{2c65}]
1162+
/i [^\x{2c65}] (not)
11631163
Ket
11641164
End
11651165
------------------------------------------------------------------
@@ -1559,7 +1559,7 @@ Subject length lower bound = 1
15591559
------------------------------------------------------------------
15601560
Bra
15611561
^
1562-
[\x00-`c-\xff] (neg)
1562+
[^ab]
15631563
Ket
15641564
End
15651565
------------------------------------------------------------------

testdata/testoutput11-16

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
/[^\x{c4}]/IB
1010
------------------------------------------------------------------
1111
Bra
12-
[^\x{c4}]
12+
[^\x{c4}] (not)
1313
Ket
1414
End
1515
------------------------------------------------------------------
@@ -364,55 +364,55 @@ Subject length lower bound = 6
364364
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
365365
------------------------------------------------------------------
366366
Bra
367-
[^\x{80}]
368-
[^\x{ff}]
369-
[^\x{100}]
370-
[^\x{1000}]
371-
[^\x{ffff}]
367+
[^\x{80}] (not)
368+
[^\x{ff}] (not)
369+
[^\x{100}] (not)
370+
[^\x{1000}] (not)
371+
[^\x{ffff}] (not)
372372
Ket
373373
End
374374
------------------------------------------------------------------
375375

376376
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
377377
------------------------------------------------------------------
378378
Bra
379-
/i [^\x{80}]
380-
/i [^\x{ff}]
381-
/i [^\x{100}]
382-
/i [^\x{1000}]
383-
/i [^\x{ffff}]
379+
/i [^\x{80}] (not)
380+
/i [^\x{ff}] (not)
381+
/i [^\x{100}] (not)
382+
/i [^\x{1000}] (not)
383+
/i [^\x{ffff}] (not)
384384
Ket
385385
End
386386
------------------------------------------------------------------
387387

388388
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
389389
------------------------------------------------------------------
390390
Bra
391-
[^\x{100}]*
392-
[^\x{1000}]+
393-
[^\x{ffff}]??
394-
[^\x{8000}]{4}
395-
[^\x{8000}]*
396-
[^\x{7fff}]{2}
397-
[^\x{7fff}]{0,7}?
398-
[^\x{100}]{5}
399-
[^\x{100}]?+
391+
[^\x{100}]* (not)
392+
[^\x{1000}]+ (not)
393+
[^\x{ffff}]?? (not)
394+
[^\x{8000}]{4} (not)
395+
[^\x{8000}]* (not)
396+
[^\x{7fff}]{2} (not)
397+
[^\x{7fff}]{0,7}? (not)
398+
[^\x{100}]{5} (not)
399+
[^\x{100}]?+ (not)
400400
Ket
401401
End
402402
------------------------------------------------------------------
403403

404404
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
405405
------------------------------------------------------------------
406406
Bra
407-
/i [^\x{100}]*
408-
/i [^\x{1000}]+
409-
/i [^\x{ffff}]??
410-
/i [^\x{8000}]{4}
411-
/i [^\x{8000}]*
412-
/i [^\x{7fff}]{2}
413-
/i [^\x{7fff}]{0,7}?
414-
/i [^\x{100}]{5}
415-
/i [^\x{100}]?+
407+
/i [^\x{100}]* (not)
408+
/i [^\x{1000}]+ (not)
409+
/i [^\x{ffff}]?? (not)
410+
/i [^\x{8000}]{4} (not)
411+
/i [^\x{8000}]* (not)
412+
/i [^\x{7fff}]{2} (not)
413+
/i [^\x{7fff}]{0,7}? (not)
414+
/i [^\x{100}]{5} (not)
415+
/i [^\x{100}]?+ (not)
416416
Ket
417417
End
418418
------------------------------------------------------------------
@@ -474,25 +474,25 @@ MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789AB
474474
/[^\x00-a]{12,}[^b-\xff]*/B
475475
------------------------------------------------------------------
476476
Bra
477-
[b-\xff] (neg){12,}
478-
[\x00-a] (neg)*+
477+
[^\x00-a]{12,}
478+
[^b-\xff]*+
479479
Ket
480480
End
481481
------------------------------------------------------------------
482482

483483
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
484484
------------------------------------------------------------------
485485
Bra
486-
[\x00-\x08\x0e-\x1f!-\xff] (neg)*
486+
[^\x09-\x0d ]*
487487
\s*
488488

489489
[0-9A-Z_a-z]++
490490
\W+
491491

492-
[\x00-/:-\xff] (neg)*?
492+
[^0-9]*?
493493
\d
494494
0
495-
[\x00-/:-@[-^`{-\xff] (neg){4,6}?
495+
[^0-9A-Z_a-z]{4,6}?
496496
\w*
497497
A
498498
Ket

0 commit comments

Comments
 (0)