Skip to content

Commit 960c711

Browse files
committed
updated for version 7.4.001
Problem: Character classes such as [a-z] do not react to 'ignorecase'. Breaks man page highlighting. (Mario Grgic) Solution: Add separate items for classes that react to 'ignorecase'. Clean up logic handling character classes. Add more tests.
1 parent 4fa0083 commit 960c711

File tree

4 files changed

+147
-46
lines changed

4 files changed

+147
-46
lines changed

src/regexp_nfa.c

Lines changed: 62 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@
2929
# define NFA_REGEXP_DEBUG_LOG "nfa_regexp_debug.log"
3030
#endif
3131

32+
/* Added to NFA_ANY - NFA_NUPPER_IC to include a NL. */
33+
#define NFA_ADD_NL 31
34+
3235
enum
3336
{
3437
NFA_SPLIT = -1024,
@@ -183,6 +186,13 @@ enum
183186
NFA_NLOWER, /* Match non-lowercase char */
184187
NFA_UPPER, /* Match uppercase char */
185188
NFA_NUPPER, /* Match non-uppercase char */
189+
NFA_LOWER_IC, /* Match [a-z] */
190+
NFA_NLOWER_IC, /* Match [^a-z] */
191+
NFA_UPPER_IC, /* Match [A-Z] */
192+
NFA_NUPPER_IC, /* Match [^A-Z] */
193+
194+
NFA_FIRST_NL = NFA_ANY + NFA_ADD_NL,
195+
NFA_LAST_NL = NFA_NUPPER_IC + NFA_ADD_NL,
186196

187197
NFA_CURSOR, /* Match cursor pos */
188198
NFA_LNUM, /* Match line number */
@@ -199,9 +209,6 @@ enum
199209
NFA_MARK_LT, /* Match < mark */
200210
NFA_VISUAL, /* Match Visual area */
201211

202-
NFA_FIRST_NL = NFA_ANY + ADD_NL,
203-
NFA_LAST_NL = NFA_NUPPER + ADD_NL,
204-
205212
/* Character classes [:alnum:] etc */
206213
NFA_CLASS_ALNUM,
207214
NFA_CLASS_ALPHA,
@@ -578,6 +585,8 @@ realloc_post_list()
578585
* On failure, return 0 (=FAIL)
579586
* Start points to the first char of the range, while end should point
580587
* to the closing brace.
588+
* Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may
589+
* need to be interpreted as [a-zA-Z].
581590
*/
582591
static int
583592
nfa_recognize_char_class(start, end, extra_newl)
@@ -681,7 +690,7 @@ nfa_recognize_char_class(start, end, extra_newl)
681690
return FAIL;
682691

683692
if (newl == TRUE)
684-
extra_newl = ADD_NL;
693+
extra_newl = NFA_ADD_NL;
685694

686695
switch (config)
687696
{
@@ -710,13 +719,13 @@ nfa_recognize_char_class(start, end, extra_newl)
710719
case CLASS_not | CLASS_az | CLASS_AZ:
711720
return extra_newl + NFA_NALPHA;
712721
case CLASS_az:
713-
return extra_newl + NFA_LOWER;
722+
return extra_newl + NFA_LOWER_IC;
714723
case CLASS_not | CLASS_az:
715-
return extra_newl + NFA_NLOWER;
724+
return extra_newl + NFA_NLOWER_IC;
716725
case CLASS_AZ:
717-
return extra_newl + NFA_UPPER;
726+
return extra_newl + NFA_UPPER_IC;
718727
case CLASS_not | CLASS_AZ:
719-
return extra_newl + NFA_NUPPER;
728+
return extra_newl + NFA_NUPPER_IC;
720729
}
721730
return FAIL;
722731
}
@@ -914,7 +923,7 @@ nfa_regatom()
914923
break;
915924
}
916925

917-
extra = ADD_NL;
926+
extra = NFA_ADD_NL;
918927

919928
/* "\_[" is collection plus newline */
920929
if (c == '[')
@@ -970,7 +979,7 @@ nfa_regatom()
970979
}
971980
#endif
972981
EMIT(nfa_classcodes[p - classchars]);
973-
if (extra == ADD_NL)
982+
if (extra == NFA_ADD_NL)
974983
{
975984
EMIT(NFA_NEWL);
976985
EMIT(NFA_OR);
@@ -1240,21 +1249,21 @@ nfa_regatom()
12401249
{
12411250
/*
12421251
* Try to reverse engineer character classes. For example,
1243-
* recognize that [0-9] stands for \d and [A-Za-z_] with \h,
1252+
* recognize that [0-9] stands for \d and [A-Za-z_] for \h,
12441253
* and perform the necessary substitutions in the NFA.
12451254
*/
12461255
result = nfa_recognize_char_class(regparse, endp,
1247-
extra == ADD_NL);
1256+
extra == NFA_ADD_NL);
12481257
if (result != FAIL)
12491258
{
1250-
if (result >= NFA_DIGIT && result <= NFA_NUPPER)
1251-
EMIT(result);
1252-
else /* must be char class + newline */
1259+
if (result >= NFA_FIRST_NL && result <= NFA_LAST_NL)
12531260
{
1254-
EMIT(result - ADD_NL);
1261+
EMIT(result - NFA_ADD_NL);
12551262
EMIT(NFA_NEWL);
12561263
EMIT(NFA_OR);
12571264
}
1265+
else
1266+
EMIT(result);
12581267
regparse = endp;
12591268
mb_ptr_adv(regparse);
12601269
return OK;
@@ -1504,7 +1513,7 @@ nfa_regatom()
15041513
* collection, add an OR below. But not for negated
15051514
* range. */
15061515
if (!negated)
1507-
extra = ADD_NL;
1516+
extra = NFA_ADD_NL;
15081517
}
15091518
else
15101519
{
@@ -1537,7 +1546,7 @@ nfa_regatom()
15371546
EMIT(NFA_END_COLL);
15381547

15391548
/* \_[] also matches \n but it's not negated */
1540-
if (extra == ADD_NL)
1549+
if (extra == NFA_ADD_NL)
15411550
{
15421551
EMIT(reg_string ? NL : NFA_NEWL);
15431552
EMIT(NFA_OR);
@@ -2011,7 +2020,7 @@ nfa_set_code(c)
20112020
if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL)
20122021
{
20132022
addnl = TRUE;
2014-
c -= ADD_NL;
2023+
c -= NFA_ADD_NL;
20152024
}
20162025

20172026
STRCPY(code, "");
@@ -2217,6 +2226,10 @@ nfa_set_code(c)
22172226
case NFA_NLOWER:STRCPY(code, "NFA_NLOWER"); break;
22182227
case NFA_UPPER: STRCPY(code, "NFA_UPPER"); break;
22192228
case NFA_NUPPER:STRCPY(code, "NFA_NUPPER"); break;
2229+
case NFA_LOWER_IC: STRCPY(code, "NFA_LOWER_IC"); break;
2230+
case NFA_NLOWER_IC: STRCPY(code, "NFA_NLOWER_IC"); break;
2231+
case NFA_UPPER_IC: STRCPY(code, "NFA_UPPER_IC"); break;
2232+
case NFA_NUPPER_IC: STRCPY(code, "NFA_NUPPER_IC"); break;
22202233

22212234
default:
22222235
STRCPY(code, "CHAR(x)");
@@ -2687,6 +2700,10 @@ nfa_max_width(startstate, depth)
26872700
case NFA_NLOWER:
26882701
case NFA_UPPER:
26892702
case NFA_NUPPER:
2703+
case NFA_LOWER_IC:
2704+
case NFA_NLOWER_IC:
2705+
case NFA_UPPER_IC:
2706+
case NFA_NUPPER_IC:
26902707
/* possibly non-ascii */
26912708
#ifdef FEAT_MBYTE
26922709
if (has_mbyte)
@@ -3841,6 +3858,10 @@ match_follows(startstate, depth)
38413858
case NFA_NLOWER:
38423859
case NFA_UPPER:
38433860
case NFA_NUPPER:
3861+
case NFA_LOWER_IC:
3862+
case NFA_NLOWER_IC:
3863+
case NFA_UPPER_IC:
3864+
case NFA_NUPPER_IC:
38443865
case NFA_START_COLL:
38453866
case NFA_START_NEG_COLL:
38463867
case NFA_NEWL:
@@ -5872,6 +5893,28 @@ nfa_regmatch(prog, start, submatch, m)
58725893
ADD_STATE_IF_MATCH(t->state);
58735894
break;
58745895

5896+
case NFA_LOWER_IC: /* [a-z] */
5897+
result = ri_lower(curc) || (ireg_ic && ri_upper(curc));
5898+
ADD_STATE_IF_MATCH(t->state);
5899+
break;
5900+
5901+
case NFA_NLOWER_IC: /* [^a-z] */
5902+
result = curc != NUL
5903+
&& !(ri_lower(curc) || (ireg_ic && ri_upper(curc)));
5904+
ADD_STATE_IF_MATCH(t->state);
5905+
break;
5906+
5907+
case NFA_UPPER_IC: /* [A-Z] */
5908+
result = ri_upper(curc) || (ireg_ic && ri_lower(curc));
5909+
ADD_STATE_IF_MATCH(t->state);
5910+
break;
5911+
5912+
case NFA_NUPPER_IC: /* [^A-Z] */
5913+
result = curc != NUL
5914+
&& !(ri_upper(curc) || (ireg_ic && ri_lower(curc)));
5915+
ADD_STATE_IF_MATCH(t->state);
5916+
break;
5917+
58755918
case NFA_BACKREF1:
58765919
case NFA_BACKREF2:
58775920
case NFA_BACKREF3:

src/testdir/test64.in

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -289,15 +289,29 @@ STARTTEST
289289
:call add(tl, [2, '.a\%$', " a\n "])
290290
:call add(tl, [2, '.a\%$', " a\n_a", "_a"])
291291
:"
292-
:"""" Test recognition of some character classes
293-
:call add(tl, [2, '[0-9]', '8', '8'])
294-
:call add(tl, [2, '[^0-9]', '8'])
295-
:call add(tl, [2, '[0-9a-fA-F]*', '0a7', '0a7'])
296-
:call add(tl, [2, '[^0-9A-Fa-f]\+', '0a7'])
297-
:call add(tl, [2, '[a-z_A-Z0-9]\+', 'aso_sfoij', 'aso_sfoij'])
298-
:call add(tl, [2, '[a-z]', 'a', 'a'])
299-
:call add(tl, [2, '[a-zA-Z]', 'a', 'a'])
300-
:call add(tl, [2, '[A-Z]', 'a'])
292+
:"""" Test recognition of character classes
293+
:call add(tl, [2, '[0-7]\+', 'x0123456789x', '01234567'])
294+
:call add(tl, [2, '[^0-7]\+', '0a;X+% 897', 'a;X+% 89'])
295+
:call add(tl, [2, '[0-9]\+', 'x0123456789x', '0123456789'])
296+
:call add(tl, [2, '[^0-9]\+', '0a;X+% 9', 'a;X+% '])
297+
:call add(tl, [2, '[0-9a-fA-F]\+', 'x0189abcdefg', '0189abcdef'])
298+
:call add(tl, [2, '[^0-9A-Fa-f]\+', '0189g;X+% ab', 'g;X+% '])
299+
:call add(tl, [2, '[a-z_A-Z0-9]\+', ';+aso_SfOij ', 'aso_SfOij'])
300+
:call add(tl, [2, '[^a-z_A-Z0-9]\+', 'aSo_;+% sfOij', ';+% '])
301+
:call add(tl, [2, '[a-z_A-Z]\+', '0abyz_ABYZ;', 'abyz_ABYZ'])
302+
:call add(tl, [2, '[^a-z_A-Z]\+', 'abAB_09;+% yzYZ', '09;+% '])
303+
:call add(tl, [2, '[a-z]\+', '0abcxyz1', 'abcxyz'])
304+
:call add(tl, [2, '[a-z]\+', 'AabxyzZ', 'abxyz'])
305+
:call add(tl, [2, '[^a-z]\+', 'a;X09+% x', ';X09+% '])
306+
:call add(tl, [2, '[^a-z]\+', 'abX0;%yz', 'X0;%'])
307+
:call add(tl, [2, '[a-zA-Z]\+', '0abABxzXZ9', 'abABxzXZ'])
308+
:call add(tl, [2, '[^a-zA-Z]\+', 'ab09_;+ XZ', '09_;+ '])
309+
:call add(tl, [2, '[A-Z]\+', 'aABXYZz', 'ABXYZ'])
310+
:call add(tl, [2, '[^A-Z]\+', 'ABx0;%YZ', 'x0;%'])
311+
:call add(tl, [2, '[a-z]\+\c', '0abxyzABXYZ;', 'abxyzABXYZ'])
312+
:call add(tl, [2, '[A-Z]\+\c', '0abABxzXZ9', 'abABxzXZ'])
313+
:call add(tl, [2, '\c[^a-z]\+', 'ab09_;+ XZ', '09_;+ '])
314+
:call add(tl, [2, '\c[^A-Z]\+', 'ab09_;+ XZ', '09_;+ '])
301315
:call add(tl, [2, '\C[^A-Z]\+', 'ABCOIJDEOIFNSD jsfoij sa', ' jsfoij sa'])
302316
:"
303317
:"""" Tests for \z features

src/testdir/test64.ok

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -650,30 +650,72 @@ OK 2 - .a\%$
650650
OK 0 - .a\%$
651651
OK 1 - .a\%$
652652
OK 2 - .a\%$
653-
OK 0 - [0-9]
654-
OK 1 - [0-9]
655-
OK 2 - [0-9]
656-
OK 0 - [^0-9]
657-
OK 1 - [^0-9]
658-
OK 2 - [^0-9]
659-
OK 0 - [0-9a-fA-F]*
660-
OK 1 - [0-9a-fA-F]*
661-
OK 2 - [0-9a-fA-F]*
653+
OK 0 - [0-7]\+
654+
OK 1 - [0-7]\+
655+
OK 2 - [0-7]\+
656+
OK 0 - [^0-7]\+
657+
OK 1 - [^0-7]\+
658+
OK 2 - [^0-7]\+
659+
OK 0 - [0-9]\+
660+
OK 1 - [0-9]\+
661+
OK 2 - [0-9]\+
662+
OK 0 - [^0-9]\+
663+
OK 1 - [^0-9]\+
664+
OK 2 - [^0-9]\+
665+
OK 0 - [0-9a-fA-F]\+
666+
OK 1 - [0-9a-fA-F]\+
667+
OK 2 - [0-9a-fA-F]\+
662668
OK 0 - [^0-9A-Fa-f]\+
663669
OK 1 - [^0-9A-Fa-f]\+
664670
OK 2 - [^0-9A-Fa-f]\+
665671
OK 0 - [a-z_A-Z0-9]\+
666672
OK 1 - [a-z_A-Z0-9]\+
667673
OK 2 - [a-z_A-Z0-9]\+
668-
OK 0 - [a-z]
669-
OK 1 - [a-z]
670-
OK 2 - [a-z]
671-
OK 0 - [a-zA-Z]
672-
OK 1 - [a-zA-Z]
673-
OK 2 - [a-zA-Z]
674-
OK 0 - [A-Z]
675-
OK 1 - [A-Z]
676-
OK 2 - [A-Z]
674+
OK 0 - [^a-z_A-Z0-9]\+
675+
OK 1 - [^a-z_A-Z0-9]\+
676+
OK 2 - [^a-z_A-Z0-9]\+
677+
OK 0 - [a-z_A-Z]\+
678+
OK 1 - [a-z_A-Z]\+
679+
OK 2 - [a-z_A-Z]\+
680+
OK 0 - [^a-z_A-Z]\+
681+
OK 1 - [^a-z_A-Z]\+
682+
OK 2 - [^a-z_A-Z]\+
683+
OK 0 - [a-z]\+
684+
OK 1 - [a-z]\+
685+
OK 2 - [a-z]\+
686+
OK 0 - [a-z]\+
687+
OK 1 - [a-z]\+
688+
OK 2 - [a-z]\+
689+
OK 0 - [^a-z]\+
690+
OK 1 - [^a-z]\+
691+
OK 2 - [^a-z]\+
692+
OK 0 - [^a-z]\+
693+
OK 1 - [^a-z]\+
694+
OK 2 - [^a-z]\+
695+
OK 0 - [a-zA-Z]\+
696+
OK 1 - [a-zA-Z]\+
697+
OK 2 - [a-zA-Z]\+
698+
OK 0 - [^a-zA-Z]\+
699+
OK 1 - [^a-zA-Z]\+
700+
OK 2 - [^a-zA-Z]\+
701+
OK 0 - [A-Z]\+
702+
OK 1 - [A-Z]\+
703+
OK 2 - [A-Z]\+
704+
OK 0 - [^A-Z]\+
705+
OK 1 - [^A-Z]\+
706+
OK 2 - [^A-Z]\+
707+
OK 0 - [a-z]\+\c
708+
OK 1 - [a-z]\+\c
709+
OK 2 - [a-z]\+\c
710+
OK 0 - [A-Z]\+\c
711+
OK 1 - [A-Z]\+\c
712+
OK 2 - [A-Z]\+\c
713+
OK 0 - \c[^a-z]\+
714+
OK 1 - \c[^a-z]\+
715+
OK 2 - \c[^a-z]\+
716+
OK 0 - \c[^A-Z]\+
717+
OK 1 - \c[^A-Z]\+
718+
OK 2 - \c[^A-Z]\+
677719
OK 0 - \C[^A-Z]\+
678720
OK 1 - \C[^A-Z]\+
679721
OK 2 - \C[^A-Z]\+

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,8 @@ static char *(features[]) =
727727

728728
static int included_patches[] =
729729
{ /* Add new patch number below this line */
730+
/**/
731+
1,
730732
/**/
731733
0
732734
};

0 commit comments

Comments
 (0)