|
29 | 29 | # define NFA_REGEXP_DEBUG_LOG "nfa_regexp_debug.log" |
30 | 30 | #endif |
31 | 31 |
|
| 32 | +/* Added to a code in the range NFA_ANY .. NFA_NUPPER_IC to make it also match a NL. */ |
| 33 | +#define NFA_ADD_NL 31 |
| 34 | + |
32 | 35 | enum |
33 | 36 | { |
34 | 37 | NFA_SPLIT = -1024, |
@@ -183,6 +186,13 @@ enum |
183 | 186 | NFA_NLOWER, /* Match non-lowercase char */ |
184 | 187 | NFA_UPPER, /* Match uppercase char */ |
185 | 188 | NFA_NUPPER, /* Match non-uppercase char */ |
| 189 | + NFA_LOWER_IC, /* Match [a-z] */ |
| 190 | + NFA_NLOWER_IC, /* Match [^a-z] */ |
| 191 | + NFA_UPPER_IC, /* Match [A-Z] */ |
| 192 | + NFA_NUPPER_IC, /* Match [^A-Z] */ |
| 193 | + |
| 194 | + NFA_FIRST_NL = NFA_ANY + NFA_ADD_NL, |
| 195 | + NFA_LAST_NL = NFA_NUPPER_IC + NFA_ADD_NL, |
186 | 196 |
|
187 | 197 | NFA_CURSOR, /* Match cursor pos */ |
188 | 198 | NFA_LNUM, /* Match line number */ |
|
199 | 209 | NFA_MARK_LT, /* Match < mark */ |
200 | 210 | NFA_VISUAL, /* Match Visual area */ |
201 | 211 |
|
202 | | - NFA_FIRST_NL = NFA_ANY + ADD_NL, |
203 | | - NFA_LAST_NL = NFA_NUPPER + ADD_NL, |
204 | | - |
205 | 212 | /* Character classes [:alnum:] etc */ |
206 | 213 | NFA_CLASS_ALNUM, |
207 | 214 | NFA_CLASS_ALPHA, |
@@ -578,6 +585,8 @@ realloc_post_list() |
578 | 585 | * On failure, return 0 (=FAIL) |
579 | 586 | * Start points to the first char of the range, while end should point |
580 | 587 | * to the closing brace. |
| 588 | + * Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may |
| 589 | + * need to be interpreted as [a-zA-Z]. |
581 | 590 | */ |
582 | 591 | static int |
583 | 592 | nfa_recognize_char_class(start, end, extra_newl) |
@@ -681,7 +690,7 @@ nfa_recognize_char_class(start, end, extra_newl) |
681 | 690 | return FAIL; |
682 | 691 |
|
683 | 692 | if (newl == TRUE) |
684 | | - extra_newl = ADD_NL; |
| 693 | + extra_newl = NFA_ADD_NL; |
685 | 694 |
|
686 | 695 | switch (config) |
687 | 696 | { |
@@ -710,13 +719,13 @@ nfa_recognize_char_class(start, end, extra_newl) |
710 | 719 | case CLASS_not | CLASS_az | CLASS_AZ: |
711 | 720 | return extra_newl + NFA_NALPHA; |
712 | 721 | case CLASS_az: |
713 | | - return extra_newl + NFA_LOWER; |
| 722 | + return extra_newl + NFA_LOWER_IC; |
714 | 723 | case CLASS_not | CLASS_az: |
715 | | - return extra_newl + NFA_NLOWER; |
| 724 | + return extra_newl + NFA_NLOWER_IC; |
716 | 725 | case CLASS_AZ: |
717 | | - return extra_newl + NFA_UPPER; |
| 726 | + return extra_newl + NFA_UPPER_IC; |
718 | 727 | case CLASS_not | CLASS_AZ: |
719 | | - return extra_newl + NFA_NUPPER; |
| 728 | + return extra_newl + NFA_NUPPER_IC; |
720 | 729 | } |
721 | 730 | return FAIL; |
722 | 731 | } |
@@ -914,7 +923,7 @@ nfa_regatom() |
914 | 923 | break; |
915 | 924 | } |
916 | 925 |
|
917 | | - extra = ADD_NL; |
| 926 | + extra = NFA_ADD_NL; |
918 | 927 |
|
919 | 928 | /* "\_[" is collection plus newline */ |
920 | 929 | if (c == '[') |
@@ -970,7 +979,7 @@ nfa_regatom() |
970 | 979 | } |
971 | 980 | #endif |
972 | 981 | EMIT(nfa_classcodes[p - classchars]); |
973 | | - if (extra == ADD_NL) |
| 982 | + if (extra == NFA_ADD_NL) |
974 | 983 | { |
975 | 984 | EMIT(NFA_NEWL); |
976 | 985 | EMIT(NFA_OR); |
@@ -1240,21 +1249,21 @@ nfa_regatom() |
1240 | 1249 | { |
1241 | 1250 | /* |
1242 | 1251 | * Try to reverse engineer character classes. For example, |
1243 | | - * recognize that [0-9] stands for \d and [A-Za-z_] with \h, |
| 1252 | + * recognize that [0-9] stands for \d and [A-Za-z_] for \h, |
1244 | 1253 | * and perform the necessary substitutions in the NFA. |
1245 | 1254 | */ |
1246 | 1255 | result = nfa_recognize_char_class(regparse, endp, |
1247 | | - extra == ADD_NL); |
| 1256 | + extra == NFA_ADD_NL); |
1248 | 1257 | if (result != FAIL) |
1249 | 1258 | { |
1250 | | - if (result >= NFA_DIGIT && result <= NFA_NUPPER) |
1251 | | - EMIT(result); |
1252 | | - else /* must be char class + newline */ |
| 1259 | + if (result >= NFA_FIRST_NL && result <= NFA_LAST_NL) |
1253 | 1260 | { |
1254 | | - EMIT(result - ADD_NL); |
| 1261 | + EMIT(result - NFA_ADD_NL); |
1255 | 1262 | EMIT(NFA_NEWL); |
1256 | 1263 | EMIT(NFA_OR); |
1257 | 1264 | } |
| 1265 | + else |
| 1266 | + EMIT(result); |
1258 | 1267 | regparse = endp; |
1259 | 1268 | mb_ptr_adv(regparse); |
1260 | 1269 | return OK; |
@@ -1504,7 +1513,7 @@ nfa_regatom() |
1504 | 1513 | * collection, add an OR below. But not for negated |
1505 | 1514 | * range. */ |
1506 | 1515 | if (!negated) |
1507 | | - extra = ADD_NL; |
| 1516 | + extra = NFA_ADD_NL; |
1508 | 1517 | } |
1509 | 1518 | else |
1510 | 1519 | { |
@@ -1537,7 +1546,7 @@ nfa_regatom() |
1537 | 1546 | EMIT(NFA_END_COLL); |
1538 | 1547 |
|
1539 | 1548 | /* \_[] also matches \n but it's not negated */ |
1540 | | - if (extra == ADD_NL) |
| 1549 | + if (extra == NFA_ADD_NL) |
1541 | 1550 | { |
1542 | 1551 | EMIT(reg_string ? NL : NFA_NEWL); |
1543 | 1552 | EMIT(NFA_OR); |
@@ -2011,7 +2020,7 @@ nfa_set_code(c) |
2011 | 2020 | if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL) |
2012 | 2021 | { |
2013 | 2022 | addnl = TRUE; |
2014 | | - c -= ADD_NL; |
| 2023 | + c -= NFA_ADD_NL; |
2015 | 2024 | } |
2016 | 2025 |
|
2017 | 2026 | STRCPY(code, ""); |
@@ -2217,6 +2226,10 @@ nfa_set_code(c) |
2217 | 2226 | case NFA_NLOWER:STRCPY(code, "NFA_NLOWER"); break; |
2218 | 2227 | case NFA_UPPER: STRCPY(code, "NFA_UPPER"); break; |
2219 | 2228 | case NFA_NUPPER:STRCPY(code, "NFA_NUPPER"); break; |
| 2229 | + case NFA_LOWER_IC: STRCPY(code, "NFA_LOWER_IC"); break; |
| 2230 | + case NFA_NLOWER_IC: STRCPY(code, "NFA_NLOWER_IC"); break; |
| 2231 | + case NFA_UPPER_IC: STRCPY(code, "NFA_UPPER_IC"); break; |
| 2232 | + case NFA_NUPPER_IC: STRCPY(code, "NFA_NUPPER_IC"); break; |
2220 | 2233 |
|
2221 | 2234 | default: |
2222 | 2235 | STRCPY(code, "CHAR(x)"); |
@@ -2687,6 +2700,10 @@ nfa_max_width(startstate, depth) |
2687 | 2700 | case NFA_NLOWER: |
2688 | 2701 | case NFA_UPPER: |
2689 | 2702 | case NFA_NUPPER: |
| 2703 | + case NFA_LOWER_IC: |
| 2704 | + case NFA_NLOWER_IC: |
| 2705 | + case NFA_UPPER_IC: |
| 2706 | + case NFA_NUPPER_IC: |
2690 | 2707 | /* possibly non-ascii */ |
2691 | 2708 | #ifdef FEAT_MBYTE |
2692 | 2709 | if (has_mbyte) |
@@ -3765,6 +3782,9 @@ pim_equal(one, two) |
3765 | 3782 | if (two_unused) |
3766 | 3783 | /* one is used and two is not: not equal */ |
3767 | 3784 | return FALSE; |
| 3785 | + /* compare the state id */ |
| 3786 | + if (one->state->id != two->state->id) |
| 3787 | + return FALSE; |
3768 | 3788 | /* compare the position */ |
3769 | 3789 | if (REG_MULTI) |
3770 | 3790 | return one->end.pos.lnum == two->end.pos.lnum |
@@ -3841,6 +3861,10 @@ match_follows(startstate, depth) |
3841 | 3861 | case NFA_NLOWER: |
3842 | 3862 | case NFA_UPPER: |
3843 | 3863 | case NFA_NUPPER: |
| 3864 | + case NFA_LOWER_IC: |
| 3865 | + case NFA_NLOWER_IC: |
| 3866 | + case NFA_UPPER_IC: |
| 3867 | + case NFA_NUPPER_IC: |
3844 | 3868 | case NFA_START_COLL: |
3845 | 3869 | case NFA_START_NEG_COLL: |
3846 | 3870 | case NFA_NEWL: |
@@ -4096,7 +4120,7 @@ addstate(l, state, subs_arg, pim, off) |
4096 | 4120 | sub = &subs->norm; |
4097 | 4121 | } |
4098 | 4122 | #ifdef FEAT_SYN_HL |
4099 | | - else if (state->c >= NFA_ZOPEN) |
| 4123 | + else if (state->c >= NFA_ZOPEN && state->c <= NFA_ZOPEN9) |
4100 | 4124 | { |
4101 | 4125 | subidx = state->c - NFA_ZOPEN; |
4102 | 4126 | sub = &subs->synt; |
@@ -4165,6 +4189,13 @@ addstate(l, state, subs_arg, pim, off) |
4165 | 4189 | } |
4166 | 4190 |
|
4167 | 4191 | subs = addstate(l, state->out, subs, pim, off); |
| 4192 | + /* "subs" may have changed, need to set "sub" again */ |
| 4193 | +#ifdef FEAT_SYN_HL |
| 4194 | + if (state->c >= NFA_ZOPEN && state->c <= NFA_ZOPEN9) |
| 4195 | + sub = &subs->synt; |
| 4196 | + else |
| 4197 | +#endif |
| 4198 | + sub = &subs->norm; |
4168 | 4199 |
|
4169 | 4200 | if (save_in_use == -1) |
4170 | 4201 | { |
@@ -4213,7 +4244,7 @@ addstate(l, state, subs_arg, pim, off) |
4213 | 4244 | sub = &subs->norm; |
4214 | 4245 | } |
4215 | 4246 | #ifdef FEAT_SYN_HL |
4216 | | - else if (state->c >= NFA_ZCLOSE) |
| 4247 | + else if (state->c >= NFA_ZCLOSE && state->c <= NFA_ZCLOSE9) |
4217 | 4248 | { |
4218 | 4249 | subidx = state->c - NFA_ZCLOSE; |
4219 | 4250 | sub = &subs->synt; |
@@ -4257,6 +4288,13 @@ addstate(l, state, subs_arg, pim, off) |
4257 | 4288 | } |
4258 | 4289 |
|
4259 | 4290 | subs = addstate(l, state->out, subs, pim, off); |
| 4291 | + /* "subs" may have changed, need to set "sub" again */ |
| 4292 | +#ifdef FEAT_SYN_HL |
| 4293 | + if (state->c >= NFA_ZCLOSE && state->c <= NFA_ZCLOSE9) |
| 4294 | + sub = &subs->synt; |
| 4295 | + else |
| 4296 | +#endif |
| 4297 | + sub = &subs->norm; |
4260 | 4298 |
|
4261 | 4299 | if (REG_MULTI) |
4262 | 4300 | sub->list.multi[subidx].end = save_lpos; |
@@ -5872,6 +5910,28 @@ nfa_regmatch(prog, start, submatch, m) |
5872 | 5910 | ADD_STATE_IF_MATCH(t->state); |
5873 | 5911 | break; |
5874 | 5912 |
|
| 5913 | + case NFA_LOWER_IC: /* [a-z] */ |
| 5914 | + result = ri_lower(curc) || (ireg_ic && ri_upper(curc)); |
| 5915 | + ADD_STATE_IF_MATCH(t->state); |
| 5916 | + break; |
| 5917 | + |
| 5918 | + case NFA_NLOWER_IC: /* [^a-z] */ |
| 5919 | + result = curc != NUL |
| 5920 | + && !(ri_lower(curc) || (ireg_ic && ri_upper(curc))); |
| 5921 | + ADD_STATE_IF_MATCH(t->state); |
| 5922 | + break; |
| 5923 | + |
| 5924 | + case NFA_UPPER_IC: /* [A-Z] */ |
| 5925 | + result = ri_upper(curc) || (ireg_ic && ri_lower(curc)); |
| 5926 | + ADD_STATE_IF_MATCH(t->state); |
| 5927 | + break; |
| 5928 | + |
| 5929 | + case NFA_NUPPER_IC: /* [^A-Z] */ |
| 5930 | + result = curc != NUL |
| 5931 | + && !(ri_upper(curc) || (ireg_ic && ri_lower(curc))); |
| 5932 | + ADD_STATE_IF_MATCH(t->state); |
| 5933 | + break; |
| 5934 | + |
5875 | 5935 | case NFA_BACKREF1: |
5876 | 5936 | case NFA_BACKREF2: |
5877 | 5937 | case NFA_BACKREF3: |
|
0 commit comments