Skip to content

Commit 73eacbf

Browse files
brammool authored and douglasdrumond committed
patch 7.4.704. Problem: Searching for a character matches an illegal byte and causes invalid memory access. (Dominique Pelle) Solution: Do not match an invalid byte when searching for a character in a string. Fix the equivalence classes that used negative numbers, which resulted in illegal bytes.
1 parent dbbc001 commit 73eacbf

File tree

4 files changed

+74
-60
lines changed

4 files changed

+74
-60
lines changed

src/misc2.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1885,9 +1885,12 @@ vim_strchr(string, c)
18851885
{
18861886
while (*p != NUL)
18871887
{
1888-
if (utf_ptr2char(p) == c)
1888+
int l = (*mb_ptr2len)(p);
1889+
1890+
/* Avoid matching an illegal byte here. */
1891+
if (utf_ptr2char(p) == c && l > 1)
18891892
return p;
1890-
p += (*mb_ptr2len)(p);
1893+
p += l;
18911894
}
18921895
return NULL;
18931896
}

src/regexp.c

Lines changed: 51 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -845,23 +845,24 @@ reg_equi_class(c)
845845
#else
846846
switch (c)
847847
{
848-
case 'A': case '\300': case '\301': case '\302':
848+
/* Do not use '\300' style, it results in a negative number. */
849+
case 'A': case 0xc0: case 0xc1: case 0xc2:
850+
case 0xc3: case 0xc4: case 0xc5:
849851
CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
850852
CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
851-
case '\303': case '\304': case '\305':
852-
regmbc('A'); regmbc('\300'); regmbc('\301');
853-
regmbc('\302'); regmbc('\303'); regmbc('\304');
854-
regmbc('\305');
853+
regmbc('A'); regmbc(0xc0); regmbc(0xc1);
854+
regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
855+
regmbc(0xc5);
855856
REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
856857
REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
857858
REGMBC(0x1ea2)
858859
return;
859860
case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
860861
regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
861862
return;
862-
case 'C': case '\307':
863+
case 'C': case 0xc7:
863864
CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
864-
regmbc('C'); regmbc('\307');
865+
regmbc('C'); regmbc(0xc7);
865866
REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
866867
REGMBC(0x10c)
867868
return;
@@ -870,11 +871,11 @@ reg_equi_class(c)
870871
regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
871872
REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
872873
return;
873-
case 'E': case '\310': case '\311': case '\312': case '\313':
874+
case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
874875
CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
875876
CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
876-
regmbc('E'); regmbc('\310'); regmbc('\311');
877-
regmbc('\312'); regmbc('\313');
877+
regmbc('E'); regmbc(0xc8); regmbc(0xc9);
878+
regmbc(0xca); regmbc(0xcb);
878879
REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
879880
REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
880881
REGMBC(0x1ebc)
@@ -894,11 +895,11 @@ reg_equi_class(c)
894895
regmbc('H'); REGMBC(0x124) REGMBC(0x126)
895896
REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
896897
return;
897-
case 'I': case '\314': case '\315': case '\316': case '\317':
898+
case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
898899
CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
899900
CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
900-
regmbc('I'); regmbc('\314'); regmbc('\315');
901-
regmbc('\316'); regmbc('\317');
901+
regmbc('I'); regmbc(0xcc); regmbc(0xcd);
902+
regmbc(0xce); regmbc(0xcf);
902903
REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
903904
REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
904905
REGMBC(0x1ec8)
@@ -920,20 +921,20 @@ reg_equi_class(c)
920921
case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
921922
regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
922923
return;
923-
case 'N': case '\321':
924+
case 'N': case 0xd1:
924925
CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
925926
CASEMBC(0x1e48)
926-
regmbc('N'); regmbc('\321');
927+
regmbc('N'); regmbc(0xd1);
927928
REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
928929
REGMBC(0x1e44) REGMBC(0x1e48)
929930
return;
930-
case 'O': case '\322': case '\323': case '\324': case '\325':
931-
case '\326': case '\330':
931+
case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
932+
case 0xd6: case 0xd8:
932933
CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
933934
CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
934-
regmbc('O'); regmbc('\322'); regmbc('\323');
935-
regmbc('\324'); regmbc('\325'); regmbc('\326');
936-
regmbc('\330');
935+
regmbc('O'); regmbc(0xd2); regmbc(0xd3);
936+
regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
937+
regmbc(0xd8);
937938
REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
938939
REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
939940
REGMBC(0x1ec) REGMBC(0x1ece)
@@ -956,12 +957,12 @@ reg_equi_class(c)
956957
regmbc('T'); REGMBC(0x162) REGMBC(0x164)
957958
REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
958959
return;
959-
case 'U': case '\331': case '\332': case '\333': case '\334':
960+
case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
960961
CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
961962
CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
962963
CASEMBC(0x1ee6)
963-
regmbc('U'); regmbc('\331'); regmbc('\332');
964-
regmbc('\333'); regmbc('\334');
964+
regmbc('U'); regmbc(0xd9); regmbc(0xda);
965+
regmbc(0xdb); regmbc(0xdc);
965966
REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
966967
REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
967968
REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
@@ -977,10 +978,10 @@ reg_equi_class(c)
977978
case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
978979
regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
979980
return;
980-
case 'Y': case '\335':
981+
case 'Y': case 0xdd:
981982
CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
982983
CASEMBC(0x1ef6) CASEMBC(0x1ef8)
983-
regmbc('Y'); regmbc('\335');
984+
regmbc('Y'); regmbc(0xdd);
984985
REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
985986
REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
986987
return;
@@ -990,23 +991,23 @@ reg_equi_class(c)
990991
REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
991992
REGMBC(0x1e94)
992993
return;
993-
case 'a': case '\340': case '\341': case '\342':
994-
case '\343': case '\344': case '\345':
994+
case 'a': case 0xe0: case 0xe1: case 0xe2:
995+
case 0xe3: case 0xe4: case 0xe5:
995996
CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
996997
CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
997-
regmbc('a'); regmbc('\340'); regmbc('\341');
998-
regmbc('\342'); regmbc('\343'); regmbc('\344');
999-
regmbc('\345');
998+
regmbc('a'); regmbc(0xe0); regmbc(0xe1);
999+
regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
1000+
regmbc(0xe5);
10001001
REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
10011002
REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
10021003
REGMBC(0x1ea3)
10031004
return;
10041005
case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
10051006
regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
10061007
return;
1007-
case 'c': case '\347':
1008+
case 'c': case 0xe7:
10081009
CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
1009-
regmbc('c'); regmbc('\347');
1010+
regmbc('c'); regmbc(0xe7);
10101011
REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
10111012
REGMBC(0x10d)
10121013
return;
@@ -1015,11 +1016,11 @@ reg_equi_class(c)
10151016
regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
10161017
REGMBC(0x1e0b) REGMBC(0x01e0f) REGMBC(0x1e11)
10171018
return;
1018-
case 'e': case '\350': case '\351': case '\352': case '\353':
1019+
case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
10191020
CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
10201021
CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
1021-
regmbc('e'); regmbc('\350'); regmbc('\351');
1022-
regmbc('\352'); regmbc('\353');
1022+
regmbc('e'); regmbc(0xe8); regmbc(0xe9);
1023+
regmbc(0xea); regmbc(0xeb);
10231024
REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
10241025
REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
10251026
REGMBC(0x1ebd)
@@ -1040,11 +1041,11 @@ reg_equi_class(c)
10401041
REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
10411042
REGMBC(0x1e96)
10421043
return;
1043-
case 'i': case '\354': case '\355': case '\356': case '\357':
1044+
case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
10441045
CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
10451046
CASEMBC(0x1d0) CASEMBC(0x1ec9)
1046-
regmbc('i'); regmbc('\354'); regmbc('\355');
1047-
regmbc('\356'); regmbc('\357');
1047+
regmbc('i'); regmbc(0xec); regmbc(0xed);
1048+
regmbc(0xee); regmbc(0xef);
10481049
REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
10491050
REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
10501051
return;
@@ -1065,20 +1066,20 @@ reg_equi_class(c)
10651066
case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
10661067
regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
10671068
return;
1068-
case 'n': case '\361':
1069+
case 'n': case 0xf1:
10691070
CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
10701071
CASEMBC(0x1e45) CASEMBC(0x1e49)
1071-
regmbc('n'); regmbc('\361');
1072+
regmbc('n'); regmbc(0xf1);
10721073
REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
10731074
REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
10741075
return;
1075-
case 'o': case '\362': case '\363': case '\364': case '\365':
1076-
case '\366': case '\370':
1076+
case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
1077+
case 0xf6: case 0xf8:
10771078
CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
10781079
CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
1079-
regmbc('o'); regmbc('\362'); regmbc('\363');
1080-
regmbc('\364'); regmbc('\365'); regmbc('\366');
1081-
regmbc('\370');
1080+
regmbc('o'); regmbc(0xf2); regmbc(0xf3);
1081+
regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
1082+
regmbc(0xf8);
10821083
REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
10831084
REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
10841085
REGMBC(0x1ed) REGMBC(0x1ecf)
@@ -1101,12 +1102,12 @@ reg_equi_class(c)
11011102
regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
11021103
REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
11031104
return;
1104-
case 'u': case '\371': case '\372': case '\373': case '\374':
1105+
case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
11051106
CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
11061107
CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
11071108
CASEMBC(0x1ee7)
1108-
regmbc('u'); regmbc('\371'); regmbc('\372');
1109-
regmbc('\373'); regmbc('\374');
1109+
regmbc('u'); regmbc(0xf9); regmbc(0xfa);
1110+
regmbc(0xfb); regmbc(0xfc);
11101111
REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
11111112
REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
11121113
REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
@@ -1123,10 +1124,10 @@ reg_equi_class(c)
11231124
case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
11241125
regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
11251126
return;
1126-
case 'y': case '\375': case '\377':
1127+
case 'y': case 0xfd: case 0xff:
11271128
CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
11281129
CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
1129-
regmbc('y'); regmbc('\375'); regmbc('\377');
1130+
regmbc('y'); regmbc(0xfd); regmbc(0xff);
11301131
REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
11311132
REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
11321133
return;

src/testdir/test44.in

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,25 @@ x:" Now search for multi-byte with composing char
2424
x:" find word by change of word class
2525
/ち\<カヨ\>は
2626
x:" Test \%u, [\u] and friends
27+
:" c
2728
/\%u20ac
28-
x/[\u4f7f\u5929]\+
29-
x/\%U12345678
30-
x/[\U1234abcd\u1234\uabcd]
31-
x/\%d21879b
32-
x/ [[=A=]]* [[=B=]]* [[=C=]]* [[=D=]]* [[=E=]]* [[=F=]]* [[=G=]]* [[=H=]]* [[=I=]]* [[=J=]]* [[=K=]]* [[=L=]]* [[=M=]]* [[=N=]]* [[=O=]]* [[=P=]]* [[=Q=]]* [[=R=]]* [[=S=]]* [[=T=]]* [[=U=]]* [[=V=]]* [[=W=]]* [[=X=]]* [[=Y=]]* [[=Z=]]*/e
33-
x/ [[=a=]]* [[=b=]]* [[=c=]]* [[=d=]]* [[=e=]]* [[=f=]]* [[=g=]]* [[=h=]]* [[=i=]]* [[=j=]]* [[=k=]]* [[=l=]]* [[=m=]]* [[=n=]]* [[=o=]]* [[=p=]]* [[=q=]]* [[=r=]]* [[=s=]]* [[=t=]]* [[=u=]]* [[=v=]]* [[=w=]]* [[=x=]]* [[=y=]]* [[=z=]]*/e
34-
x:" Test backwards search from a multi-byte char
29+
x:" d
30+
/[\u4f7f\u5929]\+
31+
x:" e
32+
/\%U12345678
33+
x:" f
34+
/[\U1234abcd\u1234\uabcd]
35+
x:" g
36+
/\%d21879b
37+
x:" h
38+
/ [[=A=]]* [[=B=]]* [[=C=]]* [[=D=]]* [[=E=]]* [[=F=]]* [[=G=]]* [[=H=]]* [[=I=]]* [[=J=]]* [[=K=]]* [[=L=]]* [[=M=]]* [[=N=]]* [[=O=]]* [[=P=]]* [[=Q=]]* [[=R=]]* [[=S=]]* [[=T=]]* [[=U=]]* [[=V=]]* [[=W=]]* [[=X=]]* [[=Y=]]* [[=Z=]]*/e
39+
x:" i
40+
/ [[=a=]]* [[=b=]]* [[=c=]]* [[=d=]]* [[=e=]]* [[=f=]]* [[=g=]]* [[=h=]]* [[=i=]]* [[=j=]]* [[=k=]]* [[=l=]]* [[=m=]]* [[=n=]]* [[=o=]]* [[=p=]]* [[=q=]]* [[=r=]]* [[=s=]]* [[=t=]]* [[=u=]]* [[=v=]]* [[=w=]]* [[=x=]]* [[=y=]]* [[=z=]]*/e
41+
x:" j Test backwards search from a multi-byte char
3542
/x
3643
x?.
37-
x:let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
44+
x:" k
45+
:let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
3846
:@w
3947
:?^1?,$w! test.out
4048
:e! test.out

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,8 @@ static char *(features[]) =
756756

757757
static int included_patches[] =
758758
{ /* Add new patch number below this line */
759+
/**/
760+
704,
759761
/**/
760762
703,
761763
/**/

0 commit comments

Comments
 (0)