@@ -16,22 +16,26 @@ function MBStringChars($string, $encoding) {
1616ini_set ('include_path ' , __DIR__ );
1717include_once ('common.inc ' );
1818
19- // EUC-JP
2019$ euc_jp = pack ('H* ' , '30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3 ' );
21- // UTF-8
22- $ utf8 = pack ('H* ' , 'e288ae2045e28b856461203d2051 ' ); // has 2 multi-byte characters: [e288ae 20 45 e28b85 64 61 20 3d 20 51]
23- // UTF-16LE
20+ $ utf8 = pack ('H* ' , 'e288ae2045e28b856461203d2051 ' ); // has 2 multi-byte characters: [e288ae 20 45 e28b85 64 61 20 3d 20 51]
2421$ utf16le = pack ('H* ' , '1a043804400438043b043b04380446043004200069007300200043007900720069006c006c0069006300 ' );
22+ $ utf32be = mb_convert_encoding ($ utf8 , 'UTF-32BE ' , 'UTF-8 ' );
23+ $ iso2022jp = mb_convert_encoding ("漢字 abc カナ " , 'ISO-2022-JP ' , 'UTF-8 ' ); // [<escape sequence>1b2442 3441 3b7a <escape sequence>1b2842 20 61 62 63 20 <escape sequence>1b2442 252b 254a <escape sequence>1b2842]
24+ $ jis = mb_convert_encoding ("漢字 abc カナ " , 'JIS ' , 'UTF-8 ' );
25+ // For testing ISO-2022-JP-2004, add a Kanji character which is in JISX 0213
26+ $ iso2022jp2004 = mb_convert_encoding ("漢字 abc カナ凜 " , 'ISO-2022-JP-2004 ' , 'UTF-8 ' ); // [1b242851 3441 3b7a 1b2842 20 61 62 63 20 1b242851 252b 254a 7425 1b2842]
27+ $ iso2022jpms = mb_convert_encoding ("漢字 abc カナ " , 'ISO-2022-JP-MS ' , 'UTF-8 ' ); // [1b2442 3441 3b7a 1b2842 20 61 62 63 20 1b2442 252b 254a 1b2842]
28+ $ iso2022jp_kddi = mb_convert_encoding ("漢字 abc カナ " , 'ISO-2022-JP-KDDI ' , 'UTF-8 ' );
2529
2630print "== EUC-JP == \n" ;
27- print MBStringChars (mb_strcut ($ euc_jp , 6 , 5 ,'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
28- print MBStringChars (mb_strcut ($ euc_jp , 5 , 5 ,'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
29- print MBStringChars (mb_strcut ($ euc_jp , 0 , 100 ,'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
31+ print MBStringChars (mb_strcut ($ euc_jp , 6 , 5 , 'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
32+ print MBStringChars (mb_strcut ($ euc_jp , 5 , 5 , 'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
33+ print MBStringChars (mb_strcut ($ euc_jp , 0 , 100 , 'EUC-JP ' ), 'EUC-JP ' ) . "\n" ;
3034
31- $ str = mb_strcut ($ euc_jp , 100 , 10 ,'EUC-JP ' );
35+ $ str = mb_strcut ($ euc_jp , 100 , 10 , 'EUC-JP ' );
3236($ str === "" ) ? print "OK \n" : print "No good \n" ;
3337
34- $ str = mb_strcut ($ euc_jp , -100 , 10 ,'EUC-JP ' );
38+ $ str = mb_strcut ($ euc_jp , -100 , 10 , 'EUC-JP ' );
3539($ str !== "" ) ? print "OK \n" : print "No good \n" ;
3640
3741print "== UTF-8 == \n" ;
@@ -45,6 +49,17 @@ print MBStringChars(mb_strcut($utf8, 1, 2, 'UTF-8'), 'UTF-8') . "\n";
4549print MBStringChars (mb_strcut ($ utf8 , 1 , 3 , 'UTF-8 ' ), 'UTF-8 ' ) . "\n" ;
4650print MBStringChars (mb_strcut ($ utf8 , 1 , 4 , 'UTF-8 ' ), 'UTF-8 ' ) . "\n" ;
4751
52+ print MBStringChars (mb_strcut ('AåBäCöDü ' , 2 , 100 , 'UTF-8 ' ), 'UTF-8 ' ) . "\n" ;
53+
54+ print "== UTF-16 == \n" ;
55+ print "Single byte: [ " . bin2hex (mb_strcut ("\xFF" , 0 , 100 , 'UTF-16 ' )) . "] \n" ;
56+ print "With from=1: [ " . bin2hex (mb_strcut ("\xff\x01" , 1 , 100 , "UTF-16 " )) . "] \n" ;
57+ print "Bad surrogate: [ " . bin2hex (mb_strcut ("\xD9\xFF" , 0 , 100 , "UTF-16 " )) . "] \n" ;
58+ print "Bad surrogate followed by other bytes: [ " . bin2hex (mb_strcut ("\xd9\x00\x12C " , 0 , 100 , "UTF-16 " )) . "] \n" ;
59+ print "BE byte order mark: [ " . bin2hex (mb_strcut ("\xFE\xFF" , 0 , 100 , "UTF-16 " )) . "] \n" ;
60+ print "LE byte order mark: [ " . bin2hex (mb_strcut ("\xFF\xFE" , 0 , 100 , "UTF-16 " )) . "] \n" ;
61+ print "Length=0: [ " . bin2hex (mb_strcut ("\x00\x01\x00\x00" , 1 , -512 , "UTF-16 " )) . "] \n" ;
62+
4863print "== UTF-16LE == \n" ;
4964print MBStringChars (mb_strcut ($ utf16le , 0 , 0 , 'UTF-16LE ' ), 'UTF-16LE ' ) . "\n" ;
5065print MBStringChars (mb_strcut ($ utf16le , 0 , 1 , 'UTF-16LE ' ), 'UTF-16LE ' ) . "\n" ;
@@ -54,6 +69,162 @@ print MBStringChars(mb_strcut($utf16le, 1, 2, 'UTF-16LE'), 'UTF-16LE') . "\n";
5469print MBStringChars (mb_strcut ($ utf16le , 1 , 3 , 'UTF-16LE ' ), 'UTF-16LE ' ) . "\n" ;
5570print MBStringChars (mb_strcut ($ utf16le , 1 , 4 , 'UTF-16LE ' ), 'UTF-16LE ' ) . "\n" ;
5671
72+ print "Single byte: [ " . bin2hex (mb_strcut ("\xFF" , 0 , 100 , 'UTF-16LE ' )) . "] \n" ;
73+
74+ print "== UTF-32BE == \n" ;
75+ print MBStringChars (mb_strcut ($ utf32be , 0 , 3 , 'UTF-32BE ' ), 'UTF-32BE ' ) . "\n" ;
76+ print MBStringChars (mb_strcut ($ utf32be , 0 , 4 , 'UTF-32BE ' ), 'UTF-32BE ' ) . "\n" ;
77+ print MBStringChars (mb_strcut ($ utf32be , 0 , 5 , 'UTF-32BE ' ), 'UTF-32BE ' ) . "\n" ;
78+ print MBStringChars (mb_strcut ($ utf32be , 1 , 8 , 'UTF-32BE ' ), 'UTF-32BE ' ) . "\n" ;
79+ print MBStringChars (mb_strcut ($ utf32be , 3 , 9 , 'UTF-32BE ' ), 'UTF-32BE ' ) . "\n" ;
80+
81+ print "== ISO-2022-JP == \n" ;
82+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 3 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
83+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 4 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
84+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 5 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
85+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 6 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
86+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 7 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
87+ print MBStringChars (mb_strcut ($ iso2022jp , 0 , 8 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
88+
89+ print MBStringChars (mb_strcut ($ iso2022jp , 1 , 3 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
90+ print MBStringChars (mb_strcut ($ iso2022jp , 1 , 6 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
91+ print MBStringChars (mb_strcut ($ iso2022jp , 1 , 8 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
92+
93+ print MBStringChars (mb_strcut ($ iso2022jp , 2 , 5 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
94+ print MBStringChars (mb_strcut ($ iso2022jp , 5 , 9 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
95+ print MBStringChars (mb_strcut ($ iso2022jp , 5 , 11 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
96+ print MBStringChars (mb_strcut ($ iso2022jp , 6 , 13 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
97+ print MBStringChars (mb_strcut ($ iso2022jp , 7 , 13 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
98+
99+ print MBStringChars (mb_strcut ($ iso2022jp , 1 , 100 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
100+ print MBStringChars (mb_strcut ($ iso2022jp , 50 , 100 , 'ISO-2022-JP ' ), 'ISO-2022-JP ' ) . "\n" ;
101+
102+ print "Error followed by ASCII char: [ " . bin2hex (mb_strcut ("\xdaK " , 0 , 100 , "ISO-2022-JP " )) . "] \n" ;
103+
104+ print "== ISO-2022-JP-2004 == \n" ;
105+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 3 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
106+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 4 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
107+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 5 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
108+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 6 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
109+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 7 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
110+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 8 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
111+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 0 , 9 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
112+
113+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 1 , 3 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
114+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 1 , 6 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
115+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 1 , 8 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
116+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 1 , 9 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
117+
118+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 2 , 5 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
119+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 5 , 9 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
120+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 5 , 11 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
121+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 6 , 13 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
122+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 7 , 13 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
123+
124+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 1 , 100 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
125+ print MBStringChars (mb_strcut ($ iso2022jp2004 , 50 , 100 , 'ISO-2022-JP-2004 ' ), 'ISO-2022-JP-2004 ' ) . "\n" ;
126+
127+ print "== ISO-2022-JP-MS == \n" ;
128+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 3 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
129+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 4 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
130+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 5 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
131+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 6 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
132+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 7 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
133+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 8 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
134+ print MBStringChars (mb_strcut ($ iso2022jpms , 0 , 9 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
135+
136+ print MBStringChars (mb_strcut ($ iso2022jpms , 1 , 3 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
137+ print MBStringChars (mb_strcut ($ iso2022jpms , 1 , 6 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
138+ print MBStringChars (mb_strcut ($ iso2022jpms , 1 , 8 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
139+ print MBStringChars (mb_strcut ($ iso2022jpms , 1 , 9 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
140+
141+ print MBStringChars (mb_strcut ($ iso2022jpms , 2 , 5 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
142+ print MBStringChars (mb_strcut ($ iso2022jpms , 5 , 9 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
143+ print MBStringChars (mb_strcut ($ iso2022jpms , 5 , 11 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
144+ print MBStringChars (mb_strcut ($ iso2022jpms , 6 , 13 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
145+ print MBStringChars (mb_strcut ($ iso2022jpms , 7 , 13 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
146+
147+ print MBStringChars (mb_strcut ($ iso2022jpms , 1 , 100 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
148+ print MBStringChars (mb_strcut ($ iso2022jpms , 50 , 100 , 'ISO-2022-JP-MS ' ), 'ISO-2022-JP-MS ' ) . "\n" ;
149+
150+ print "== JIS == \n" ;
151+ print MBStringChars (mb_strcut ($ jis , 0 , 3 , 'JIS ' ), 'JIS ' ) . "\n" ;
152+ print MBStringChars (mb_strcut ($ jis , 0 , 4 , 'JIS ' ), 'JIS ' ) . "\n" ;
153+ print MBStringChars (mb_strcut ($ jis , 0 , 5 , 'JIS ' ), 'JIS ' ) . "\n" ;
154+ print MBStringChars (mb_strcut ($ jis , 0 , 6 , 'JIS ' ), 'JIS ' ) . "\n" ;
155+ print MBStringChars (mb_strcut ($ jis , 0 , 7 , 'JIS ' ), 'JIS ' ) . "\n" ;
156+ print MBStringChars (mb_strcut ($ jis , 0 , 8 , 'JIS ' ), 'JIS ' ) . "\n" ;
157+
158+ print MBStringChars (mb_strcut ($ jis , 1 , 3 , 'JIS ' ), 'JIS ' ) . "\n" ;
159+ print MBStringChars (mb_strcut ($ jis , 1 , 6 , 'JIS ' ), 'JIS ' ) . "\n" ;
160+ print MBStringChars (mb_strcut ($ jis , 1 , 8 , 'JIS ' ), 'JIS ' ) . "\n" ;
161+
162+ print MBStringChars (mb_strcut ($ jis , 2 , 5 , 'JIS ' ), 'JIS ' ) . "\n" ;
163+ print MBStringChars (mb_strcut ($ jis , 5 , 9 , 'JIS ' ), 'JIS ' ) . "\n" ;
164+ print MBStringChars (mb_strcut ($ jis , 5 , 11 , 'JIS ' ), 'JIS ' ) . "\n" ;
165+ print MBStringChars (mb_strcut ($ jis , 6 , 13 , 'JIS ' ), 'JIS ' ) . "\n" ;
166+ print MBStringChars (mb_strcut ($ jis , 7 , 13 , 'JIS ' ), 'JIS ' ) . "\n" ;
167+
168+ print MBStringChars (mb_strcut ($ jis , 1 , 100 , 'JIS ' ), 'JIS ' ) . "\n" ;
169+ print MBStringChars (mb_strcut ($ jis , 50 , 100 , 'JIS ' ), 'JIS ' ) . "\n" ;
170+
171+ print "0xA3: [ " . bin2hex (mb_strcut ("\xA3aaaaaa " , 0 , 100 , 'JIS ' )) . "] \n" ;
172+ print "Bad escape sequence followed by null byte: [ " . bin2hex (mb_strcut ("\x1b\x00" , 1 , 100 , "JIS " )) . "] \n" ;
173+
174+ print "== ISO-2022-JP-KDDI == \n" ;
175+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 3 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
176+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 4 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
177+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 5 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
178+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 6 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
179+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 7 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
180+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 0 , 8 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
181+
182+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 1 , 3 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
183+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 1 , 6 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
184+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 1 , 8 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
185+
186+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 2 , 5 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
187+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 5 , 9 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
188+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 5 , 11 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
189+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 6 , 13 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
190+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 7 , 13 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
191+
192+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 1 , 100 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
193+ print MBStringChars (mb_strcut ($ iso2022jp_kddi , 50 , 100 , 'ISO-2022-JP-KDDI ' ), 'ISO-2022-JP-KDDI ' ) . "\n" ;
194+
195+ print "== CP50220 == \n" ;
196+
197+ print "Single byte 0xFF: [ " . bin2hex (mb_strcut ("\xFF" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
198+ print "Double byte 0xFF: [ " . bin2hex (mb_strcut ("\xFF\xFF" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
199+ print "Sample string with multiple null bytes: [ " . bin2hex (mb_strcut ("\xCF\x00\x00\x00\x00\x00d \x00\x00" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
200+ print "Bad escape sequence preceded by bad bytes: [ " . bin2hex (mb_strcut ("\xFF\xFF\x1B\x00" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
201+ print "Good JISX 0208 sequence, but it won't fit in max number of bytes: [ " . bin2hex (mb_strcut ("\x1B\$BGV \x17" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
202+ print "Bad escape sequence followed by GR kana: [ " . bin2hex (mb_strcut ("\x1B\$\xAC\x13" , 0 , 100 , 'CP50220 ' )) . "] \n" ;
203+
204+ print "== UTF-7 == \n" ;
205+
206+ print "Single byte 0x01: [ " . mb_strcut ("\x01" , 0 , 100 , 'UTF-7 ' ) . "] \n" ;
207+ print "UTF-16 section ends abruptly: [ " . mb_strcut ("+Q " , 1 , 100 , 'UTF-7 ' ) . "] \n" ;
208+ print "UTF-16 section ends abruptly in middle of 2nd codepoint: [ " . mb_strcut ("+QxxC " , 0 , 100 , 'UTF-7 ' ) . "] \n" ;
209+ print "Cutting in middle of UTF-16 section: [ " . mb_strcut ("+UUU " , -1 , 255 , "UTF-7 " ) . "] \n" ;
210+ print "Cutting in middle of UTF-16 section (2): [ " . mb_strcut ("+UUUU " , -2 , 255 , "UTF-7 " ) . "] \n" ;
211+
212+ print "== UTF7-IMAP == \n" ;
213+
214+ print "Single byte 0x01: [ " . mb_strcut ("\x01" , 0 , 100 , 'UTF7-IMAP ' ) . "] \n" ;
215+ print "UTF-16 section ends abruptly: [ " . mb_strcut ("&Q " , 1 , 100 , 'UTF7-IMAP ' ) . "] \n" ;
216+ print "UTF-16 section ends abruptly in middle of 2nd codepoint: [ " . mb_strcut ("&QxxC " , 0 , 100 , 'UTF7-IMAP ' ) . "] \n" ;
217+ print "UTF-16 section is terminated improperly: [ " . mb_strcut ("&i6o \x83" , 0 , 100 , 'UTF7-IMAP ' ) . "] \n" ;
218+
219+ print "== GB18030 == \n" ;
220+
221+ print "Invalid byte 0xF5: [ " . bin2hex (mb_strcut ("\xF5a " , 1 , 100 , 'GB18030 ' )) . "] \n" ;
222+ print "Double-byte char: [ " . bin2hex (mb_strcut ("\xAFw " , -1 , 100 , "GB18030 " )) . "] \n" ;
223+
224+ print "== UHC == \n" ;
225+
226+ print "Single byte 0x96: [ " . bin2hex (mb_strcut ("\x96" , 1 , 1280 , "UHC " )) . "] \n" ;
227+
57228?>
58229--EXPECT--
59230== EUC-JP ==
72243[]
73244[e288ae]
74245[e288ae 20]
246+ [c3a5 42 c3a4 43 c3b6 44 c3bc]
247+ == UTF-16 ==
248+ Single byte: []
249+ With from=1: []
250+ Bad surrogate: []
251+ Bad surrogate followed by other bytes: [003f1243]
252+ BE byte order mark: []
253+ LE byte order mark: []
254+ Length=0: []
75255== UTF-16LE ==
76256[]
77257[]
80260[1a04]
81261[1a04]
82262[1a04 3804]
263+ Single byte: []
264+ == UTF-32BE ==
265+ []
266+ [0000222e]
267+ [0000222e]
268+ [0000222e 00000020]
269+ [0000222e 00000020]
270+ == ISO-2022-JP ==
271+ []
272+ []
273+ []
274+ []
275+ []
276+ [1b244234411b2842]
277+ []
278+ []
279+ [1b244234411b2842]
280+ []
281+ [1b24423b7a1b2842 20]
282+ [1b24423b7a1b2842 20 61 62]
283+ [1b24423b7a1b2842 20 61 62 63 20]
284+ [20 61 62 63 20 1b2442252b1b2842]
285+ [1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
286+ []
287+ Error followed by ASCII char: [4b]
288+ == ISO-2022-JP-2004 ==
289+ []
290+ []
291+ []
292+ []
293+ []
294+ []
295+ [1b24285134411b2842]
296+ []
297+ []
298+ []
299+ [1b24285134411b2842]
300+ []
301+ [1b24285134411b2842]
302+ [1b24285134411b2842 1b2428513b7a1b2842]
303+ [1b2428513b7a1b2842 20 61 62 63]
304+ [1b2428513b7a1b2842 20 61 62 63]
305+ [1b24285134411b2842 1b2428513b7a1b2842 20 61 62 63 20 1b242851252b1b2842 1b242851254a1b2842]
306+ []
307+ == ISO-2022-JP-MS ==
308+ []
309+ []
310+ []
311+ []
312+ []
313+ [1b244234411b2842]
314+ [1b244234411b2842]
315+ []
316+ []
317+ [1b244234411b2842]
318+ [1b244234411b2842]
319+ []
320+ [1b24423b7a1b2842 20]
321+ [1b24423b7a1b2842 20 61 62]
322+ [1b24423b7a1b2842 20 61 62 63 20]
323+ [20 61 62 63 20 1b2442252b1b2842]
324+ [1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
325+ []
326+ == JIS ==
327+ []
328+ []
329+ []
330+ []
331+ []
332+ [1b244234411b2842]
333+ []
334+ []
335+ [1b244234411b2842]
336+ []
337+ [1b24423b7a1b2842 20]
338+ [1b24423b7a1b2842 20 61 62]
339+ [1b24423b7a1b2842 20 61 62 63 20]
340+ [20 61 62 63 20 1b2442252b1b2842]
341+ [1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
342+ []
343+ 0xA3: []
344+ Bad escape sequence followed by null byte: []
345+ == ISO-2022-JP-KDDI ==
346+ []
347+ []
348+ []
349+ []
350+ []
351+ [1b244234411b2842]
352+ []
353+ []
354+ [1b244234411b2842]
355+ []
356+ [1b24423b7a1b2842 20]
357+ [1b24423b7a1b2842 20 61 62]
358+ [1b24423b7a1b2842 20 61 62 63 20]
359+ [20 61 62 63 20 1b2442252b1b2842]
360+ [1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
361+ []
362+ == CP50220 ==
363+ Single byte 0xFF: []
364+ Double byte 0xFF: [3f]
365+ Sample string with multiple null bytes: [1b2442255e001b2842]
366+ Bad escape sequence preceded by bad bytes: [3f3f3f00]
367+ Good JISX 0208 sequence, but it won't fit in max number of bytes: []
368+ Bad escape sequence followed by GR kana: []
369+ == UTF-7 ==
370+ Single byte 0x01: []
371+ UTF-16 section ends abruptly: []
372+ UTF-16 section ends abruptly in middle of 2nd codepoint: [+Qxw-]
373+ Cutting in middle of UTF-16 section: []
374+ Cutting in middle of UTF-16 section (2): []
375+ == UTF7-IMAP ==
376+ Single byte 0x01: [?]
377+ UTF-16 section ends abruptly: []
378+ UTF-16 section ends abruptly in middle of 2nd codepoint: []
379+ UTF-16 section is terminated improperly: []
380+ == GB18030 ==
381+ Invalid byte 0xF5: []
382+ Double-byte char: []
383+ == UHC ==
384+ Single byte 0x96: [96]
0 commit comments