1010
1111namespace NFUnitTestStringBuilder
1212{
/// <summary>
/// Round-trip tests for <c>Encoding.UTF8</c>: text is encoded to bytes and decoded
/// back, and the result is compared with the original text.
/// </summary>
[TestClass]
public class EncodingTests
{
    // Fixed seed so the chunk sizes chosen in Utf8EncodingTests_Test1 are the same on
    // every run; a failure can then always be reproduced.
    private const int ChunkSeed = 12345;

    /// <summary>
    /// Encodes an ASCII sentence chunk by chunk into one shared buffer, verifying each
    /// chunk on its own and then the whole buffer.
    /// </summary>
    [TestMethod]
    public void Utf8EncodingTests_Test1()
    {
        string str = "this is a normal string that will be used to convert to bytes then back to a string";

        byte[] data = new byte[128];
        int len = str.Length;   // characters still to be encoded
        int idx = 0;            // index of the next character to encode
        int cBytes = 0;         // bytes written to data so far

        Random rand = new Random(ChunkSeed);

        while (len > 0)
        {
            // Pick a chunk of 1..len/2 characters; the last one or two characters are taken as-is.
            int size = (len <= 2) ? len : rand.Next(len / 2) + 1;
            len -= size;

            int cnt = Encoding.UTF8.GetBytes(str, idx, size, data, cBytes);

            // The bytes just written must decode back to exactly the chunk that was encoded.
            Assert.AreEqual(str.Substring(idx, size), new string(Encoding.UTF8.GetChars(data, cBytes, cnt)));

            cBytes += cnt;
            idx += size;
        }

        // The sample text is ASCII only, so one byte per character is expected.
        Assert.AreEqual(str.Length, cBytes);

        string strAfter = new string(Encoding.UTF8.GetChars(data, 0, cBytes));
        Assert.AreEqual(str, strAfter);
    }

    /// <summary>
    /// Encodes the whole ASCII sentence in a single call and decodes it back.
    /// </summary>
    [TestMethod]
    public void Utf8EncodingTests_Test2()
    {
        string str = "this is a normal string that will be used to convert to bytes then back to a string";

        byte[] data = Encoding.UTF8.GetBytes(str);

        // The sample text is ASCII only, so one byte per character is expected.
        Assert.AreEqual(str.Length, data.Length);

        string strAfter = new string(Encoding.UTF8.GetChars(data));
        Assert.AreEqual(str, strAfter);
    }

    /// <summary>
    /// Encodes a three character slice that contains a two-byte character (U+010D).
    /// </summary>
    [TestMethod]
    public void Utf8EncodingTests_Test3()
    {
        // This test involves a string with a special character.
        string str = "AB\u010DAB";
        byte[] data = new byte[4];

        // Characters 1..3 are 'B', U+010D and 'A': 1 + 2 + 1 = 4 bytes.
        int count = Encoding.UTF8.GetBytes(str, 1, 3, data, 0);

        Assert.AreEqual(4, count);
        Assert.AreEqual("B\u010DA", new string(Encoding.UTF8.GetChars(data)));
    }
}
67- }
/// <summary>
/// Tests for <c>Encoding.UTF8</c>: string-to-bytes round trips, valid multi-byte
/// sequences, and the replacement character (U+FFFD, encoded as EF BF BD) expected
/// for malformed input.
/// </summary>
[TestClass]
public class EncodingTests
{
    // Fixed seed so the chunk sizes chosen in Utf8EncodingTests_Test1 are the same on
    // every run; a failure can then always be reproduced.
    private const int ChunkSeed = 12345;

    /// <summary>
    /// Encodes an ASCII sentence chunk by chunk into one shared buffer, verifying each
    /// chunk on its own and then the whole buffer.
    /// </summary>
    [TestMethod]
    public void Utf8EncodingTests_Test1()
    {
        string str = "this is a normal string that will be used to convert to bytes then back to a string";

        byte[] data = new byte[128];
        int len = str.Length;   // characters still to be encoded
        int idx = 0;            // index of the next character to encode
        int cBytes = 0;         // bytes written to data so far

        Random rand = new Random(ChunkSeed);

        while (len > 0)
        {
            // Pick a chunk of 1..len/2 characters; the last one or two characters are taken as-is.
            int size = (len <= 2) ? len : rand.Next(len / 2) + 1;
            len -= size;

            int cnt = Encoding.UTF8.GetBytes(str, idx, size, data, cBytes);

            // The bytes just written must decode back to exactly the chunk that was encoded.
            Assert.AreEqual(str.Substring(idx, size), new string(Encoding.UTF8.GetChars(data, cBytes, cnt)));

            cBytes += cnt;
            idx += size;
        }

        // The sample text is ASCII only, so one byte per character is expected.
        Assert.AreEqual(str.Length, cBytes);

        string strAfter = new string(Encoding.UTF8.GetChars(data, 0, cBytes));
        Assert.AreEqual(str, strAfter);
    }

    /// <summary>
    /// Encodes the whole ASCII sentence in a single call and decodes it back.
    /// </summary>
    [TestMethod]
    public void Utf8EncodingTests_Test2()
    {
        string str = "this is a normal string that will be used to convert to bytes then back to a string";

        byte[] data = Encoding.UTF8.GetBytes(str);

        // The sample text is ASCII only, so one byte per character is expected.
        Assert.AreEqual(str.Length, data.Length);

        string strAfter = new string(Encoding.UTF8.GetChars(data));
        Assert.AreEqual(str, strAfter);
    }

    /// <summary>
    /// Encodes a three character slice that contains a two-byte character (U+010D).
    /// </summary>
    [TestMethod]
    public void Utf8EncodingTests_Test3()
    {
        // This test involves a string with a special character.
        string str = "AB\u010DAB";
        byte[] data = new byte[4];

        // Characters 1..3 are 'B', U+010D and 'A': 1 + 2 + 1 = 4 bytes.
        int count = Encoding.UTF8.GetBytes(str, 1, 3, data, 0);

        Assert.AreEqual(4, count);
        Assert.AreEqual("B\u010DA", new string(Encoding.UTF8.GetChars(data)));
    }

    /// <summary>
    /// Decodes <paramref name="input"/> as UTF-8, encodes the resulting string again and
    /// checks both the decoded length and the re-encoded bytes.
    /// </summary>
    /// <param name="input">Raw bytes handed to the decoder.</param>
    /// <param name="expected">Bytes the re-encoded string must consist of.</param>
    /// <param name="expectedStringLength">Expected number of UTF-16 code units in the decoded string.</param>
    private void RoundtripUtf8(byte[] input, byte[] expected, int expectedStringLength)
    {
        string decoded = Encoding.UTF8.GetString(input, 0, input.Length);
        byte[] reencoded = Encoding.UTF8.GetBytes(decoded);

        Assert.AreEqual(expectedStringLength, decoded.Length);
        CollectionAssert.AreEqual(expected, reencoded, $"Failed on input: {BitConverter.ToString(input)}");
    }

    [TestMethod]
    public void Utf8EncodingTests_TestValid2ByteSequence()
    {
        byte[] input = new byte[] { 0xC2, 0xA9 }; // U+00A9 ©
        byte[] expected = new byte[] { 0xC2, 0xA9 };

        RoundtripUtf8(input, expected, 1);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestValid3ByteSequence()
    {
        byte[] input = new byte[] { 0xE2, 0x82, 0xAC }; // U+20AC €
        byte[] expected = new byte[] { 0xE2, 0x82, 0xAC };

        RoundtripUtf8(input, expected, 1);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestValid4ByteSequence()
    {
        byte[] input = new byte[] { 0xF0, 0x9F, 0x98, 0x80 }; // U+1F600 😀
        byte[] expected = new byte[] { 0xF0, 0x9F, 0x98, 0x80 };

        // A code point above U+FFFF occupies two UTF-16 code units (a surrogate pair).
        RoundtripUtf8(input, expected, 2);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestOverlongEncoding1()
    {
        // Expects one replacement character (U+FFFD) for each of the two bad bytes.
        byte[] input = new byte[] { 0xC0, 0xAF }; // Overlong '/'
        byte[] expected = new byte[] { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD };

        RoundtripUtf8(input, expected, 2);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestOverlongEncoding2()
    {
        // Expects one replacement character (U+FFFD) for each of the three bad bytes.
        byte[] input = new byte[] { 0xE0, 0x80, 0xAF }; // Overlong '/'
        byte[] expected = new byte[] { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD };

        RoundtripUtf8(input, expected, 3);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestInvalid3ByteLastByteInvalid()
    {
        byte[] input = new byte[] { 0xE2, 0x82, 0xFE }; // UTF-8 3 bytes, 0xFE is the invalid byte
        byte[] expected = new byte[] { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD };

        RoundtripUtf8(input, expected, 2);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestInvalid3ByteMiddleByteInvalid()
    {
        byte[] input = new byte[] { 0xE2, 0xFE, 0xAC }; // UTF-8 3 bytes, 0xFE is the invalid byte
        byte[] expected = new byte[] { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD };

        RoundtripUtf8(input, expected, 3);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestInvalid4ByteLastByteInvalid()
    {
        byte[] input = new byte[] { 0xF0, 0x9F, 0x98, 0xFE }; // UTF-8 4 bytes, 0xFE is the invalid byte
        byte[] expected = new byte[] { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD };

        RoundtripUtf8(input, expected, 2);
    }

    // For reference, the well-formed sequence is 0xF0, 0x9F, 0x98, 0x80.
    [TestMethod]
    public void Utf8EncodingTests_TestInvalid4ByteThirdByteInvalid()
    {
        byte[] input = new byte[] { 0xF0, 0x9F, 0xFE, 0x80 }; // UTF-8 4 bytes, 0xFE is the invalid byte
        byte[] expected = new byte[] { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD };

        RoundtripUtf8(input, expected, 3);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestInvalid4ByteSecondByteInvalid()
    {
        byte[] input = new byte[] { 0xF0, 0xFE, 0x98, 0x80 }; // UTF-8 4 bytes, 0xFE is the invalid byte
        byte[] expected = new byte[] { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD };

        RoundtripUtf8(input, expected, 4);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestIsolatedContinuationByte()
    {
        byte[] input = new byte[] { 0x80 }; // Invalid lone continuation byte
        byte[] expected = new byte[] { 0xEF, 0xBF, 0xBD };

        RoundtripUtf8(input, expected, 1);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestIllegalCodePositionSurrogate()
    {
        byte[] input = new byte[] { 0xED, 0xA0, 0x80 }; // U+D800 high surrogate
        byte[] expected = new byte[] { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD };

        RoundtripUtf8(input, expected, 3);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestMaximumValidCodepoint()
    {
        byte[] input = new byte[] { 0xF4, 0x8F, 0xBF, 0xBF }; // U+10FFFF
        byte[] expected = new byte[] { 0xF4, 0x8F, 0xBF, 0xBF };

        RoundtripUtf8(input, expected, 2);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestCodepointBeyondUPlus10FFFF()
    {
        byte[] input = new byte[] { 0xF4, 0x90, 0x80, 0x80 }; // > U+10FFFF

        // Each of the 4 bytes is expected to become its own replacement character.
        byte[] expected = new byte[] { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD };

        RoundtripUtf8(input, expected, 4);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestUnexpectedContinuationSequence()
    {
        // 0xC2 starts a 2-byte sequence, but 0x41 ('A') is not a valid continuation byte.
        byte[] input = new byte[] { 0xC2, 0x41 };

        // Expect: replacement character followed by 'A'.
        byte[] expected = new byte[] { 0xEF, 0xBF, 0xBD, 0x41 };

        RoundtripUtf8(input, expected, 2);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestASCII()
    {
        byte[] input = new byte[] { 0x47, 0x6F, 0x6F, 0x64 }; // "Good"
        byte[] expected = new byte[] { 0x47, 0x6F, 0x6F, 0x64 };

        RoundtripUtf8(input, expected, 4);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestASCIIWithInvalidByte()
    {
        // "BAD" followed by two invalid bytes, each expected to become U+FFFD.
        byte[] input = new byte[] { 0x42, 0x41, 0x44, 0xEF, 0xFF };
        byte[] expected = new byte[] { 0x42, 0x41, 0x44, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD };

        RoundtripUtf8(input, expected, 5);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestASCIIWithInvalidByteInBetween()
    {
        // "BAD", two invalid bytes (each expected to become U+FFFD), then "BAD" again.
        byte[] input = new byte[] { 0x42, 0x41, 0x44, 0xEF, 0xFF, 0x42, 0x41, 0x44 };
        byte[] expected = new byte[] { 0x42, 0x41, 0x44, 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0x42, 0x41, 0x44 };

        RoundtripUtf8(input, expected, 8);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestASCIIWithNull()
    {
        // All characters will be dropped after the null character in nanoFramework.
        byte[] input = new byte[]
        {
            0x54, 0x65, 0x73, 0x74, 0x20, 0x6E, 0x75, 0x6C, 0x6C, 0x00,
            0x61, 0x66, 0x74, 0x65, 0x72, 0x20, 0x6E, 0x75, 0x6C, 0x6C
        }; // "Test null\0after null"
        byte[] expected = new byte[] { 0x54, 0x65, 0x73, 0x74, 0x20, 0x6E, 0x75, 0x6C, 0x6C };

        RoundtripUtf8(input, expected, 9);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestStringWithEmoji()
    {
        string strInput = "nanoFramework is fantastic 🚀";
        byte[] input = Encoding.UTF8.GetBytes(strInput);
        byte[] expected = new byte[]
        {
            0x6E, 0x61, 0x6E, 0x6F, 0x46, 0x72, 0x61, 0x6D, 0x65, 0x77, 0x6F, 0x72, 0x6B, 0x20, 0x69,
            0x73, 0x20, 0x66, 0x61, 0x6E, 0x74, 0x61, 0x73, 0x74, 0x69, 0x63, 0x20, 0xF0, 0x9F, 0x9A, 0x80
        };

        // 27 ASCII characters plus a surrogate pair for the emoji.
        RoundtripUtf8(input, expected, 29);
    }

    [TestMethod]
    public void Utf8EncodingTests_TestFullASCIIRange()
    {
        // Full ASCII range (0x01..0x7F) except the null character.
        byte[] input = new byte[0x7F];
        for (int i = 0; i < input.Length; i++)
        {
            input[i] = (byte)(i + 1);
        }

        // ASCII must survive the round trip unchanged, one character per byte.
        RoundtripUtf8(input, input, 127);
    }
}
279+ }
0 commit comments