@@ -245,6 +245,63 @@ TEST(LlvmLibcStringConverterTest, UTF8To32ErrorHandling) {
245
245
ASSERT_EQ (static_cast <int >(sc.getSourceIndex ()), 4 );
246
246
}
247
247
248
+ TEST (LlvmLibcStringConverterTest, InvalidCharacterOutsideBounds) {
249
+ // if an invalid character exists in the source string but we don't have space
250
+ // to write it, we should return a "stop converting" error rather than an
251
+ // invalid character error
252
+
253
+ // first 4 bytes are clown emoji (🤡)
254
+ // next 3 form an invalid character
255
+ const char *src1 = " \xF0\x9F\xA4\xA1\x90\x88\x30 " ;
256
+ LIBC_NAMESPACE::internal::mbstate ps1;
257
+ LIBC_NAMESPACE::internal::StringConverter<char8_t > sc1 (
258
+ reinterpret_cast <const char8_t *>(src1), &ps1, 1 );
259
+
260
+ auto res1 = sc1.popUTF32 ();
261
+ ASSERT_TRUE (res1.has_value ());
262
+ ASSERT_EQ (static_cast <int >(res1.value ()), 0x1f921 );
263
+ ASSERT_EQ (static_cast <int >(sc1.getSourceIndex ()), 4 );
264
+
265
+ res1 = sc1.popUTF32 ();
266
+ ASSERT_FALSE (res1.has_value ());
267
+ // no space to write error NOT invalid character error (EILSEQ)
268
+ ASSERT_EQ (static_cast <int >(res1.error ()), -1 );
269
+ ASSERT_EQ (static_cast <int >(sc1.getSourceIndex ()), 4 );
270
+
271
+ const wchar_t src2[] = {
272
+ static_cast <wchar_t >(0x1f921 ), static_cast <wchar_t >(0xffffff ),
273
+ static_cast <wchar_t >(0x0 )}; // clown emoji, invalid utf32
274
+ LIBC_NAMESPACE::internal::mbstate ps2;
275
+ LIBC_NAMESPACE::internal::StringConverter<char32_t > sc2 (
276
+ reinterpret_cast <const char32_t *>(src2), &ps2, 4 );
277
+
278
+ auto res2 = sc2.popUTF8 ();
279
+ ASSERT_TRUE (res2.has_value ());
280
+ ASSERT_EQ (static_cast <int >(res2.value ()), 0xF0 );
281
+ ASSERT_EQ (static_cast <int >(sc2.getSourceIndex ()), 1 );
282
+
283
+ res2 = sc2.popUTF8 ();
284
+ ASSERT_TRUE (res2.has_value ());
285
+ ASSERT_EQ (static_cast <int >(res2.value ()), 0x9F );
286
+ ASSERT_EQ (static_cast <int >(sc2.getSourceIndex ()), 1 );
287
+
288
+ res2 = sc2.popUTF8 ();
289
+ ASSERT_TRUE (res2.has_value ());
290
+ ASSERT_EQ (static_cast <int >(res2.value ()), 0xA4 );
291
+ ASSERT_EQ (static_cast <int >(sc2.getSourceIndex ()), 1 );
292
+
293
+ res2 = sc2.popUTF8 ();
294
+ ASSERT_TRUE (res2.has_value ());
295
+ ASSERT_EQ (static_cast <int >(res2.value ()), 0xA1 );
296
+ ASSERT_EQ (static_cast <int >(sc2.getSourceIndex ()), 1 );
297
+
298
+ res2 = sc2.popUTF8 ();
299
+ ASSERT_FALSE (res2.has_value ());
300
+ // no space to write error NOT invalid character error (EILSEQ)
301
+ ASSERT_EQ (static_cast <int >(res2.error ()), -1 );
302
+ ASSERT_EQ (static_cast <int >(sc2.getSourceIndex ()), 1 );
303
+ }
304
+
248
305
TEST (LlvmLibcStringConverterTest, MultipleStringConverters32To8) {
249
306
/*
250
307
We do NOT test partially popping a character and expecting the next
0 commit comments