@@ -16,20 +16,22 @@ TEST(LlvmLibcCharacterConverterUTF32To8Test, OneByte) {
1616 LIBC_NAMESPACE::internal::mbstate state;
1717 LIBC_NAMESPACE::internal::CharacterConverter cr (&state);
1818
19- char32_t utf32_A = 0x41 ;
19+ // utf8 1-byte encodings are identical to their utf32 representations
20+ char32_t utf32_A = 0x41 ; // 'A'
2021 cr.push (utf32_A);
2122 auto popped = cr.pop_utf8 ();
2223 ASSERT_EQ (popped.error , 0 );
2324 ASSERT_EQ (static_cast <char >(popped.out ), 'A' );
2425 ASSERT_TRUE (cr.isComplete ());
2526
26- char32_t utf32_B = 0x42 ;
27+ char32_t utf32_B = 0x42 ; // 'B'
2728 cr.push (utf32_B);
2829 popped = cr.pop_utf8 ();
2930 ASSERT_EQ (popped.error , 0 );
3031 ASSERT_EQ (static_cast <char >(popped.out ), 'B' );
3132 ASSERT_TRUE (cr.isComplete ());
3233
34+ // should error if we try to pop another utf8 byte out
3335 popped = cr.pop_utf8 ();
3436 ASSERT_NE (popped.error , 0 );
3537}
@@ -38,6 +40,7 @@ TEST(LlvmLibcCharacterConverterUTF32To8Test, TwoByte) {
3840 LIBC_NAMESPACE::internal::mbstate state;
3941 LIBC_NAMESPACE::internal::CharacterConverter cr (&state);
4042
43+ // testing utf32: 0xff -> utf8: 0xc3 0xbf
4144 char32_t utf32 = 0xff ;
4245 cr.push (utf32);
4346 auto popped = cr.pop_utf8 ();
@@ -49,6 +52,7 @@ TEST(LlvmLibcCharacterConverterUTF32To8Test, TwoByte) {
4952 ASSERT_EQ (static_cast <int >(popped.out ), 0xbf );
5053 ASSERT_TRUE (cr.isComplete ());
5154
55+ // testing utf32: 0x58e -> utf8: 0xd6 0x8e
5256 utf32 = 0x58e ;
5357 cr.push (utf32);
5458 popped = cr.pop_utf8 ();
@@ -60,6 +64,7 @@ TEST(LlvmLibcCharacterConverterUTF32To8Test, TwoByte) {
6064 ASSERT_EQ (static_cast <int >(popped.out ), 0x8e );
6165 ASSERT_TRUE (cr.isComplete ());
6266
67+ // should error if we try to pop another utf8 byte out
6368 popped = cr.pop_utf8 ();
6469 ASSERT_NE (popped.error , 0 );
6570}
@@ -68,6 +73,7 @@ TEST(LlvmLibcCharacterConverterUTF32To8Test, ThreeByte) {
6873 LIBC_NAMESPACE::internal::mbstate state;
6974 LIBC_NAMESPACE::internal::CharacterConverter cr (&state);
7075
76+ // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
7177 char32_t utf32 = 0xac15 ;
7278 cr.push (utf32);
7379 auto popped = cr.pop_utf8 ();
@@ -83,6 +89,7 @@ TEST(LlvmLibcCharacterConverterUTF32To8Test, ThreeByte) {
8389 ASSERT_EQ (static_cast <int >(popped.out ), 0x95 );
8490 ASSERT_TRUE (cr.isComplete ());
8591
92+ // testing utf32: 0x267b -> utf8: 0xe2 0x99 0xbb
8693 utf32 = 0x267b ;
8794 cr.push (utf32);
8895 popped = cr.pop_utf8 ();
@@ -98,6 +105,7 @@ TEST(LlvmLibcCharacterConverterUTF32To8Test, ThreeByte) {
98105 ASSERT_EQ (static_cast <int >(popped.out ), 0xbb );
99106 ASSERT_TRUE (cr.isComplete ());
100107
108+ // should error if we try to pop another utf8 byte out
101109 popped = cr.pop_utf8 ();
102110 ASSERT_NE (popped.error , 0 );
103111}
@@ -106,36 +114,47 @@ TEST(LlvmLibcCharacterConverterUTF32To8Test, FourByte) {
106114 LIBC_NAMESPACE::internal::mbstate state;
107115 LIBC_NAMESPACE::internal::CharacterConverter cr (&state);
108116
109- char32_t utf32 = 0xac15 ;
117+ // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1
118+ char32_t utf32 = 0x1f921 ;
110119 cr.push (utf32);
111120 auto popped = cr.pop_utf8 ();
112121 ASSERT_EQ (popped.error , 0 );
113- ASSERT_EQ (static_cast <int >(popped.out ), 0xea );
122+ ASSERT_EQ (static_cast <int >(popped.out ), 0xf0 );
114123 ASSERT_TRUE (!cr.isComplete ());
115124 popped = cr.pop_utf8 ();
116125 ASSERT_EQ (popped.error , 0 );
117- ASSERT_EQ (static_cast <int >(popped.out ), 0xb0 );
126+ ASSERT_EQ (static_cast <int >(popped.out ), 0x9f );
118127 ASSERT_TRUE (!cr.isComplete ());
119128 popped = cr.pop_utf8 ();
120129 ASSERT_EQ (popped.error , 0 );
121- ASSERT_EQ (static_cast <int >(popped.out ), 0x95 );
130+ ASSERT_EQ (static_cast <int >(popped.out ), 0xa4 );
131+ ASSERT_TRUE (!cr.isComplete ());
132+ popped = cr.pop_utf8 ();
133+ ASSERT_EQ (popped.error , 0 );
134+ ASSERT_EQ (static_cast <int >(popped.out ), 0xa1 );
122135 ASSERT_TRUE (cr.isComplete ());
123136
124- utf32 = 0x267b ;
137+ // testing utf32: 0x12121 -> utf8: 0xf0 0x92 0x84 0xa1
138+ utf32 = 0x12121 ;
125139 cr.push (utf32);
126140 popped = cr.pop_utf8 ();
127141 ASSERT_EQ (popped.error , 0 );
128- ASSERT_EQ (static_cast <int >(popped.out ), 0xe2 );
142+ ASSERT_EQ (static_cast <int >(popped.out ), 0xf0 );
129143 ASSERT_TRUE (!cr.isComplete ());
130144 popped = cr.pop_utf8 ();
131145 ASSERT_EQ (popped.error , 0 );
132- ASSERT_EQ (static_cast <int >(popped.out ), 0x99 );
146+ ASSERT_EQ (static_cast <int >(popped.out ), 0x92 );
133147 ASSERT_TRUE (!cr.isComplete ());
134148 popped = cr.pop_utf8 ();
135149 ASSERT_EQ (popped.error , 0 );
136- ASSERT_EQ (static_cast <int >(popped.out ), 0xbb );
150+ ASSERT_EQ (static_cast <int >(popped.out ), 0x84 );
151+ ASSERT_TRUE (!cr.isComplete ());
152+ popped = cr.pop_utf8 ();
153+ ASSERT_EQ (popped.error , 0 );
154+ ASSERT_EQ (static_cast <int >(popped.out ), 0xa1 );
137155 ASSERT_TRUE (cr.isComplete ());
138156
157+ // should error if we try to pop another utf8 byte out
139158 popped = cr.pop_utf8 ();
140159 ASSERT_NE (popped.error , 0 );
141160}
0 commit comments