@@ -49,21 +49,103 @@ TEST(usascii_to_utf16)
49
49
}
50
50
}
51
51
52
// UTF16(x) pastes a string-literal prefix onto the narrow literal x:
// a wide literal (L"...") when _WIN32 is defined, a char16_t literal (u"...")
// otherwise.
//
// The '(' must directly follow the macro name. With whitespace in between,
// the preprocessor defines an object-like macro whose replacement text starts
// with "(x)", and UTF16("...") would no longer produce a prefixed literal.
#ifdef _WIN32
#define UTF16(x) L##x
#else
#define UTF16(x) u##x
#endif
57
+
58
+ TEST (utf16_to_utf8)
59
+ {
60
+ #if !defined(__GLIBCXX__)
61
+ std::wstring_convert<std::codecvt_utf8_utf16<utf16char>, utf16char> conversion;
62
+ #endif
63
+
64
+ // encodes to single byte character
65
+ VERIFY_ARE_EQUAL (" ABC987" , utility::conversions::utf16_to_utf8 (UTF16 (" ABC987" )));
66
+
67
+ // encodes to 2 byte character
68
+ utf16string input;
69
+ input.push_back (0x80 );
70
+ input.push_back (0x14D );
71
+ input.push_back (0x7FF );
72
+ auto result = utility::conversions::utf16_to_utf8 (input);
73
+ #if defined(__GLIBCXX__)
74
+ VERIFY_ARE_EQUAL (-62 , result[0 ]);
75
+ VERIFY_ARE_EQUAL (-128 , result[1 ]);
76
+ VERIFY_ARE_EQUAL (-59 , result[2 ]);
77
+ VERIFY_ARE_EQUAL (-115 , result[3 ]);
78
+ VERIFY_ARE_EQUAL (-33 , result[4 ]);
79
+ VERIFY_ARE_EQUAL (-65 , result[5 ]);
80
+ #else
81
+ VERIFY_ARE_EQUAL (conversion.to_bytes (input), result);
82
+ #endif
83
+
84
+ // encodes to 3 byte character
85
+ input.clear ();
86
+ input.push_back (0x800 );
87
+ input.push_back (0x14AB );
88
+ input.push_back (0xFFFF );
89
+ result = utility::conversions::utf16_to_utf8 (input);
90
+ #if defined(__GLIBCXX__)
91
+ VERIFY_ARE_EQUAL (-32 , result[0 ]);
92
+ VERIFY_ARE_EQUAL (-96 , result[1 ]);
93
+ VERIFY_ARE_EQUAL (-128 , result[2 ]);
94
+ VERIFY_ARE_EQUAL (-31 , result[3 ]);
95
+ VERIFY_ARE_EQUAL (-110 , result[4 ]);
96
+ VERIFY_ARE_EQUAL (-85 , result[5 ]);
97
+ VERIFY_ARE_EQUAL (-17 , result[6 ]);
98
+ VERIFY_ARE_EQUAL (-65 , result[7 ]);
99
+ VERIFY_ARE_EQUAL (-65 , result[8 ]);
100
+ #else
101
+ VERIFY_ARE_EQUAL (conversion.to_bytes (input), result);
102
+ #endif
103
+
104
+ // surrogate pair - encodes to 4 byte character
105
+ input.clear ();
106
+ // U+10000
107
+ input.push_back (0xD800 );
108
+ input.push_back (0xDC00 );
109
+ // U+12345
110
+ input.push_back (0xD802 );
111
+ input.push_back (0xDD29 );
112
+ // U+10FFFF
113
+ input.push_back (0xDA3F );
114
+ input.push_back (0xDFFF );
115
+ result = utility::conversions::utf16_to_utf8 (input);
116
+ #if defined(__GLIBCXX__)
117
+ VERIFY_ARE_EQUAL (-16 , result[0 ]);
118
+ VERIFY_ARE_EQUAL (-112 , result[1 ]);
119
+ VERIFY_ARE_EQUAL (-128 , result[2 ]);
120
+ VERIFY_ARE_EQUAL (-128 , result[3 ]);
121
+ VERIFY_ARE_EQUAL (-16 , result[4 ]);
122
+ VERIFY_ARE_EQUAL (-112 , result[5 ]);
123
+ VERIFY_ARE_EQUAL (-92 , result[6 ]);
124
+ VERIFY_ARE_EQUAL (-87 , result[7 ]);
125
+ VERIFY_ARE_EQUAL (-14 , result[8 ]);
126
+ VERIFY_ARE_EQUAL (-97 , result[9 ]);
127
+ VERIFY_ARE_EQUAL (-65 , result[10 ]);
128
+ VERIFY_ARE_EQUAL (-65 , result[11 ]);
129
+ #else
130
+ VERIFY_ARE_EQUAL (conversion.to_bytes (input), result);
131
+ #endif
132
+ }
133
+
52
134
TEST (utf8_to_utf16)
53
135
{
54
136
#if !defined(__GLIBCXX__)
55
137
std::wstring_convert<std::codecvt_utf8_utf16<utf16char>, utf16char> conversion;
56
138
#endif
57
139
58
140
// single byte character
59
- VERIFY_ARE_EQUAL (_XPLATSTR (" ABC123" ), utility::conversions::utf8_to_utf16 (" ABC123" ));
141
+ VERIFY_ARE_EQUAL (UTF16 (" ABC123" ), utility::conversions::utf8_to_utf16 (" ABC123" ));
60
142
61
143
// 2 byte character
62
144
std::string input;
63
- input.push_back (unsigned char ( 207 ) ); // 11001111
64
- input.push_back (unsigned char ( 129 ) ); // 10000001
65
- input.push_back (unsigned char ( 198 ) ); // 11000110
66
- input.push_back (unsigned char ( 141 ) ); // 10001101
145
+ input.push_back (207u ); // 11001111
146
+ input.push_back (129u ); // 10000001
147
+ input.push_back (198u ); // 11000110
148
+ input.push_back (141u ); // 10001101
67
149
auto result = utility::conversions::utf8_to_utf16 (input);
68
150
#if defined(__GLIBCXX__)
69
151
VERIFY_ARE_EQUAL (961 , result[0 ]);
@@ -74,12 +156,12 @@ TEST(utf8_to_utf16)
74
156
75
157
// 3 byte character
76
158
input.clear ();
77
- input.push_back (unsigned char ( 230 ) ); // 11100110
78
- input.push_back (unsigned char ( 141 ) ); // 10001101
79
- input.push_back (unsigned char ( 157 ) ); // 10011101
80
- input.push_back (unsigned char ( 231 ) ); // 11100111
81
- input.push_back (unsigned char ( 143 ) ); // 10001111
82
- input.push_back (unsigned char ( 156 ) ); // 10011100
159
+ input.push_back (230u ); // 11100110
160
+ input.push_back (141u ); // 10001101
161
+ input.push_back (157u ); // 10011101
162
+ input.push_back (231u ); // 11100111
163
+ input.push_back (143u ); // 10001111
164
+ input.push_back (156u ); // 10011100
83
165
result = utility::conversions::utf8_to_utf16 (input);
84
166
#if defined(__GLIBCXX__)
85
167
VERIFY_ARE_EQUAL (25437 , result[0 ]);
@@ -90,14 +172,14 @@ TEST(utf8_to_utf16)
90
172
91
173
// 4 byte character
92
174
input.clear ();
93
- input.push_back (unsigned char ( 240 ) ); // 11110000
94
- input.push_back (unsigned char ( 173 ) ); // 10101101
95
- input.push_back (unsigned char ( 157 ) ); // 10011101
96
- input.push_back (unsigned char ( 143 ) ); // 10001111
97
- input.push_back (unsigned char ( 240 ) ); // 11111000
98
- input.push_back (unsigned char ( 161 ) ); // 10100001
99
- input.push_back (unsigned char ( 191 ) ); // 10111111
100
- input.push_back (unsigned char ( 191 ) ); // 10111111
175
+ input.push_back (240u ); // 11110000
176
+ input.push_back (173u ); // 10101101
177
+ input.push_back (157u ); // 10011101
178
+ input.push_back (143u ); // 10001111
179
+ input.push_back (240u ); // 11110000
180
+ input.push_back (161u ); // 10100001
181
+ input.push_back (191u ); // 10111111
182
+ input.push_back (191u ); // 10111111
101
183
result = utility::conversions::utf8_to_utf16 (input);
102
184
#if defined(__GLIBCXX__)
103
185
VERIFY_ARE_EQUAL (55413 , result[0 ]);
@@ -113,20 +195,20 @@ TEST(utf8_to_utf16_errors)
113
195
{
114
196
// missing second continuation byte
115
197
std::string input;
116
- input.push_back (unsigned char ( 207 ) ); // 11001111
198
+ input.push_back (207u ); // 11001111
117
199
VERIFY_THROWS (utility::conversions::utf8_to_utf16 (input), std::invalid_argument);
118
200
119
201
// missing third continuation byte
120
202
input.clear ();
121
- input.push_back (unsigned char ( 230 ) ); // 11100110
122
- input.push_back (unsigned char ( 141 ) ); // 10001101
203
+ input.push_back (230u ); // 11100110
204
+ input.push_back (141u ); // 10001101
123
205
VERIFY_THROWS (utility::conversions::utf8_to_utf16 (input), std::invalid_argument);
124
206
125
207
// missing fourth continuation byte
126
208
input.clear ();
127
- input.push_back (unsigned char ( 240 ) ); // 11110000
128
- input.push_back (unsigned char ( 173 ) ); // 10101101
129
- input.push_back (unsigned char ( 157 ) ); // 10011101
209
+ input.push_back (240u ); // 11110000
210
+ input.push_back (173u ); // 10101101
211
+ input.push_back (157u ); // 10011101
130
212
VERIFY_THROWS (utility::conversions::utf8_to_utf16 (input), std::invalid_argument);
131
213
}
132
214
0 commit comments