@@ -252,6 +252,8 @@ const std::error_category & __cdecl linux_category()
252
252
253
253
}
254
254
255
+ #define UTF8_ // TODO: empty placeholder macro (expands to nothing); not referenced in the visible code - give it a purpose or remove it
256
+
255
257
utf16string __cdecl conversions::utf8_to_utf16 (const std::string &s)
256
258
{
257
259
#if defined(CPPREST_STDLIB_UNICODE_CONVERSIONS)
@@ -268,25 +270,25 @@ utf16string __cdecl conversions::utf8_to_utf16(const std::string &s)
268
270
const auto leadingBits = 0x3F ;
269
271
while (srcRemainingSize > 0 )
270
272
{
271
- if (*src < 0x80 ) // single byte character, 0x0 to 0x7F
273
+ if (*src < 0x7F ) // single byte character, 0x0 to 0x7F
272
274
{
273
275
dest.push_back (utf16string::value_type (*src));
274
276
}
275
277
else
276
278
{
277
279
unsigned char numContBytes = 0 ;
278
280
int32_t codePoint;
279
- if (*src < 0xE0 ) // 2 byte character, 0x80 to 0x7FF
281
+ if (*src <= 0xDF ) // 2 byte character, 0x80 to 0x7FF
280
282
{
281
283
codePoint = *src & 0x1F ;
282
284
numContBytes = 1 ;
283
285
}
284
- else if (*src < 0xF0 ) // 3 byte character, 0x800 to 0xFFFF
286
+ else if (*src <= 0xEF ) // 3 byte character, 0x800 to 0xFFFF
285
287
{
286
288
codePoint = *src & 0xF ;
287
289
numContBytes = 2 ;
288
290
}
289
- else if (*src < 0xF8 ) // 4 byte character, 0x10000 to 0x10FFFF
291
+ else if (*src <= 0xF7 ) // 4 byte character, 0x10000 to 0x10FFFF
290
292
{
291
293
codePoint = *src & 0x7 ;
292
294
numContBytes = 3 ;
@@ -340,9 +342,40 @@ std::string __cdecl conversions::utf16_to_utf8(const utf16string &w)
340
342
return conversion.to_bytes (w);
341
343
#else
342
344
std::string dest;
343
- dest.reserve (w.size ());
345
+ dest.reserve (w.size ()); // TODO size
344
346
345
-
347
+ const utf16string::value_type *src = w.c_str ();
348
+ auto srcRemainingSize = w.size ();
349
+ while (srcRemainingSize > 0 )
350
+ {
351
+ if (*src >= 0xD800 && *src <= 0xDBFF )
352
+ {
353
+ // Found a high surrogate.
354
+ // TODO in the future check to make sure ....
355
+
356
+
357
+ }
358
+ else if (*src <= 0xFFFF )
359
+ {
360
+ if (*src < 0x7F ) // single byte character
361
+ {
362
+ dest.push_back (static_cast <char >(*src));
363
+ }
364
+ else if (*src <= 0x7FF ) // 2 bytes needed
365
+ {
366
+ dest.push_back ((*src >> 3 ) | 0xC0 );
367
+ dest.push_back ((*src << 5 ) | )
368
+ }
369
+ else // 3 bytes needed
370
+ {
371
+
372
+ }
373
+
374
+ }
375
+
376
+ --srcRemainingSize;
377
+ ++src;
378
+ }
346
379
347
380
return dest;
348
381
#endif
0 commit comments