Skip to content

Commit 36d7286

Browse files
committed
Starting UTF16 to UTF8 conversion.
1 parent ecae840 commit 36d7286

File tree

1 file changed

+39
-6
lines changed

1 file changed

+39
-6
lines changed

Release/src/utilities/asyncrt_utils.cpp

Lines changed: 39 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -252,6 +252,8 @@ const std::error_category & __cdecl linux_category()
252252

253253
}
254254

255+
#define UTF8_ // TODO: placeholder macro - defined with no replacement text and not referenced in the visible code; intended purpose still to be filled in.
256+
255257
utf16string __cdecl conversions::utf8_to_utf16(const std::string &s)
256258
{
257259
#if defined(CPPREST_STDLIB_UNICODE_CONVERSIONS)
@@ -268,25 +270,25 @@ utf16string __cdecl conversions::utf8_to_utf16(const std::string &s)
268270
const auto leadingBits = 0x3F;
269271
while (srcRemainingSize > 0)
270272
{
271-
if (*src < 0x80) // single byte character, 0x0 to 0x7F
273+
if (*src < 0x7F) // single byte character, 0x0 to 0x7F
272274
{
273275
dest.push_back(utf16string::value_type(*src));
274276
}
275277
else
276278
{
277279
unsigned char numContBytes = 0;
278280
int32_t codePoint;
279-
if (*src < 0xE0) // 2 byte character, 0x80 to 0x7FF
281+
if (*src <= 0xDF) // 2 byte character, 0x80 to 0x7FF
280282
{
281283
codePoint = *src & 0x1F;
282284
numContBytes = 1;
283285
}
284-
else if (*src < 0xF0) // 3 byte character, 0x800 to 0xFFFF
286+
else if (*src <= 0xEF) // 3 byte character, 0x800 to 0xFFFF
285287
{
286288
codePoint = *src & 0xF;
287289
numContBytes = 2;
288290
}
289-
else if (*src < 0xF8) // 4 byte character, 0x10000 to 0x10FFFF
291+
else if (*src <= 0xF7) // 4 byte character, 0x10000 to 0x10FFFF
290292
{
291293
codePoint = *src & 0x7;
292294
numContBytes = 3;
@@ -340,9 +342,40 @@ std::string __cdecl conversions::utf16_to_utf8(const utf16string &w)
340342
return conversion.to_bytes(w);
341343
#else
342344
std::string dest;
343-
dest.reserve(w.size());
345+
dest.reserve(w.size()); // TODO size
344346

345-
347+
const utf16string::value_type *src = w.c_str();
348+
auto srcRemainingSize = w.size();
349+
while (srcRemainingSize > 0)
350+
{
351+
if (*src >= 0xD800 && *src <= 0xDBFF)
352+
{
353+
// Found a high surrogate.
354+
// TODO in the future check to make sure ....
355+
356+
357+
}
358+
else if (*src <= 0xFFFF)
359+
{
360+
if (*src < 0x7F) // single byte character
361+
{
362+
dest.push_back(static_cast<char>(*src));
363+
}
364+
else if (*src <= 0x7FF) // 2 bytes needed
365+
{
366+
dest.push_back((*src >> 3) | 0xC0);
367+
dest.push_back((*src << 5) | )
368+
}
369+
else // 3 bytes needed
370+
{
371+
372+
}
373+
374+
}
375+
376+
--srcRemainingSize;
377+
++src;
378+
}
346379

347380
return dest;
348381
#endif

0 commit comments

Comments
 (0)