Skip to content

Commit f0560ce

Browse files
committed
String: Fix mangled decoding of UTF-16 strings containing surrogate pairs in createStringFromData()
1 parent e3f3d32 commit f0560ce

File tree

1 file changed

+54
-14
lines changed

1 file changed

+54
-14
lines changed

modules/juce_core/text/juce_String.cpp

Lines changed: 54 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1994,24 +1994,25 @@ String String::createStringFromData (const void* const unknownData, int size)
19941994
if (size == 1)
19951995
return charToString ((juce_wchar) data[0]);
19961996

1997-
if (CharPointer_UTF16::isByteOrderMarkBigEndian (data)
1998-
|| CharPointer_UTF16::isByteOrderMarkLittleEndian (data))
1999-
{
2000-
const int numChars = size / 2 - 1;
1997+
const auto bigEndianData = CharPointer_UTF16::isByteOrderMarkBigEndian (data);
20011998

2002-
StringCreationHelper builder ((size_t) numChars);
1999+
if (bigEndianData || CharPointer_UTF16::isByteOrderMarkLittleEndian (data))
2000+
{
2001+
const auto numUnits = size / 2 - 1;
2002+
const auto src = unalignedPointerCast<const uint16*> (data + 2);
2003+
const auto swapBytes = bigEndianData ? ByteOrder::swapIfLittleEndian<uint16>
2004+
: ByteOrder::swapIfBigEndian<uint16>;
20032005

2004-
auto src = unalignedPointerCast<const uint16*> (data + 2);
2006+
StringCreationHelper builder ((size_t) numUnits);
20052007

2006-
if (CharPointer_UTF16::isByteOrderMarkBigEndian (data))
2007-
{
2008-
for (int i = 0; i < numChars; ++i)
2009-
builder.write ((juce_wchar) ByteOrder::swapIfLittleEndian (src[i]));
2010-
}
2011-
else
2008+
for (int i = 0; i < numUnits;)
20122009
{
2013-
for (int i = 0; i < numChars; ++i)
2014-
builder.write ((juce_wchar) ByteOrder::swapIfBigEndian (src[i]));
2010+
const uint16 wideBuffer[] { swapBytes (src[i]),
2011+
swapBytes ((i + 1 == numUnits) ? (uint16) 0 : src[i + 1]) };
2012+
const CharPointer_UTF16 ptr { reinterpret_cast<const CharPointer_UTF16::CharType*> (wideBuffer) };
2013+
2014+
builder.write (*ptr);
2015+
i += (int) ((ptr + 1).getAddress() - ptr.getAddress());
20152016
}
20162017

20172018
builder.write (0);
@@ -3011,6 +3012,45 @@ class StringTests final : public UnitTest
30113012
for (auto c : str)
30123013
expectEquals (c, parts[index++]);
30133014
}
3015+
3016+
const CharPointer_UTF8 expectedString { "glass \xc2\xbd full" };
3017+
const CharPointer_UTF8 emojiExpectedString { "hello JUCE \xf0\x9f\xa7\x83" };
3018+
3019+
beginTest ("createStringFromData reads LE UTF-16");
3020+
{
3021+
constexpr char buffer[] = "\xff\xfe\x67\x00\x6c\x00\x61\x00\x73\x00\x73\x00\x20\x00\xbd\x00\x20\x00\x66\x00\x75\x00\x6c\x00\x6c\x00";
3022+
expect (expectedString == String::createStringFromData (buffer, sizeof (buffer)));
3023+
3024+
constexpr char emojiBuffer[] = "\xff\xfe\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00\x20\x00\x4a\x00\x55\x00\x43\x00\x45\x00\x20\x00\x3e\xd8\xc3\xdd";
3025+
const auto emojiActualString = String::createStringFromData (emojiBuffer, sizeof (emojiBuffer));
3026+
expect (emojiExpectedString == emojiActualString);
3027+
}
3028+
3029+
beginTest ("createStringFromData reads BE UTF-16");
3030+
{
3031+
constexpr char buffer[] = "\xfe\xff\x00\x67\x00\x6c\x00\x61\x00\x73\x00\x73\x00\x20\x00\xbd\x00\x20\x00\x66\x00\x75\x00\x6c\x00\x6c";
3032+
expect (expectedString == String::createStringFromData (buffer, sizeof (buffer)));
3033+
3034+
constexpr char emojiBuffer[] = "\xfe\xff\x00\x68\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00\x20\x00\x4a\x00\x55\x00\x43\x00\x45\x00\x20\xd8\x3e\xdd\xc3";
3035+
const auto emojiActualString = String::createStringFromData (emojiBuffer, sizeof (emojiBuffer));
3036+
expect (emojiExpectedString == emojiActualString);
3037+
}
3038+
3039+
beginTest ("createStringFromData reads UTF-8");
3040+
{
3041+
constexpr char buffer[] = "glass \xc2\xbd full";
3042+
expect (expectedString == String::createStringFromData (buffer, sizeof (buffer)));
3043+
3044+
constexpr char emojiBuffer[] = "hello JUCE \xf0\x9f\xa7\x83";
3045+
const auto emojiActualString = String::createStringFromData (emojiBuffer, sizeof (emojiBuffer));
3046+
expect (emojiExpectedString == emojiActualString);
3047+
}
3048+
3049+
beginTest ("createStringFromData reads Windows 1252");
3050+
{
3051+
constexpr char buffer[] = "glass \xBD full";
3052+
expect (expectedString == String::createStringFromData (buffer, sizeof (buffer)));
3053+
}
30143054
}
30153055
};
30163056

0 commit comments

Comments
 (0)