25
25
26
26
#include " stdafx.h"
27
27
28
+ #if !defined(__GLIBCXX__)
29
+ #include < codecvt>
30
+ #endif
31
+
28
32
#include < locale_guard.h>
29
33
30
34
using namespace utility ;
@@ -44,17 +48,86 @@ TEST(usascii_to_utf16)
44
48
VERIFY_ARE_EQUAL ((utf16char)str_ascii[i], str_utf16[i]);
45
49
}
46
50
}
47
-
48
- TEST (default_to_utf16 )
51
+
52
+ TEST (utf8_to_utf16 )
49
53
{
50
- // TODO: find some string that actually uses something unique to the default code page.
51
- std::string str_default (" This is a test" );
52
- utf16string str_utf16 = utility::conversions::usascii_to_utf16 (str_default);
53
-
54
- for (size_t i = 0 ; i < str_default.size (); ++i)
55
- {
56
- VERIFY_ARE_EQUAL ((utf16char)str_default[i], str_utf16[i]);
57
- }
54
+ #if !defined(__GLIBCXX__)
55
+ std::wstring_convert<std::codecvt_utf8_utf16<utf16char>, utf16char> conversion;
56
+ #endif
57
+
58
+ // single byte character
59
+ VERIFY_ARE_EQUAL (_XPLATSTR (" ABC123" ), utility::conversions::utf8_to_utf16 (" ABC123" ));
60
+
61
+ // 2 byte character
62
+ std::string input;
63
+ input.push_back (unsigned char (207 )); // 11001111
64
+ input.push_back (unsigned char (129 )); // 10000001
65
+ input.push_back (unsigned char (198 )); // 11000110
66
+ input.push_back (unsigned char (141 )); // 10001101
67
+ auto result = utility::conversions::utf8_to_utf16 (input);
68
+ #if defined(__GLIBCXX__)
69
+ VERIFY_ARE_EQUAL (961 , result[0 ]);
70
+ VERIFY_ARE_EQUAL (397 , result[1 ]);
71
+ #else
72
+ VERIFY_ARE_EQUAL (conversion.from_bytes (input), result);
73
+ #endif
74
+
75
+ // 3 byte character
76
+ input.clear ();
77
+ input.push_back (unsigned char (230 )); // 11100110
78
+ input.push_back (unsigned char (141 )); // 10001101
79
+ input.push_back (unsigned char (157 )); // 10011101
80
+ input.push_back (unsigned char (231 )); // 11100111
81
+ input.push_back (unsigned char (143 )); // 10001111
82
+ input.push_back (unsigned char (156 )); // 10011100
83
+ result = utility::conversions::utf8_to_utf16 (input);
84
+ #if defined(__GLIBCXX__)
85
+ VERIFY_ARE_EQUAL (25437 , result[0 ]);
86
+ VERIFY_ARE_EQUAL (29660 , result[1 ]);
87
+ #else
88
+ VERIFY_ARE_EQUAL (conversion.from_bytes (input), result);
89
+ #endif
90
+
91
+ // 4 byte character
92
+ input.clear ();
93
+ input.push_back (unsigned char (240 )); // 11110000
94
+ input.push_back (unsigned char (173 )); // 10101101
95
+ input.push_back (unsigned char (157 )); // 10011101
96
+ input.push_back (unsigned char (143 )); // 10001111
97
+ input.push_back (unsigned char (240 )); // 11111000
98
+ input.push_back (unsigned char (161 )); // 10100001
99
+ input.push_back (unsigned char (191 )); // 10111111
100
+ input.push_back (unsigned char (191 )); // 10111111
101
+ result = utility::conversions::utf8_to_utf16 (input);
102
+ #if defined(__GLIBCXX__)
103
+ VERIFY_ARE_EQUAL (55413 , result[0 ]);
104
+ VERIFY_ARE_EQUAL (57167 , result[1 ]);
105
+ VERIFY_ARE_EQUAL (55296 , result[2 ]);
106
+ VERIFY_ARE_EQUAL (57160 , result[3 ]);
107
+ #else
108
+ VERIFY_ARE_EQUAL (conversion.from_bytes (input), result);
109
+ #endif
110
+ }
111
+
112
+ TEST (utf8_to_utf16_errors)
113
+ {
114
+ // missing second continuation byte
115
+ std::string input;
116
+ input.push_back (unsigned char (207 )); // 11001111
117
+ VERIFY_THROWS (utility::conversions::utf8_to_utf16 (input), std::invalid_argument);
118
+
119
+ // missing third continuation byte
120
+ input.clear ();
121
+ input.push_back (unsigned char (230 )); // 11100110
122
+ input.push_back (unsigned char (141 )); // 10001101
123
+ VERIFY_THROWS (utility::conversions::utf8_to_utf16 (input), std::invalid_argument);
124
+
125
+ // missing fourth continuation byte
126
+ input.clear ();
127
+ input.push_back (unsigned char (240 )); // 11110000
128
+ input.push_back (unsigned char (173 )); // 10101101
129
+ input.push_back (unsigned char (157 )); // 10011101
130
+ VERIFY_THROWS (utility::conversions::utf8_to_utf16 (input), std::invalid_argument);
58
131
}
59
132
60
133
TEST (latin1_to_utf16)
0 commit comments