@@ -15,7 +15,7 @@ std::wstring ConvertUtf8ToUtf16(std::string_view utf8_str)
1515
1616 while (i < utf8_str.length ()) {
1717 uint32_t codepoint = 0 ;
18- int bytes = 0 ;
18+ size_t bytes = 0 ;
1919
2020 unsigned char c = static_cast <unsigned char >(utf8_str[i]);
2121
@@ -36,21 +36,29 @@ std::wstring ConvertUtf8ToUtf16(std::string_view utf8_str)
3636 bytes = 4 ;
3737 }
3838 else {
39- // Invalid UTF-8 sequence
39+ // Invalid UTF-8 sequence - skip this byte
4040 ++i;
4141 continue ;
4242 }
4343
4444 // Process remaining bytes
45- for (int j = 1 ; j < bytes && (i + j) < utf8_str.length (); ++j) {
45+ bool valid = true ;
46+ for (size_t j = 1 ; j < bytes && (i + j) < utf8_str.length (); ++j) {
4647 unsigned char next = static_cast <unsigned char >(utf8_str[i + j]);
4748 if ((next & 0xC0 ) != 0x80 ) {
48- // Invalid UTF-8 sequence
49+ // Invalid continuation byte
50+ valid = false ;
4951 break ;
5052 }
5153 codepoint = (codepoint << 6 ) | (next & 0x3F );
5254 }
5355
56+ if (!valid || (i + bytes) > utf8_str.length ()) {
57+ // Skip invalid sequence
58+ ++i;
59+ continue ;
60+ }
61+
5462 // Convert to UTF-16
5563 if (codepoint <= 0xFFFF ) {
5664 // Character in BMP
@@ -107,30 +115,33 @@ bool Utf16Reader::getline(std::wstring& line)
107115 wchar_t next;
108116 if (ReadChar (next)) {
109117 if (next == L' \n ' ) {
118+ // Found \r\n - line ending
110119 break ;
111120 }
112- line += ch;
121+ // \r followed by something else - include only the next char
113122 line += next;
114123 }
115- } else {
124+ // Single \r as line ending
125+ break ;
126+ }
127+ else {
116128 line += ch;
117129 }
118130 }
119131 return found_data;
120132}
121133
// Reports whether the underlying stream has its end-of-file flag set.
// Returns the result of file_.eof() unchanged.
// This hunk only moves the opening brace onto its own line; the body is untouched.
122- bool Utf16Reader::eof () const {
134+ bool Utf16Reader::eof () const
135+ {
123136 return file_.eof ();
124137}
125138
// Closes the underlying stream by calling file_.close().
// This hunk collapses the three-line definition into a single line; the call is unchanged.
126- void Utf16Reader::close () {
127- file_.close ();
128- }
139+ void Utf16Reader::close () { file_.close (); }
129140
130141bool Utf16Reader::ReadChar (wchar_t & ch)
131142{
132143 file_.read (reinterpret_cast <char *>(&ch), sizeof (ch));
133- if (file_.gcount () != sizeof (ch)) {
144+ if (file_.gcount () != static_cast <std::streamsize>( sizeof (ch) )) {
134145 file_.setstate (std::ios::eofbit);
135146 return false ;
136147 }
@@ -142,30 +153,30 @@ bool Utf16Reader::ReadChar(wchar_t& ch)
// Opens `filename` in binary mode and immediately writes the UTF16LE_BOM constant
// (declared outside this hunk) as raw bytes at the start of the file.
// Throws std::runtime_error when the file cannot be opened, and (added by this
// change) when the stream is not in a good state after the BOM write.
142153Utf16Writer::Utf16Writer (const std::filesystem::path& filename)
143154 : file_(filename, std::ios::binary)
144155{
145- if (!file_.is_open ()) {
156+ if (!file_.is_open ())
157+ {
146158 throw std::runtime_error (" Failed to open file: " + filename.string ());
147159 }
// The BOM is written as sizeof(UTF16LE_BOM) raw bytes in host byte order.
// NOTE(review): presumably the target is little-endian so the bytes match UTF-16LE - confirm.
148160 file_.write (reinterpret_cast <const char *>(&UTF16LE_BOM), sizeof (UTF16LE_BOM));
// New check: surface a failed BOM write instead of continuing with a bad stream.
161+ if (!file_.good ()) {
162+ throw std::runtime_error (" Failed to write BOM to file: " + filename.string ());
163+ }
149164}
150165
// Closes the underlying stream when the writer is destroyed.
// NOTE(review): if file_ is a std::ofstream its own destructor would also close it,
// making this call redundant but harmless - confirm the member type.
151166Utf16Writer::~Utf16Writer ()
152167{
153168 file_.close ();
154169}
155170
// Reports whether the underlying stream is currently open.
// Returns the result of file_.is_open() unchanged.
// This hunk only moves the opening brace onto its own line; the body is untouched.
156- bool Utf16Writer::is_open () const {
171+ bool Utf16Writer::is_open () const
172+ {
157173 return file_.is_open ();
158174}
159175
// Writes the characters of `str` to the file as raw bytes:
// str.length() * sizeof(wchar_t) bytes starting at str.data().
// Returns *this so that insertions can be chained.
// No stream-state check is made after the write.
// NOTE(review): sizeof(wchar_t) is platform dependent; where it is 4 bytes the
// output would not be UTF-16 code units - confirm the supported platforms.
160176Utf16Writer& Utf16Writer::operator <<(std::wstring_view str)
161177{
162- file_.write (reinterpret_cast <const char *>(str.data ()), str.length () * sizeof (wchar_t ));
163- return *this ;
164- }
165-
// Removed by this change: a const std::wstring& overload whose body was identical
// to the std::wstring_view overload above.
// NOTE(review): presumably std::wstring arguments now reach the std::wstring_view
// overload through implicit conversion - verify no call site becomes ambiguous.
166- Utf16Writer& Utf16Writer::operator <<(const std::wstring& str)
167- {
168- file_.write (reinterpret_cast <const char *>(str.data ()), str.length () * sizeof (wchar_t ));
// Added: the byte count is now cast explicitly to std::streamsize, the signed
// type that write() takes, instead of relying on an implicit conversion.
178+ file_.write (reinterpret_cast <const char *>(str.data ()),
179+ static_cast <std::streamsize>(str.length () * sizeof (wchar_t )));
169180 return *this ;
170181}
171182
@@ -178,21 +189,24 @@ Utf16Writer& Utf16Writer::operator<<(wchar_t ch)
// Writes a null-terminated wide string to the file as raw bytes.
// The length is taken from the std::wstring_view built over `str`, so the
// terminating null character is not written.
// `str` must not be null: constructing a std::wstring_view from a null pointer
// is undefined behavior and this function does not guard against it.
// Returns *this so that insertions can be chained.
178189Utf16Writer& Utf16Writer::operator <<(const wchar_t * str)
179190{
180191 const std::wstring_view view (str);
181- file_.write (reinterpret_cast <const char *>(view.data ()), view.length () * sizeof (wchar_t ));
// Added: explicit cast of the byte count to std::streamsize.
192+ file_.write (reinterpret_cast <const char *>(view.data ()),
193+ static_cast <std::streamsize>(view.length () * sizeof (wchar_t )));
182194 return *this ;
183195}
184196
// Writes `value` as text: the number is formatted with std::to_wstring and the
// resulting wide characters are written to the file as raw bytes.
// Returns *this so that insertions can be chained.
185197Utf16Writer& Utf16Writer::operator <<(uint32_t value)
186198{
187199 const std::wstring str = std::to_wstring (value);
188- file_.write (reinterpret_cast <const char *>(str.data ()), str.length () * sizeof (wchar_t ));
// Added: explicit cast of the byte count to std::streamsize.
200+ file_.write (reinterpret_cast <const char *>(str.data ()),
201+ static_cast <std::streamsize>(str.length () * sizeof (wchar_t )));
189202 return *this ;
190203}
191204
// Writes a narrow string to the file after passing it through ConvertUtf8ToUtf16,
// then emitting the converted wide characters as raw bytes.
// How malformed input is handled is decided entirely by ConvertUtf8ToUtf16.
// Returns *this so that insertions can be chained.
192205Utf16Writer& Utf16Writer::operator <<(std::string_view str)
193206{
194207 // Convert to UTF-16 from UTF-8
195208 std::wstring converted = ConvertUtf8ToUtf16 (str);
196- file_.write (reinterpret_cast <const char *>(converted.data ()), converted.length () * sizeof (wchar_t ));
// Added: explicit cast of the byte count to std::streamsize.
209+ file_.write (reinterpret_cast <const char *>(converted.data ()),
210+ static_cast <std::streamsize>(converted.length () * sizeof (wchar_t )));
197211 return *this ;
198212}
0 commit comments