3737#include <xtl.h>
3838#endif
3939
40+ #define UTF8_WALKBYTE (string ) (*((*(string))++))
41+
4042static unsigned leading_ones (uint8_t c )
4143{
4244 unsigned ones = 0 ;
@@ -89,13 +91,14 @@ size_t utf8_conv_utf32(uint32_t *out, size_t out_chars,
8991bool utf16_conv_utf8 (uint8_t * out , size_t * out_chars ,
9092 const uint16_t * in , size_t in_size )
9193{
92- static uint8_t kUtf8Limits [5 ] = { 0xC0 , 0xE0 , 0xF0 , 0xF8 , 0xFC };
93- size_t out_pos = 0 ;
94- size_t in_pos = 0 ;
94+ size_t out_pos = 0 ;
95+ size_t in_pos = 0 ;
96+ static const
97+ uint8_t utf8_limits [5 ] = { 0xC0 , 0xE0 , 0xF0 , 0xF8 , 0xFC };
9598
9699 for (;;)
97100 {
98- unsigned numAdds ;
101+ unsigned num_adds ;
99102 uint32_t value ;
100103
101104 if (in_pos == in_size )
@@ -124,21 +127,21 @@ bool utf16_conv_utf8(uint8_t *out, size_t *out_chars,
124127 value = (((value - 0xD800 ) << 10 ) | (c2 - 0xDC00 )) + 0x10000 ;
125128 }
126129
127- for (numAdds = 1 ; numAdds < 5 ; numAdds ++ )
128- if (value < (((uint32_t )1 ) << (numAdds * 5 + 6 )))
130+ for (num_adds = 1 ; num_adds < 5 ; num_adds ++ )
131+ if (value < (((uint32_t )1 ) << (num_adds * 5 + 6 )))
129132 break ;
130133 if (out )
131- out [out_pos ] = (char )(kUtf8Limits [ numAdds - 1 ]
132- + (value >> (6 * numAdds )));
134+ out [out_pos ] = (char )(utf8_limits [ num_adds - 1 ]
135+ + (value >> (6 * num_adds )));
133136 out_pos ++ ;
134137 do
135138 {
136- numAdds -- ;
139+ num_adds -- ;
137140 if (out )
138141 out [out_pos ] = (char )(0x80
139- + ((value >> (6 * numAdds )) & 0x3F ));
142+ + ((value >> (6 * num_adds )) & 0x3F ));
140143 out_pos ++ ;
141- }while (numAdds != 0 );
144+ }while (num_adds != 0 );
142145 }
143146
144147 * out_chars = out_pos ;
@@ -166,13 +169,15 @@ size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars)
166169 while (* sb && chars -- > 0 )
167170 {
168171 sb ++ ;
169- while ((* sb & 0xC0 ) == 0x80 ) sb ++ ;
172+ while ((* sb & 0xC0 ) == 0x80 )
173+ sb ++ ;
170174 }
171175
172176 if ((size_t )(sb - sb_org ) > d_len - 1 /* NUL */ )
173177 {
174178 sb = sb_org + d_len - 1 ;
175- while ((* sb & 0xC0 ) == 0x80 ) sb -- ;
179+ while ((* sb & 0xC0 ) == 0x80 )
180+ sb -- ;
176181 }
177182
178183 memcpy (d , sb_org , sb - sb_org );
@@ -184,14 +189,18 @@ size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars)
184189const char * utf8skip (const char * str , size_t chars )
185190{
186191 const uint8_t * strb = (const uint8_t * )str ;
192+
187193 if (!chars )
188194 return str ;
195+
189196 do
190197 {
191198 strb ++ ;
192- while ((* strb & 0xC0 )== 0x80 ) strb ++ ;
199+ while ((* strb & 0xC0 )== 0x80 )
200+ strb ++ ;
193201 chars -- ;
194- } while (chars );
202+ }while (chars );
203+
195204 return (const char * )strb ;
196205}
197206
@@ -211,24 +220,22 @@ size_t utf8len(const char *string)
211220 return ret ;
212221}
213222
214- #define utf8_walkbyte (string ) (*((*(string))++))
215-
216223/* Does not validate the input, returns garbage if it's not UTF-8. */
217224uint32_t utf8_walk (const char * * string )
218225{
219- uint8_t first = utf8_walkbyte (string );
226+ uint8_t first = UTF8_WALKBYTE (string );
220227 uint32_t ret = 0 ;
221228
222229 if (first < 128 )
223230 return first ;
224231
225- ret = (ret << 6 ) | (utf8_walkbyte (string ) & 0x3F );
232+ ret = (ret << 6 ) | (UTF8_WALKBYTE (string ) & 0x3F );
226233 if (first >= 0xE0 )
227234 {
228- ret = (ret << 6 ) | (utf8_walkbyte (string ) & 0x3F );
235+ ret = (ret << 6 ) | (UTF8_WALKBYTE (string ) & 0x3F );
229236 if (first >= 0xF0 )
230237 {
231- ret = (ret << 6 ) | (utf8_walkbyte (string ) & 0x3F );
238+ ret = (ret << 6 ) | (UTF8_WALKBYTE (string ) & 0x3F );
232239 return ret | (first & 7 ) << 18 ;
233240 }
234241 return ret | (first & 15 ) << 12 ;
@@ -277,9 +284,7 @@ bool utf16_to_char_string(const uint16_t *in, char *s, size_t len)
277284static char * mb_to_mb_string_alloc (const char * str ,
278285 enum CodePage cp_in , enum CodePage cp_out )
279286{
280- char * path_buf = NULL ;
281287 wchar_t * path_buf_wide = NULL ;
282- int path_buf_len = 0 ;
283288 int path_buf_wide_len = MultiByteToWideChar (cp_in , 0 , str , -1 , NULL , 0 );
284289
285290 /* Windows 95 will return 0 from these functions with
@@ -292,54 +297,51 @@ static char *mb_to_mb_string_alloc(const char *str,
292297 * MultiByteToWideChar also supports CP_UTF7 and CP_UTF8.
293298 */
294299
295- if (path_buf_wide_len )
300+ if (!path_buf_wide_len )
301+ return strdup (str );
302+
303+ path_buf_wide = (wchar_t * )
304+ calloc (path_buf_wide_len + sizeof (wchar_t ), sizeof (wchar_t ));
305+
306+ if (path_buf_wide )
296307 {
297- path_buf_wide = ( wchar_t * )
298- calloc ( path_buf_wide_len + sizeof ( wchar_t ), sizeof ( wchar_t ) );
308+ MultiByteToWideChar ( cp_in , 0 ,
309+ str , -1 , path_buf_wide , path_buf_wide_len );
299310
300- if (path_buf_wide )
311+ if (* path_buf_wide )
301312 {
302- MultiByteToWideChar ( cp_in , 0 ,
303- str , -1 , path_buf_wide , path_buf_wide_len );
313+ int path_buf_len = WideCharToMultiByte ( cp_out , 0 ,
314+ path_buf_wide , -1 , NULL , 0 , NULL , NULL );
304315
305- if (* path_buf_wide )
316+ if (path_buf_len )
306317 {
307- path_buf_len = WideCharToMultiByte ( cp_out , 0 ,
308- path_buf_wide , -1 , NULL , 0 , NULL , NULL );
318+ char * path_buf = ( char * )
319+ calloc ( path_buf_len + sizeof ( char ), sizeof ( char ) );
309320
310- if (path_buf_len )
321+ if (path_buf )
311322 {
312- path_buf = (char * )
313- calloc (path_buf_len + sizeof (char ), sizeof (char ));
323+ WideCharToMultiByte (cp_out , 0 ,
324+ path_buf_wide , -1 , path_buf ,
325+ path_buf_len , NULL , NULL );
314326
315- if (path_buf )
316- {
317- WideCharToMultiByte (cp_out , 0 ,
318- path_buf_wide , -1 , path_buf ,
319- path_buf_len , NULL , NULL );
320-
321- free (path_buf_wide );
327+ free (path_buf_wide );
322328
323- if (* path_buf )
324- return path_buf ;
329+ if (* path_buf )
330+ return path_buf ;
325331
326- free (path_buf );
327- return NULL ;
328- }
329- }
330- else
331- {
332- free (path_buf_wide );
333- return strdup (str );
332+ free (path_buf );
333+ return NULL ;
334334 }
335335 }
336+ else
337+ {
338+ free (path_buf_wide );
339+ return strdup (str );
340+ }
336341 }
337- }
338- else
339- return strdup (str );
340342
341- if (path_buf_wide )
342343 free (path_buf_wide );
344+ }
343345
344346 return NULL ;
345347}
@@ -379,13 +381,13 @@ char* local_to_utf8_string_alloc(const char *str)
379381wchar_t * utf8_to_utf16_string_alloc (const char * str )
380382{
381383#ifdef _WIN32
382- int len = 0 ;
383- int out_len = 0 ;
384+ int len = 0 ;
385+ int out_len = 0 ;
384386#else
385- size_t len = 0 ;
387+ size_t len = 0 ;
386388 size_t out_len = 0 ;
387389#endif
388- wchar_t * buf = NULL ;
390+ wchar_t * buf = NULL ;
389391
390392 if (!str || !* str )
391393 return NULL ;
0 commit comments