@@ -205,9 +205,10 @@ int IsColorEscape(const char* str)
205205 }
206206 if (isdigit (str[1 ])) {
207207 return 2 ;
208- } else if (str[1 ] == ' x' || str[1 ] == ' X' ) {
208+ }
209+ else if (str[1 ] == ' x' || str[1 ] == ' X' ) {
209210 for (int c = 0 ; c < 6 ; c++) {
210- if ( !isxdigit (str[c + 2 ]) ) {
211+ if (!isxdigit (str[c + 2 ])) {
211212 return 0 ;
212213 }
213214 }
@@ -216,23 +217,77 @@ int IsColorEscape(const char* str)
216217 return 0 ;
217218}
218219
// Reports how many code points at the start of `str` form a colour escape.
//
// Returns 2 for an indexed escape (a caret followed by an ASCII digit, e.g. ^7),
// 8 for a direct escape (a caret, 'x' or 'X', then six ASCII hex digits,
// e.g. ^x123ABC), and 0 when `str` does not begin with a recognised escape.
int IsColorEscape(std::u32string_view str)
{
	// Every escape is a caret plus at least one more code point.
	const bool startsWithCaret = str.size() >= 2 && str[0] == U'^';
	if (!startsWithCaret) {
		return 0;
	}

	const char32_t selector = str[1];

	// Indexed form. Compared against the ASCII range by hand because only
	// Arabic numerals are accepted, not every Unicode digit.
	if (U'0' <= selector && selector <= U'9') {
		return 2;
	}

	// Direct form needs the 'x'/'X' selector and six more code points.
	const bool isHexSelector = selector == U'x' || selector == U'X';
	if (!isHexSelector || str.size() < 8) {
		return 0;
	}

	for (const char32_t digit : str.substr(2, 6)) {
		const bool isDecimal = digit >= U'0' && digit <= U'9';
		const bool isUpperHex = digit >= U'A' && digit <= U'F';
		const bool isLowerHex = digit >= U'a' && digit <= U'f';
		if (!(isDecimal || isUpperHex || isLowerHex)) {
			return 0;
		}
	}
	return 8;
}
249+
219250void ReadColorEscape (const char * str, col3_t out)
220251{
221252 int len = IsColorEscape (str);
222253 switch (len) {
223254 case 2 :
224255 VectorCopy (colorEscape[str[1 ] - ' 0' ], out);
225256 break ;
257+ case 8 :
258+ {
259+ int xr, xg, xb;
260+ sscanf (str + 2 , " %2x%2x%2x" , &xr, &xg, &xb);
261+ out[0 ] = xr / 255 .0f ;
262+ out[1 ] = xg / 255 .0f ;
263+ out[2 ] = xb / 255 .0f ;
264+ }
265+ break ;
266+ }
267+ }
268+
269+ std::u32string_view ReadColorEscape (std::u32string_view str, col3_t out)
270+ {
271+ int len = IsColorEscape (str);
272+ switch (len) {
273+ case 2 :
274+ VectorCopy (colorEscape[str[1 ] - U' 0' ], out);
275+ break ;
226276 case 8 :
227277 {
228278 int xr, xg, xb;
229- sscanf (str + 2 , " %2x%2x%2x" , &xr, &xg, &xb);
279+ char buf[7 ]{};
280+ for (size_t i = 0 ; i < 6 ; ++i) {
281+ buf[i] = (char )str[i + 2 ];
282+ }
283+ sscanf (buf, " %2x%2x%2x" , &xr, &xg, &xb);
230284 out[0 ] = xr / 255 .0f ;
231285 out[1 ] = xg / 255 .0f ;
232286 out[2 ] = xb / 255 .0f ;
233287 }
234288 break ;
235289 }
290+ return str.substr (len);
236291}
237292
238293// ================
@@ -279,3 +334,164 @@ dword StringHash(const char* str, int mask)
279334 }
280335 return hash & mask;
281336}
337+
338+ dword StringHash (std::string_view str, int mask)
339+ {
340+ size_t len = str.length ();
341+ dword hash = 0 ;
342+ for (size_t i = 0 ; i < len; i++) {
343+ hash += (str[i] * 4999 ) ^ (((dword)i + 17 ) * 2003 );
344+ }
345+ return hash & mask;
346+ }
347+
348+ #ifdef _WIN32
349+ #include < Windows.h>
350+
351+ static wchar_t * WidenCodepageString (const char * str, UINT codepage)
352+ {
353+ if (!str) {
354+ return nullptr ;
355+ }
356+ // Early-out if empty, avoids ambigious error return from MBTWC.
357+ if (!*str) {
358+ wchar_t * wstr = new wchar_t [1 ];
359+ *wstr = L' \0 ' ;
360+ return wstr;
361+ }
362+ DWORD cb = (DWORD)strlen (str);
363+ int cch = MultiByteToWideChar (codepage, MB_ERR_INVALID_CHARS, str, cb, nullptr , 0 );
364+ if (cch == 0 ) {
365+ // Invalid string or other error.
366+ return nullptr ;
367+ }
368+ wchar_t * wstr = new wchar_t [cch + 1 ]; // sized MBTWC doesn't include terminator.
369+ MultiByteToWideChar (codepage, 0 , str, cb, wstr, cch);
370+ wstr[cch] = ' \0 ' ;
371+ return wstr;
372+ }
373+
374+ wchar_t * WidenANSIString (const char * str)
375+ {
376+ return WidenCodepageString (str, CP_ACP);
377+ }
378+
379+ wchar_t * WidenOEMString (const char * str)
380+ {
381+ return WidenCodepageString (str, CP_OEMCP);
382+ }
383+
384+ wchar_t * WidenUTF8String (const char * str)
385+ {
386+ return WidenCodepageString (str, CP_UTF8);
387+ }
388+
389+ char * NarrowCodepageString (const wchar_t * str, UINT codepage)
390+ {
391+ if (!str) {
392+ return nullptr ;
393+ }
394+ if (!*str) {
395+ char * nstr = new char [1 ];
396+ *nstr = ' \0 ' ;
397+ return nstr;
398+ }
399+ DWORD cch = (DWORD)wcslen (str);
400+ int cb = WideCharToMultiByte (codepage, 0 , str, cch, nullptr , 0 , nullptr , nullptr );
401+ if (cb == 0 ) {
402+ // Invalid string or other error.
403+ return nullptr ;
404+ }
405+ char * nstr = new char [cb + 1 ];
406+ WideCharToMultiByte (codepage, 0 , str, cch, nstr, cb, nullptr , nullptr );
407+ nstr[cb] = ' \0 ' ;
408+ return nstr;
409+ }
410+
// Releases a wide string buffer with delete[]. Passing nullptr is allowed.
void FreeWideString(wchar_t* str)
{
	// delete[] on a null pointer does nothing, so no explicit check is needed.
	delete[] str;
}
417+
418+ char * NarrowANSIString (const wchar_t * str)
419+ {
420+ return NarrowCodepageString (str, CP_ACP);
421+ }
422+
423+ char * NarrowOEMString (const wchar_t * str)
424+ {
425+ return NarrowCodepageString (str, CP_OEMCP);
426+ }
427+
428+ char * NarrowUTF8String (const wchar_t * str)
429+ {
430+ return NarrowCodepageString (str, CP_UTF8);
431+ }
432+
433+ IndexedUTF32String IndexUTF8ToUTF32 (std::string_view input)
434+ {
435+ IndexedUTF32String ret{};
436+
437+ size_t byteCount = input.size ();
438+ auto & offsets = ret.sourceCodeUnitOffsets ;
439+ offsets.reserve (byteCount); // conservative reservation
440+ std::vector<char32_t > codepoints;
441+
442+ auto bytes = (uint8_t const *)input.data ();
443+ for (size_t byteIdx = 0 ; byteIdx < byteCount;) {
444+ uint8_t const * b = bytes + byteIdx;
445+ size_t left = byteCount - byteIdx;
446+ offsets.push_back (byteIdx);
447+
448+ char32_t codepoint{};
449+ if (*b >> 7 == 0b0 ) { // 0xxx'xxxx
450+ codepoint = *b;
451+ byteIdx += 1 ;
452+ }
453+ else if (left >= 2 &&
454+ b[0 ] >> 5 == 0b110 &&
455+ b[1 ] >> 6 == 0b10 )
456+ {
457+ auto p0 = (uint32_t )b[0 ] & 0b1'1111 ;
458+ auto p1 = (uint32_t )b[1 ] & 0b11'1111 ;
459+ codepoint = p0 << 6 | p1;
460+ byteIdx += 2 ;
461+ }
462+ else if (left >= 3 &&
463+ b[0 ] >> 4 == 0b1110 &&
464+ b[1 ] >> 6 == 0b10 &&
465+ b[2 ] >> 6 == 0b10 )
466+ {
467+ auto p0 = (uint32_t )b[0 ] & 0b1111 ;
468+ auto p1 = (uint32_t )b[1 ] & 0b11'1111 ;
469+ auto p2 = (uint32_t )b[2 ] & 0b11'1111 ;
470+ codepoint = p0 << 12 | p1 << 6 | p2;
471+ byteIdx += 3 ;
472+ }
473+ else if (left >= 4 &&
474+ b[0 ] >> 3 == 0b11110 &&
475+ b[1 ] >> 6 == 0b10 &&
476+ b[2 ] >> 6 == 0b10 &&
477+ b[3 ] >> 6 == 0b10 )
478+ {
479+ auto p0 = (uint32_t )b[0 ] & 0b111 ;
480+ auto p1 = (uint32_t )b[1 ] & 0b11'1111 ;
481+ auto p2 = (uint32_t )b[2 ] & 0b11'1111 ;
482+ auto p3 = (uint32_t )b[2 ] & 0b11'1111 ;
483+ codepoint = p0 << 18 | p1 << 12 | p2 << 6 | p3;
484+ byteIdx += 4 ;
485+ }
486+ else {
487+ codepoints.push_back (0xFFFDu );
488+ byteIdx += 1 ;
489+ }
490+ codepoints.push_back (codepoint);
491+ }
492+
493+ ret.text = std::u32string (codepoints.begin (), codepoints.end ());
494+ return ret;
495+ }
496+
497+ #endif
0 commit comments