@@ -10,6 +10,7 @@ in the source distribution for its full text.
1010#include "XUtils.h"
1111
1212#include <assert.h>
13+ #include <ctype.h> // IWYU pragma: keep
1314#include <errno.h>
1415#include <fcntl.h>
1516#include <limits.h>
@@ -236,6 +237,296 @@ size_t strnlen(const char* str, size_t maxLen) {
236237}
237238#endif
238239
240+ #ifdef HAVE_LIBNCURSESW
241+ static void String_encodeWChar (WCharEncoderState * ps , wchar_t wc ) {
242+ assert (!ps -> buf || ps -> pos < ps -> size );
243+
244+ char tempBuf [MB_LEN_MAX ];
245+ char * dest = ps -> buf ? (char * )ps -> buf + ps -> pos : tempBuf ;
246+
247+ // It is unnecessarily expensive to fix the output string if the caller
248+ // gives an incorrect buffer size. This function would not support any
249+ // truncation of the output string.
250+ size_t len = wcrtomb (dest , wc , & ps -> mbState );
251+ assert (len > 0 );
252+ if (len == (size_t )-1 ) {
253+ assert (len != (size_t )-1 );
254+ fail ();
255+ }
256+ if (ps -> buf && len > ps -> size - ps -> pos ) {
257+ assert (!ps -> buf || len <= ps -> size - ps -> pos );
258+ fail ();
259+ }
260+
261+ ps -> pos += len ;
262+ }
263+ #else
264+ static void String_encodeWChar (WCharEncoderState * ps , int c ) {
265+ assert (!ps -> buf || ps -> pos < ps -> size );
266+
267+ char * buf = ps -> buf ;
268+ if (buf ) {
269+ buf [ps -> pos ] = (char )c ;
270+ }
271+
272+ ps -> pos += 1 ;
273+ }
274+ #endif
275+
276+ void EncodePrintableString (WCharEncoderState * ps , const char * src , size_t maxLen , EncodeWChar encodeWChar ) {
277+ assert (src || maxLen == 0 );
278+
279+ size_t pos = 0 ;
280+ bool wasReplaced = false;
281+
282+ #ifdef HAVE_LIBNCURSESW
283+ const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?' ;
284+ wchar_t ch ;
285+
286+ mbstate_t decState ;
287+ memset (& decState , 0 , sizeof (decState ));
288+ #else
289+ const char replacementChar = '?' ;
290+ char ch ;
291+ #endif
292+
293+ do {
294+ size_t len = 0 ;
295+ bool shouldReplace = false;
296+ ch = 0 ;
297+
298+ if (pos < maxLen ) {
299+ // Read the next character from the byte sequence
300+ #ifdef HAVE_LIBNCURSESW
301+ mbstate_t newState ;
302+ memcpy (& newState , & decState , sizeof (newState ));
303+ len = mbrtowc (& ch , & src [pos ], maxLen - pos , & newState );
304+
305+ assert (len != 0 || ch == 0 );
306+ switch (len ) {
307+ case (size_t )-2 :
308+ errno = EILSEQ ;
309+ shouldReplace = true;
310+ len = maxLen - pos ;
311+ break ;
312+
313+ case (size_t )-1 :
314+ shouldReplace = true;
315+ len = 1 ;
316+ break ;
317+
318+ default :
319+ memcpy (& decState , & newState , sizeof (decState ));
320+ }
321+ #else
322+ len = 1 ;
323+ ch = src [pos ];
324+ #endif
325+ }
326+
327+ pos += len ;
328+
329+ // Filter unprintable characters
330+ if (!shouldReplace && ch != 0 ) {
331+ #ifdef HAVE_LIBNCURSESW
332+ shouldReplace = !iswprint (ch );
333+ #else
334+ shouldReplace = !isprint ((unsigned char )ch );
335+ #endif
336+ }
337+
338+ if (shouldReplace ) {
339+ ch = replacementChar ;
340+ if (wasReplaced ) {
341+ continue ;
342+ }
343+ }
344+ wasReplaced = shouldReplace ;
345+
346+ encodeWChar (ps , ch );
347+ } while (ch != 0 );
348+ }
349+
350+ char * String_makePrintable (const char * str , size_t maxLen ) {
351+ WCharEncoderState encState ;
352+
353+ memset (& encState , 0 , sizeof (encState ));
354+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
355+ size_t size = encState .pos ;
356+ assert (size > 0 );
357+
358+ memset (& encState , 0 , sizeof (encState ));
359+ char * buf = xMalloc (size );
360+ encState .size = size ;
361+ encState .buf = buf ;
362+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
363+ assert (encState .pos == size );
364+
365+ return buf ;
366+ }
367+
368+ bool String_decodeNextWChar (MBStringDecoderState * ps ) {
369+ if (!ps -> str || ps -> maxLen == 0 ) {
370+ return false;
371+ }
372+
373+ // If the previous call of this function encounters an invalid sequence,
374+ // do not continue (because the "mbState" object for mbrtowc() is
375+ // undefined). The caller is supposed to reset the state.
376+ #ifdef HAVE_LIBNCURSESW
377+ bool isStateDefined = ps -> ch != WEOF ;
378+ #else
379+ bool isStateDefined = ps -> ch != EOF ;
380+ #endif
381+ if (!isStateDefined ) {
382+ return false;
383+ }
384+
385+ #ifdef HAVE_LIBNCURSESW
386+ wchar_t wc ;
387+ size_t len = mbrtowc (& wc , ps -> str , ps -> maxLen , & ps -> mbState );
388+ switch (len ) {
389+ case (size_t )-1 :
390+ // Invalid sequence
391+ ps -> ch = WEOF ;
392+ return false;
393+
394+ case (size_t )-2 :
395+ // Incomplete sequence
396+ ps -> str += ps -> maxLen ;
397+ ps -> maxLen = 0 ;
398+ return false;
399+
400+ case 0 :
401+ assert (wc == 0 );
402+
403+ ps -> str = NULL ;
404+ ps -> maxLen = 0 ;
405+ ps -> ch = wc ;
406+ return true;
407+
408+ default :
409+ ps -> str += len ;
410+ ps -> maxLen -= len ;
411+ ps -> ch = wc ;
412+ }
413+ return true;
414+ #else
415+ ps -> ch = * ps -> str ;
416+ if (ps -> ch == 0 ) {
417+ ps -> str = NULL ;
418+ ps -> maxLen = 0 ;
419+ } else {
420+ ps -> str ++ ;
421+ ps -> maxLen -- ;
422+ }
423+ return true;
424+ #endif
425+ }
426+
427+ int String_lineBreakWidth (const char * * str , size_t maxLen , int maxWidth , char separator ) {
428+ assert (* str || maxLen == 0 );
429+
430+ // The caller should ensure (maxWidth >= 0).
431+ // It's possible for a Unicode string to occupy 0 terminal columns, so this
432+ // function allows (maxWidth == 0).
433+ if (maxWidth < 0 )
434+ maxWidth = INT_MAX ;
435+
436+ MBStringDecoderState state ;
437+ memset (& state , 0 , sizeof (state ));
438+ state .str = * str ;
439+ state .maxLen = maxLen ;
440+
441+ int totalWidth = 0 ;
442+ int breakWidth = 0 ;
443+
444+ const char * breakPos = NULL ;
445+ bool inSpaces = true;
446+
447+ while (String_decodeNextWChar (& state )) {
448+ if (state .ch == 0 )
449+ break ;
450+
451+ if (state .ch == ' ' && separator == ' ' && !inSpaces ) {
452+ breakWidth = totalWidth ;
453+ breakPos = * str ;
454+ inSpaces = true;
455+ }
456+
457+ #ifdef HAVE_LIBNCURSESW
458+ int w = wcwidth ((wchar_t )state .ch );
459+ if (w < 0 ) {
460+ // This function should not be used with string containing unprintable
461+ // characters. Tolerate them on release build, however.
462+ assert (w >= 0 );
463+ break ;
464+ }
465+ #else
466+ assert (isprint (state .ch ));
467+ int w = 1 ;
468+ #endif
469+
470+ if (w > maxWidth - totalWidth ) {
471+ // This character cannot fit the line with the given maxWidth.
472+ if (breakPos ) {
473+ // Rewind the scanning state to the last found separator.
474+ totalWidth = breakWidth ;
475+ * str = breakPos ;
476+ }
477+ break ;
478+ }
479+
480+ #ifdef HAVE_LIBNCURSESW
481+ // If the character takes zero columns, include the character in the
482+ // substring if the working encoding is UTF-8, and ignore it otherwise.
483+ // In Unicode, combining characters are always placed after the base
484+ // character, but some legacy 8-bit encodings instead place combining
485+ // characters before the base character.
486+ if (w <= 0 && !CRT_utf8 ) {
487+ continue ;
488+ }
489+ #endif
490+
491+ totalWidth += w ;
492+
493+ // (*str - start) will represent the length of the substring bounded
494+ // by the width limit.
495+ * str = state .str ;
496+
497+ if (state .ch != ' ' )
498+ inSpaces = false;
499+
500+ #ifdef HAVE_LIBNCURSESW
501+ wint_t sepCast = (wint_t )separator ;
502+ #else
503+ int sepCast = (int )separator ;
504+ #endif
505+ if (state .ch == sepCast && separator != ' ' ) {
506+ breakWidth = totalWidth ;
507+ breakPos = * str ;
508+ }
509+ }
510+
511+ return totalWidth ;
512+ }
513+
/*
 * Measures the leading part of a string that fits in "maxWidth" columns
 * (negative: unlimited), examining at most "maxLen" bytes.
 *
 * On return *str points one past the measured part and the width of that
 * part is returned. Without wide-character support every byte counts as
 * exactly one column.
 */
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
#ifdef HAVE_LIBNCURSESW
   // Same scan as the line-breaking routine, called with '\0' as the separator.
   return String_lineBreakWidth(str, maxLen, maxWidth, '\0');
#else
   assert(*str || maxLen == 0);

   const int widthCap = (maxWidth < 0) ? INT_MAX : maxWidth;
   const size_t scanLimit = MINIMUM((size_t)widthCap, maxLen);
   const size_t count = strnlen(*str, scanLimit);

   *str += count;
   return (int)count;
#endif
}
529+
239530int xAsprintf (char * * strp , const char * fmt , ...) {
240531 * strp = NULL ;
241532
0 commit comments