@@ -4234,6 +4234,10 @@ void ImFontAtlasBuildInit(ImFontAtlas* atlas)
42344234 ImFontAtlasUpdateDrawListsSharedData (atlas);
42354235
42364236 // atlas->TexIsBuilt = true;
4237+
4238+ // Lazily initialize char/text classifier
4239+ // FIXME: This could be practically anywhere, and should eventually be parameters to CalcTextSize/word-wrapping code, but there's no obvious spot now.
4240+ ImTextInitClassifiers ();
42374241}
42384242
42394243// Destroy builder and all cached glyphs. Do not destroy actual fonts.
@@ -5371,23 +5375,63 @@ const char* ImTextCalcWordWrapNextLineStart(const char* text, const char* text_e
53715375 return text;
53725376}
53735377
5374- // Character classification for word-wrapping logic
5375- enum
5378+ void ImTextClassifierClear (ImU32* bits, unsigned int codepoint_min, unsigned int codepoint_end, ImWcharClass char_class)
53765379{
5377- ImWcharClass_Blank, ImWcharClass_Punct, ImWcharClass_Other
5378- };
5380+ for (unsigned int c = codepoint_min; c < codepoint_end; c++)
5381+ ImTextClassifierSetCharClass (bits, codepoint_min, codepoint_end, char_class, c);
5382+ }
5383+
5384+ void ImTextClassifierSetCharClass (ImU32* bits, unsigned int codepoint_min, unsigned int codepoint_end, ImWcharClass char_class, unsigned int c)
5385+ {
5386+ IM_ASSERT (c >= codepoint_min && c < codepoint_end);
5387+ c -= codepoint_min;
5388+ const ImU32 shift = (c & 15 ) << 1 ;
5389+ bits[c >> 4 ] = (bits[c >> 4 ] & ~(0x03 << shift)) | (char_class << shift);
5390+ }
5391+
5392+ void ImTextClassifierSetCharClassFromStr (ImU32* bits, unsigned int codepoint_min, unsigned int codepoint_end, ImWcharClass char_class, const char * s)
5393+ {
5394+ const char * s_end = s + strlen (s);
5395+ while (*s)
5396+ {
5397+ unsigned int c;
5398+ s += ImTextCharFromUtf8 (&c, s, s_end);
5399+ ImTextClassifierSetCharClass (bits, codepoint_min, codepoint_end, char_class, c);
5400+ }
5401+ }
5402+
// Read the 2-bit class stored for entry _CHAR_OFFSET (an index relative to the table's first codepoint) in the packed table _BITS.
// Layout matches ImTextClassifierSetCharClass(): 16 entries per word, entry N occupies bits [2*(N&15), 2*(N&15)+1] of word N>>4.
// Note: _CHAR_OFFSET is evaluated twice, avoid passing expressions with side effects.
#define ImTextClassifierGet(_BITS, _CHAR_OFFSET)    ((((_BITS)[(_CHAR_OFFSET) >> 4]) >> (((_CHAR_OFFSET) & 15) << 1)) & 0x03)
5404+
5405+ // 2-bit per character
5406+ static ImU32 g_CharClassifierIsSeparator_0000_007f[128 / 16 ] = {};
5407+ static ImU32 g_CharClassifierIsSeparator_3000_300f[ 16 / 16 ] = {};
5408+
5409+ void ImTextInitClassifiers ()
5410+ {
5411+ if (ImTextClassifierGet (g_CharClassifierIsSeparator_0000_007f, ' ,' ) != 0 )
5412+ return ;
5413+
5414+ // List of hardcoded separators: .,;!?'"
5415+ // Making this dynamic given known ranges is trivial BUT requires us to standardize where you pass them as parameters. (#3002, #8503)
5416+ ImTextClassifierClear (g_CharClassifierIsSeparator_0000_007f, 0 , 128 , ImWcharClass_Other);
5417+ ImTextClassifierSetCharClassFromStr (g_CharClassifierIsSeparator_0000_007f, 0 , 128 , ImWcharClass_Blank, " \t " );
5418+ ImTextClassifierSetCharClassFromStr (g_CharClassifierIsSeparator_0000_007f, 0 , 128 , ImWcharClass_Punct, " .,;!?\" " );
5419+
5420+ ImTextClassifierClear (g_CharClassifierIsSeparator_3000_300f, 0x3000 , 0x300F , ImWcharClass_Other);
5421+ ImTextClassifierSetCharClass (g_CharClassifierIsSeparator_3000_300f, 0x3000 , 0x300F , ImWcharClass_Blank, 0x3000 );
5422+ ImTextClassifierSetCharClass (g_CharClassifierIsSeparator_3000_300f, 0x3000 , 0x300F , ImWcharClass_Punct, 0x3001 );
5423+ ImTextClassifierSetCharClass (g_CharClassifierIsSeparator_3000_300f, 0x3000 , 0x300F , ImWcharClass_Punct, 0x3002 );
5424+ }
53795425
53805426// Simple word-wrapping for English, not full-featured. Please submit failing cases!
53815427// This will return the next location to wrap from. If no wrapping is necessary, this will fast-forward to e.g. text_end.
5382- // FIXME: Much possible improvements (don't cut things like "word !", "word!!!" but cut within "word,,,,", more sensible support for punctuations, support for Unicode punctuations, etc.)
5428+ // Refer to imgui_test_suite's "drawlist_text_wordwrap_1" for tests.
53835429const char * ImFontCalcWordWrapPositionEx (ImFont* font, float size, const char * text, const char * text_end, float wrap_width, ImDrawTextFlags flags)
53845430{
53855431 // For references, possible wrap point marked with ^
53865432 // "aaa bbb, ccc,ddd. eee fff. ggg!"
53875433 // ^ ^ ^ ^ ^__ ^ ^
53885434
5389- // List of hardcoded separators: .,;!?'"
5390-
53915435 // Skip extra blanks after a line returns (that includes not counting them in width computation)
53925436 // e.g. "Hello world" --> "Hello" "World"
53935437
@@ -5439,10 +5483,10 @@ const char* ImFontCalcWordWrapPositionEx(ImFont* font, float size, const char* t
54395483
54405484 // Classify current character
54415485 int curr_type;
5442- if (c == ' ' || c == ' \t ' || c == 0x3000 ) // Inline version of ImCharIsBlankW(c )
5443- curr_type = ImWcharClass_Blank ;
5444- else if (c == ' . ' || c == ' , ' || c == ' ; ' || c == ' ! ' || c == ' ? ' || c == ' \" ' || c == 0x3001 || c == 0x3002 )
5445- curr_type = ImWcharClass_Punct;
5486+ if (c < 128 )
5487+ curr_type = ImTextClassifierGet (g_CharClassifierIsSeparator_0000_007f, c) ;
5488+ else if (c >= 0x3000 && c < 0x3010 )
5489+ curr_type = ImTextClassifierGet (g_CharClassifierIsSeparator_3000_300f, c & 15 ); // -V578
54465490 else
54475491 curr_type = ImWcharClass_Other;
54485492
@@ -5460,7 +5504,7 @@ const char* ImFontCalcWordWrapPositionEx(ImFont* font, float size, const char* t
54605504 else
54615505 {
54625506 // End span: '.X' unless X is a digit
5463- if (prev_type == ImWcharClass_Punct && curr_type != ImWcharClass_Punct && !(c >= ' 0' && c <= ' 9' ))
5507+ if (prev_type == ImWcharClass_Punct && curr_type != ImWcharClass_Punct && !(c >= ' 0' && c <= ' 9' )) // FIXME: Digit checks might be removed if allow custom separators (#8503)
54645508 {
54655509 span_end = s;
54665510 line_width += span_width + blank_width;
0 commit comments