@@ -18,7 +18,8 @@ enum ItemType
1818 StatementSeparator,
1919 StringComponent,
2020 StringSeparator,
21- StringSpace,
21+ StringWhitespace,
22+ StringNewline,
2223 FormatSpecifier,
2324 EscapeSequence,
2425 Group,
@@ -272,6 +273,14 @@ static vector<InstructionTextToken> ParseStringToken(
272273 result.emplace_back (StringToken, string (src.substr (start, end - start)));
273274 };
274275
276+ // We generally split along spaces while keeping words intact, but some cases have
277+ // specific splitting behavior:
278+ //
279+ // - Any format specifier (starting with %) will be treated as an atom even if embedded
280+ // within a word
281+ // - Any escape sequence will also be treated as an atom
282+ // - We split along punctuation like commas, colons, periods, and semicolons, grouping
283+ // trailing punctuation together.
275284 while (curEnd < tail)
276285 {
277286 char c = src[curEnd];
@@ -332,8 +341,10 @@ static vector<Item> CreateStringGroups(const vector<Item>& items)
332341 bool hasStrings = false ;
333342 for (auto & i : items)
334343 {
335- if (i.type == StringSeparator && !i.tokens .empty ())
344+ if (( i.type == StringSeparator) && !i.tokens .empty ())
336345 {
346+ // Attach the separator to the preceding word when there is one; otherwise
347+ // emit it as a standalone atom
337348 if (pending.empty ())
338349 {
339350 result.push_back (Item {Atom, {}, {i.tokens }, 0 });
@@ -347,6 +358,16 @@ static vector<Item> CreateStringGroups(const vector<Item>& items)
347358 pending.clear ();
348359 hasStrings = true ;
349360 }
361+ else if (i.type == StringWhitespace)
362+ {
363+ // Whitespace is kept as its own item because it is allowed to trail past the desired width
364+ if (!pending.empty ())
365+ {
366+ result.push_back (Item {StringComponent, pending, {}, 0 });
367+ pending.clear ();
368+ }
369+ result.push_back (Item {StringWhitespace, i.items , i.tokens , i.width });
370+ }
350371 else if (i.type == FormatSpecifier || i.type == EscapeSequence)
351372 {
352373 if (!pending.empty ())
@@ -795,17 +816,25 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
795816 break ;
796817 case StringToken:
797818 {
798- vector<InstructionTextToken> stringTokens = ParseStringToken (token, 512 );
819+ vector<InstructionTextToken> stringTokens = ParseStringToken (token, settings. maximumAnnotationLength );
799820 for (size_t k = 0 ; k < stringTokens.size (); k++)
800821 {
801822 InstructionTextToken subToken = stringTokens[k];
802823 string trimmedSubText = TrimString (subToken.text );
803824 if (trimmedSubText.empty ())
804- items.push_back (Item {StringSeparator , {}, {subToken}, 0 });
825+ items.push_back (Item {StringWhitespace , {}, {subToken}, 0 });
805826 if (trimmedSubText[0 ] == ' %' )
806827 items.push_back (Item {FormatSpecifier, {}, {subToken}, 0 });
807828 else if (!trimmedSubText.empty () && trimmedSubText[0 ] == ' \\ ' )
829+ {
830+ if (trimmedSubText.size () > 1 )
831+ {
832+ if (trimmedSubText[1 ] == ' n' )
833+ items.push_back (Item {StringNewline, {}, {subToken}, 0 });
834+ continue ;
835+ }
808836 items.push_back (Item {EscapeSequence, {}, {subToken}, 0 });
837+ }
809838 else if (trimmedSubText[0 ] == ' ,' || trimmedSubText[0 ] == ' .' || trimmedSubText[0 ] == ' :' || trimmedSubText[0 ] == ' ;' )
810839 items.push_back (Item {StringSeparator, {}, {subToken}, 0 });
811840 else
@@ -908,7 +937,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
908937
909938 for (auto item = items.begin (); item != items.end ();)
910939 {
911- if (currentWidth + item->width > desiredWidth)
940+ if (currentWidth + item->width > desiredWidth && item-> type != StringWhitespace )
912941 {
913942 // Current item is too wide to fit on the current line, will need to start a new line.
914943 auto next = item;
0 commit comments