@@ -16,6 +16,10 @@ enum ItemType
1616 ArgumentSeparator,
1717 Statement,
1818 StatementSeparator,
19+ StringComponent,
20+ StringSeparator,
21+ FormatSpecifier,
22+ EscapeSequence,
1923 Group,
2024 Container,
2125 StartOfContainer,
@@ -250,6 +254,184 @@ static vector<Item> CreateStatementItems(const vector<Item>& items)
250254 return result;
251255}
252256
257+ static vector<InstructionTextToken> SeparateStringTokens (
258+ const InstructionTextToken& unprocessedStringToken
259+ )
260+ {
261+ // Takes a StringToken and breaks it into sub-StringTokens along boundaries of punctuation
262+ // and spaces
263+ //
264+ // Ex.
265+ // "this.that" -> {"this", ".", "that"}
266+ // "format%llxsomething" -> {"format", "%llx", "something"}
267+ // "meep\n"moop" -> {"meep", "\n", "moop"}
268+
269+ vector<InstructionTextToken> result;
270+ string current;
271+ size_t i = 0 ;
272+ while (i < unprocessedStringToken.text .size ())
273+ {
274+ char c = unprocessedStringToken.text [i];
275+
276+ // Handle format specifiers
277+ if (c == ' %' )
278+ {
279+ if (!current.empty ())
280+ {
281+ result.push_back (InstructionTextToken (StringToken, current));
282+ current.clear ();
283+ }
284+
285+ string format = " %" ;
286+ i++;
287+ while (i < unprocessedStringToken.text .size ())
288+ {
289+ c = unprocessedStringToken.text [i];
290+ if (!isalnum (c) && c != ' .' && c != ' -' )
291+ break ;
292+ format += c;
293+ i++;
294+ }
295+ result.push_back (InstructionTextToken (StringToken, format));
296+ continue ;
297+ }
298+
299+ // Handle escape sequences
300+ if (c == ' \\ ' )
301+ {
302+ if (!current.empty ())
303+ {
304+ result.push_back (InstructionTextToken (StringToken, current));
305+ current.clear ();
306+ }
307+
308+ string escape = " \\ " ;
309+ if (i + 1 < unprocessedStringToken.text .size ())
310+ {
311+ escape += unprocessedStringToken.text [i + 1 ];
312+ i += 2 ;
313+ }
314+ else
315+ i++;
316+ result.push_back (InstructionTextToken (StringToken, escape));
317+ continue ;
318+ }
319+
320+ // Handle punctuation and spaces
321+ if (c == ' ,' || c == ' .' || c == ' :' || c == ' ;' )
322+ {
323+ if (!current.empty ())
324+ {
325+ result.push_back (InstructionTextToken (StringToken, current));
326+ current.clear ();
327+ }
328+
329+ string repeated;
330+ repeated += c;
331+ while (i + 1 < unprocessedStringToken.text .size ())
332+ {
333+ char next = unprocessedStringToken.text [i + 1 ];
334+ if (next == ' ,' || next == ' .' || next == ' :' || next == ' ;' )
335+ {
336+ repeated += next;
337+ i++;
338+ }
339+ else
340+ break ;
341+ }
342+ result.push_back (InstructionTextToken (StringToken, repeated));
343+ }
344+ else if (isspace (c))
345+ {
346+ if (!current.empty ())
347+ {
348+ result.push_back (InstructionTextToken (StringToken, current));
349+ current.clear ();
350+ }
351+
352+ string repeated;
353+ repeated += c;
354+ while (i + 1 < unprocessedStringToken.text .size ())
355+ {
356+ char next = unprocessedStringToken.text [i + 1 ];
357+ if (isspace (next))
358+ {
359+ repeated += next;
360+ i++;
361+ }
362+ else
363+ break ;
364+ }
365+ result.push_back (InstructionTextToken (StringToken, repeated));
366+ }
367+ else
368+ {
369+ current += c;
370+ }
371+ i++;
372+ }
373+
374+ if (!current.empty ())
375+ result.push_back (InstructionTextToken (StringToken, current));
376+
377+ return result;
378+ }
379+
380+ static vector<Item> CreateStringGroups (const vector<Item>& items)
381+ {
382+ vector<Item> result, pending;
383+ bool hasStrings = false ;
384+ for (auto & i : items)
385+ {
386+ if (i.type == StringSeparator && !i.tokens .empty ())
387+ {
388+ if (pending.empty ())
389+ {
390+ result.push_back (Item {Atom, {}, {i.tokens }, 0 });
391+ }
392+ else
393+ {
394+ for (auto & j : i.tokens )
395+ pending.back ().AddTokenToLastAtom (j);
396+ result.push_back (Item {StringComponent, pending, {}, 0 });
397+ }
398+ pending.clear ();
399+ hasStrings = true ;
400+ }
401+ else if (i.type == FormatSpecifier || i.type == EscapeSequence)
402+ {
403+ if (!pending.empty ())
404+ {
405+ result.push_back (Item {StringComponent, pending, {}, 0 });
406+ pending.clear ();
407+ }
408+ result.push_back (Item { Atom, i.items , i.tokens , i.width });
409+ }
410+ else if (i.type == StartOfContainer && pending.empty ())
411+ {
412+ result.push_back (i);
413+ }
414+ else if (i.type == EndOfContainer && hasStrings && !pending.empty ())
415+ {
416+ result.push_back (Item {StringComponent, pending, {}, 0 });
417+ result.push_back (i);
418+ }
419+ else
420+ {
421+ pending.push_back (Item {i.type , CreateStringGroups (i.items ), i.tokens , 0 });
422+ }
423+ }
424+
425+ if (!pending.empty ())
426+ {
427+ if (hasStrings)
428+ result.push_back (Item {StringComponent, pending, {}, 0 });
429+ else
430+ result.insert (result.end (), pending.begin (), pending.end ());
431+ }
432+
433+ return result;
434+ }
253435
254436static vector<Item> CreateAssignmentOperatorGroups (const vector<Item>& items)
255437{
@@ -576,8 +758,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
576758 size_t tokenIndex = indentationTokens.size ();
577759
578760 // First break the line down into nested container items. A container is anything between a pair of
579- // BraceTokens (except for strings, where the entire string, including the quotes, are treated as
580- // a single atom).
761+ // BraceTokens
581762 vector<Item> items;
582763 stack<vector<Item>> itemStack;
583764 for (; tokenIndex < currentLine.tokens .size (); tokenIndex++)
@@ -591,26 +772,26 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
591772 if (tokenIndex + 1 < currentLine.tokens .size ()
592773 && currentLine.tokens [tokenIndex + 1 ].type == StringToken)
593774 {
594- // Treat string tokens surrounded by brace tokens as a unit (this is usually the quotes
595- // surrounding the string)
596- Item atom;
597- atom.type = Atom;
598- atom.tokens .push_back (token);
599- atom.tokens .push_back (currentLine.tokens [tokenIndex + 1 ]);
600- atom.width = 0 ;
601- tokenIndex++;
602- if (tokenIndex + 1 < currentLine.tokens .size ()
603- && currentLine.tokens [tokenIndex + 1 ].type == BraceToken)
604- {
605- atom.tokens .push_back (currentLine.tokens [tokenIndex + 1 ]);
606- tokenIndex++;
607- }
775+ // Create a ContainerContents item and place it onto the item stack. This will hold anything
776+ // inside the container once the end of the container is found.
777+ items.push_back (Item {Container, {}, {}, 0 });
778+ itemStack.push (items);
608779
609- items.push_back (atom);
610- break ;
780+ // Starting a new context
781+ items.clear ();
782+ items.push_back (Item {StartOfContainer, {}, {token}, 0 });
611783 }
612-
613- if (trimmedText == " (" || trimmedText == " [" || trimmedText == " {" )
784+ // Check for end of string - gross!
785+ else if (currentLine.tokens [tokenIndex].type == StringToken
786+ && tokenIndex + 1 < currentLine.tokens .size ()
787+ && currentLine.tokens [tokenIndex + 1 ].type == BraceToken)
788+ {
789+ // Create a ContainerContents item and place it onto the item stack. This will hold anything
790+ // inside the container once the end of the container is found.
791+ items.push_back (Item {Container, {}, {}, 0 });
792+ itemStack.push (items);
793+ }
794+ else if (trimmedText == " (" || trimmedText == " [" || trimmedText == " {" )
614795 {
615796 // Create a ContainerContents item and place it onto the item stack. This will hold anything
616797 // inside the container once the end of the container is found.
@@ -663,6 +844,26 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
663844 else
664845 items.push_back (Item {Operator, {}, {token}, 0 });
665846 break ;
847+ case StringToken:
848+ {
849+ vector<InstructionTextToken> stringTokens = SeparateStringTokens (token);
850+ for (size_t k = 0 ; k < stringTokens.size (); k++)
851+ {
852+ InstructionTextToken subToken = stringTokens[k];
853+ string trimmedSubText = TrimString (subToken.text );
854+ if (trimmedSubText.empty ())
855+ items.push_back (Item {StringSeparator, {}, {subToken}, 0 });
856+ if (trimmedSubText[0 ] == ' %' )
857+ items.push_back (Item {FormatSpecifier, {}, {subToken}, 0 });
858+ else if (!trimmedSubText.empty () && trimmedSubText[0 ] == ' \\ ' )
859+ items.push_back (Item {EscapeSequence, {}, {subToken}, 0 });
860+ else if (trimmedSubText[0 ] == ' ,' || trimmedSubText[0 ] == ' .' || trimmedSubText[0 ] == ' :' || trimmedSubText[0 ] == ' ;' )
861+ items.push_back (Item {StringSeparator, {}, {subToken}, 0 });
862+ else
863+ items.push_back (Item {Atom, {}, {subToken}, 0 });
864+ }
865+ break ;
866+ }
666867 default :
667868 items.push_back (Item {Atom, {}, {token}, 0 });
668869 break ;
@@ -699,6 +900,10 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
699900 // the previous atom.
700901 items = RelocateStartAndEndOfContainerItems (items);
701902
903+ // Create internal groupings for displaying strings -- grouping items by punctuation, format specifiers, and
904+ // escape sequences
905+ items = CreateStringGroups (items);
906+
702907 // Now that items are done, compute widths for layout
703908 for (auto & j : items)
704909 j.CalculateWidth ();
@@ -765,7 +970,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
765970 // is a container, always use the splitting behavior.
766971 if (currentWidth == 0 || item->width > desiredContinuationWidth || item->type == Container)
767972 {
768- if (item->type == Argument && currentWidth != 0 )
973+ if (( item->type == Argument || item-> type == StringComponent) && currentWidth != 0 )
769974 {
770975 // If an argument is too wide to show on a single line all by itself, start the argument
771976 // on a new line, and add additional indentation for the continuation of the argument.
0 commit comments