Skip to content

Commit 7b1acf1

Browse files
committed
Add preliminary support for string wrapping arguments
1 parent f373878 commit 7b1acf1

File tree

1 file changed

+226
-21
lines changed

1 file changed

+226
-21
lines changed

formatter/generic/genericformatter.cpp

Lines changed: 226 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ enum ItemType
1616
ArgumentSeparator,
1717
Statement,
1818
StatementSeparator,
19+
StringComponent,
20+
StringSeparator,
21+
FormatSpecifier,
22+
EscapeSequence,
1923
Group,
2024
Container,
2125
StartOfContainer,
@@ -250,6 +254,184 @@ static vector<Item> CreateStatementItems(const vector<Item>& items)
250254
return result;
251255
}
252256

257+
static vector<InstructionTextToken> SeparateStringTokens(
258+
const InstructionTextToken& unprocessedStringToken
259+
)
260+
{
261+
// Takes a StringToken and breaks it into sub-StringTokens along boundaries of punctuation
262+
// and spaces
263+
//
264+
// Ex.
265+
// "this.that" -> {"this", ".", "that"}
266+
// "format%llxsomething" -> {"format", "%llx", "something"}
267+
// "meep\n"moop" -> {"meep", "\n", "moop"}
268+
269+
vector<InstructionTextToken> result;
270+
string current;
271+
size_t i = 0;
272+
while (i < unprocessedStringToken.text.size())
273+
{
274+
char c = unprocessedStringToken.text[i];
275+
276+
// Handle format specifiers
277+
if (c == '%')
278+
{
279+
if (!current.empty())
280+
{
281+
result.push_back(InstructionTextToken(StringToken, current));
282+
current.clear();
283+
}
284+
285+
string format = "%";
286+
i++;
287+
while (i < unprocessedStringToken.text.size())
288+
{
289+
c = unprocessedStringToken.text[i];
290+
if (!isalnum(c) && c != '.' && c != '-')
291+
break;
292+
format += c;
293+
i++;
294+
}
295+
result.push_back(InstructionTextToken(StringToken, format));
296+
continue;
297+
}
298+
299+
// Handle escape sequences
300+
if (c == '\\')
301+
{
302+
if (!current.empty())
303+
{
304+
result.push_back(InstructionTextToken(StringToken, current));
305+
current.clear();
306+
}
307+
308+
string escape = "\\";
309+
if (i + 1 < unprocessedStringToken.text.size())
310+
{
311+
escape += unprocessedStringToken.text[i + 1];
312+
i += 2;
313+
}
314+
else
315+
i++;
316+
result.push_back(InstructionTextToken(StringToken, escape));
317+
continue;
318+
}
319+
320+
// Handle punctuation and spaces
321+
if (c == ',' || c == '.' || c == ':' || c == ';')
322+
{
323+
if (!current.empty())
324+
{
325+
result.push_back(InstructionTextToken(StringToken, current));
326+
current.clear();
327+
}
328+
329+
string repeated;
330+
repeated += c;
331+
while (i + 1 < unprocessedStringToken.text.size())
332+
{
333+
char next = unprocessedStringToken.text[i + 1];
334+
if (next == ',' || next == '.' || next == ':' || next == ';')
335+
{
336+
repeated += next;
337+
i++;
338+
}
339+
else
340+
break;
341+
}
342+
result.push_back(InstructionTextToken(StringToken, repeated));
343+
}
344+
else if (isspace(c))
345+
{
346+
if (!current.empty())
347+
{
348+
result.push_back(InstructionTextToken(StringToken, current));
349+
current.clear();
350+
}
351+
352+
string repeated;
353+
repeated += c;
354+
while (i + 1 < unprocessedStringToken.text.size())
355+
{
356+
char next = unprocessedStringToken.text[i + 1];
357+
if (isspace(next))
358+
{
359+
repeated += next;
360+
i++;
361+
}
362+
else
363+
break;
364+
}
365+
result.push_back(InstructionTextToken(StringToken, repeated));
366+
}
367+
else
368+
{
369+
current += c;
370+
}
371+
i++;
372+
}
373+
374+
if (!current.empty())
375+
result.push_back(InstructionTextToken(StringToken, current));
376+
377+
return result;
378+
}
379+
380+
static vector<Item> CreateStringGroups(const vector<Item>& items)
{
	// Regroups the pieces of a string so that layout can break the string between groups. Items are
	// accumulated in `pending` and emitted as a single StringComponent item whenever a boundary is
	// reached:
	//
	// * A StringSeparator that carries tokens is appended to the last pending item and closes the
	//   current group. With nothing pending, the separator is emitted on its own as an Atom.
	// * A FormatSpecifier or EscapeSequence closes the current group and is emitted as an Atom.
	// * A StartOfContainer seen while nothing is pending is passed through unchanged.
	// * An EndOfContainer closes the current group and is passed through, but only when a separator
	//   has been seen at this level and there is something pending.
	// * Anything else is accumulated, with its child items regrouped recursively and its width
	//   reset to zero.
	//
	// Items still pending at the end are emitted as a StringComponent if a separator was seen at
	// this level, and are otherwise appended to the result individually.
	vector<Item> result, pending;
	bool hasStrings = false;
	for (auto& i : items)
	{
		if (i.type == StringSeparator && !i.tokens.empty())
		{
			if (pending.empty())
			{
				result.push_back(Item {Atom, {}, i.tokens, 0});
			}
			else
			{
				// Keep the separator attached to the text that precedes it
				for (auto& j : i.tokens)
					pending.back().AddTokenToLastAtom(j);
				result.push_back(Item {StringComponent, pending, {}, 0});
			}
			pending.clear();
			hasStrings = true;
		}
		else if (i.type == FormatSpecifier || i.type == EscapeSequence)
		{
			if (!pending.empty())
			{
				result.push_back(Item {StringComponent, pending, {}, 0});
				pending.clear();
			}
			result.push_back(Item {Atom, i.items, i.tokens, i.width});
		}
		else if (i.type == StartOfContainer && pending.empty())
		{
			result.push_back(i);
		}
		else if (i.type == EndOfContainer && hasStrings && !pending.empty())
		{
			result.push_back(Item {StringComponent, pending, {}, 0});
			result.push_back(i);
			// The pending items have been emitted; clear them so that the flush after the loop
			// does not emit the same items a second time after the end of the container.
			pending.clear();
		}
		else
		{
			pending.push_back(Item {i.type, CreateStringGroups(i.items), i.tokens, 0});
		}
	}

	if (!pending.empty())
	{
		if (hasStrings)
			result.push_back(Item {StringComponent, pending, {}, 0});
		else
			result.insert(result.end(), pending.begin(), pending.end());
	}

	return result;
}
253435

254436
static vector<Item> CreateAssignmentOperatorGroups(const vector<Item>& items)
255437
{
@@ -576,8 +758,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
576758
size_t tokenIndex = indentationTokens.size();
577759

578760
// First break the line down into nested container items. A container is anything between a pair of
579-
// BraceTokens (except for strings, where the entire string, including the quotes, are treated as
580-
// a single atom).
761+
// BraceTokens
581762
vector<Item> items;
582763
stack<vector<Item>> itemStack;
583764
for (; tokenIndex < currentLine.tokens.size(); tokenIndex++)
@@ -591,26 +772,26 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
591772
if (tokenIndex + 1 < currentLine.tokens.size()
592773
&& currentLine.tokens[tokenIndex + 1].type == StringToken)
593774
{
594-
// Treat string tokens surrounded by brace tokens as a unit (this is usually the quotes
595-
// surrounding the string)
596-
Item atom;
597-
atom.type = Atom;
598-
atom.tokens.push_back(token);
599-
atom.tokens.push_back(currentLine.tokens[tokenIndex + 1]);
600-
atom.width = 0;
601-
tokenIndex++;
602-
if (tokenIndex + 1 < currentLine.tokens.size()
603-
&& currentLine.tokens[tokenIndex + 1].type == BraceToken)
604-
{
605-
atom.tokens.push_back(currentLine.tokens[tokenIndex + 1]);
606-
tokenIndex++;
607-
}
775+
// Create a ContainerContents item and place it onto the item stack. This will hold anything
776+
// inside the container once the end of the container is found.
777+
items.push_back(Item {Container, {}, {}, 0});
778+
itemStack.push(items);
608779

609-
items.push_back(atom);
610-
break;
780+
// Starting a new context
781+
items.clear();
782+
items.push_back(Item {StartOfContainer, {}, {token}, 0});
611783
}
612-
613-
if (trimmedText == "(" || trimmedText == "[" || trimmedText == "{")
784+
// Check for end of string - gross!
785+
else if (currentLine.tokens[tokenIndex].type == StringToken
786+
&& tokenIndex + 1 < currentLine.tokens.size()
787+
&& currentLine.tokens[tokenIndex + 1].type == BraceToken)
788+
{
789+
// Create a ContainerContents item and place it onto the item stack. This will hold anything
790+
// inside the container once the end of the container is found.
791+
items.push_back(Item {Container, {}, {}, 0});
792+
itemStack.push(items);
793+
}
794+
else if (trimmedText == "(" || trimmedText == "[" || trimmedText == "{")
614795
{
615796
// Create a ContainerContents item and place it onto the item stack. This will hold anything
616797
// inside the container once the end of the container is found.
@@ -663,6 +844,26 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
663844
else
664845
items.push_back(Item {Operator, {}, {token}, 0});
665846
break;
847+
case StringToken:
848+
{
849+
vector<InstructionTextToken> stringTokens = SeparateStringTokens(token);
850+
for (size_t k = 0; k < stringTokens.size(); k++)
851+
{
852+
InstructionTextToken subToken = stringTokens[k];
853+
string trimmedSubText = TrimString(subToken.text);
854+
if (trimmedSubText.empty())
855+
items.push_back(Item {StringSeparator, {}, {subToken}, 0});
856+
if (trimmedSubText[0] == '%')
857+
items.push_back(Item {FormatSpecifier, {}, {subToken}, 0});
858+
else if (!trimmedSubText.empty() && trimmedSubText[0] == '\\')
859+
items.push_back(Item {EscapeSequence, {}, {subToken}, 0});
860+
else if (trimmedSubText[0] == ',' || trimmedSubText[0] == '.' || trimmedSubText[0] == ':' || trimmedSubText[0] == ';')
861+
items.push_back(Item {StringSeparator, {}, {subToken}, 0});
862+
else
863+
items.push_back(Item {Atom, {}, {subToken}, 0});
864+
}
865+
break;
866+
}
666867
default:
667868
items.push_back(Item {Atom, {}, {token}, 0});
668869
break;
@@ -699,6 +900,10 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
699900
// the previous atom.
700901
items = RelocateStartAndEndOfContainerItems(items);
701902

903+
// Create internal groupings for displaying strings -- grouping items by punctuation, format specifiers, and
904+
// escape sequences
905+
items = CreateStringGroups(items);
906+
702907
// Now that items are done, compute widths for layout
703908
for (auto& j : items)
704909
j.CalculateWidth();
@@ -765,7 +970,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
765970
// is a container, always use the splitting behavior.
766971
if (currentWidth == 0 || item->width > desiredContinuationWidth || item->type == Container)
767972
{
768-
if (item->type == Argument && currentWidth != 0)
973+
if ((item->type == Argument || item->type == StringComponent) && currentWidth != 0)
769974
{
770975
// If an argument is too wide to show on a single line all by itself, start the argument
771976
// on a new line, and add additional indentation for the continuation of the argument.

0 commit comments

Comments
 (0)