@@ -19,7 +19,6 @@ enum ItemType
1919 StringComponent,
2020 StringSeparator,
2121 StringWhitespace,
22- StringNewline,
2322 FormatSpecifier,
2423 EscapeSequence,
2524 Group,
@@ -263,16 +262,26 @@ static vector<InstructionTextToken> ParseStringToken(
263262 const auto & src = unprocessedStringToken.text ;
264263 const size_t tail = src.size ();
265264
266- // Max parsing length set for performance reasons, increase at your own peril!
265+ // Max parsing length set to max annotation length
267266 if (tail > maxParsingLength)
268267 return { unprocessedStringToken };
269-
270268 vector<InstructionTextToken> result;
271269 size_t curStart = 0 , curEnd = 0 ;
270+
272271 auto ConstructToken = [&](size_t start, size_t end) {
273- result.emplace_back (StringToken, string (src.substr (start, end - start)));
272+ InstructionTextToken token = unprocessedStringToken;
273+ const string newTxt = string (src.substr (start, end - start));
274+ token.text = newTxt;
275+ token.width = newTxt.size ();
276+ result.emplace_back (token);
274277 };
275278
279+ auto flushToken = [&](size_t start, size_t end)
280+ {
281+ if (start < end)
282+ ConstructToken (start, end);
283+ };
284+
276285 // We generally split along spaces while keeping words intact, but some cases have
277286 // specific splitting behavior:
278287 //
@@ -288,8 +297,7 @@ static vector<InstructionTextToken> ParseStringToken(
288297 if (c == ' %' )
289298 {
290299 // Flush before format specifier
291- if (curStart < curEnd)
292- ConstructToken (curStart, curEnd);
300+ flushToken (curStart, curEnd);
293301
294302 size_t start = curEnd;
295303 curEnd++;
@@ -301,8 +309,7 @@ static vector<InstructionTextToken> ParseStringToken(
301309 else if (c == ' \\ ' )
302310 {
303311 // Flush before escape sequence
304- if (curStart < curEnd)
305- ConstructToken (curStart, curEnd);
312+ flushToken (curStart, curEnd);
306313
307314 size_t start = curEnd;
308315 curEnd++; // consume '\'
@@ -314,8 +321,8 @@ static vector<InstructionTextToken> ParseStringToken(
314321 else if (c == ' ,' || c == ' .' || c == ' :' || c == ' ;' || isspace (c))
315322 {
316323 // Flush before punctuation
317- if (curStart < curEnd)
318- ConstructToken (curStart, curEnd);
324+ flushToken (curStart, curEnd);
325+
319326 // Group together repeated punctuation
320327 size_t start = curEnd;
321328 while (curEnd < tail && src[curEnd] == c)
@@ -329,9 +336,7 @@ static vector<InstructionTextToken> ParseStringToken(
329336 }
330337 }
331338
332- if (curStart < curEnd)
333- ConstructToken (curStart, curEnd);
334-
339+ flushToken (curStart, curEnd);
335340 return result;
336341}
337342
@@ -341,7 +346,7 @@ static vector<Item> CreateStringGroups(const vector<Item>& items)
341346 bool hasStrings = false ;
342347 for (auto & i : items)
343348 {
344- if (( i.type == StringSeparator) && !i.tokens .empty ())
349+ if (i.type == StringSeparator && !i.tokens .empty ())
345350 {
346351 // We try to push separators onto a preceding word, otherwise treat as
347352 // a singular atom
@@ -370,14 +375,17 @@ static vector<Item> CreateStringGroups(const vector<Item>& items)
370375 }
371376 else if (i.type == FormatSpecifier || i.type == EscapeSequence)
372377 {
378+ // Flush previous tokens before special sequences like format specifiers or
379+ // escape sequences
373380 if (!pending.empty ())
374381 {
375382 result.push_back (Item {StringComponent, pending, {}, 0 });
376383 pending.clear ();
377384 }
378385 result.push_back (Item { Atom, i.items , i.tokens , i.width });
379386 }
380- else if (i.type == StartOfContainer && pending.empty ())
387+
388+ if (i.type == StartOfContainer && pending.empty ())
381389 {
382390 result.push_back (i);
383391 }
@@ -739,6 +747,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
739747 switch (token.type )
740748 {
741749 case BraceToken:
750+ // Beginning of string
742751 if (tokenIndex + 1 < currentLine.tokens .size ()
743752 && currentLine.tokens [tokenIndex + 1 ].type == StringToken)
744753 {
@@ -751,7 +760,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
751760 items.clear ();
752761 items.push_back (Item {StartOfContainer, {}, {token}, 0 });
753762 }
754- // Check for end of string - gross!
763+ // End of string
755764 else if (currentLine.tokens [tokenIndex].type == StringToken
756765 && tokenIndex + 1 < currentLine.tokens .size ()
757766 && currentLine.tokens [tokenIndex + 1 ].type == BraceToken)
@@ -817,24 +826,15 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
817826 case StringToken:
818827 {
819828 vector<InstructionTextToken> stringTokens = ParseStringToken (token, settings.maximumAnnotationLength );
820- for (size_t k = 0 ; k < stringTokens. size (); k++ )
829+ for (auto subToken : stringTokens)
821830 {
822- InstructionTextToken subToken = stringTokens[k];
823- string trimmedSubText = TrimString (subToken.text );
831+ string trimmedSubText = TrimString (subToken.text );
824832 if (trimmedSubText.empty ())
825833 items.push_back (Item {StringWhitespace, {}, {subToken}, 0 });
826834 if (trimmedSubText[0 ] == ' %' )
827835 items.push_back (Item {FormatSpecifier, {}, {subToken}, 0 });
828836 else if (!trimmedSubText.empty () && trimmedSubText[0 ] == ' \\ ' )
829- {
830- if (trimmedSubText.size () > 1 )
831- {
832- if (trimmedSubText[1 ] == ' n' )
833- items.push_back (Item {StringNewline, {}, {subToken}, 0 });
834- continue ;
835- }
836837 items.push_back (Item {EscapeSequence, {}, {subToken}, 0 });
837- }
838838 else if (trimmedSubText[0 ] == ' ,' || trimmedSubText[0 ] == ' .' || trimmedSubText[0 ] == ' :' || trimmedSubText[0 ] == ' ;' )
839839 items.push_back (Item {StringSeparator, {}, {subToken}, 0 });
840840 else
@@ -937,9 +937,16 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
937937
938938 for (auto item = items.begin (); item != items.end ();)
939939 {
940- if (currentWidth + item->width > desiredWidth && item->type != StringWhitespace)
940+ if (item->type == StringComponent && currentWidth + item->width > desiredWidth)
941+ {
942+ // If a string is too wide to fit on the current line, create a newline
943+ // without additional indentation
944+ newLine ();
945+ }
946+ else if (currentWidth + item->width > desiredWidth && item->type != StringWhitespace)
941947 {
942948 // Current item is too wide to fit on the current line, will need to start a new line.
949+ // Whitespace is allowed to be too wide; we push it on as the preceding word is wrapped.
943950 auto next = item;
944951 ++next;
945952
@@ -948,7 +955,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
948955 // is a container, always use the splitting behavior.
949956 if (currentWidth == 0 || item->width > desiredContinuationWidth || item->type == Container)
950957 {
951- if (( item->type == Argument || item-> type == StringComponent) && currentWidth != 0 )
958+ if (item->type == Argument && currentWidth != 0 )
952959 {
953960 // If an argument is too wide to show on a single line all by itself, start the argument
954961 // on a new line, and add additional indentation for the continuation of the argument.
0 commit comments