Skip to content

Commit a2e3aa2

Browse files
committed
Fix wrapping whitespace and limit wrapping to annotation length
1 parent 6d4c667 commit a2e3aa2

File tree

4 files changed

+38
-5
lines changed

4 files changed

+38
-5
lines changed

binaryninjaapi.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14100,6 +14100,7 @@ namespace BinaryNinja {
1410014100
size_t desiredLineLength;
1410114101
size_t minimumContentLength;
1410214102
size_t tabWidth;
14103+
size_t maximumAnnotationLength;
1410314104
std::string languageName;
1410414105
std::string commentStartString;
1410514106
std::string commentEndString;

binaryninjacore.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3618,6 +3618,7 @@ extern "C"
36183618
size_t desiredLineLength;
36193619
size_t minimumContentLength;
36203620
size_t tabWidth;
3621+
size_t maximumAnnotationLength;
36213622
char* languageName;
36223623
char* commentStartString;
36233624
char* commentEndString;

formatter/generic/genericformatter.cpp

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ enum ItemType
1818
StatementSeparator,
1919
StringComponent,
2020
StringSeparator,
21-
StringSpace,
21+
StringWhitespace,
22+
StringNewline,
2223
FormatSpecifier,
2324
EscapeSequence,
2425
Group,
@@ -272,6 +273,14 @@ static vector<InstructionTextToken> ParseStringToken(
272273
result.emplace_back(StringToken, string(src.substr(start, end - start)));
273274
};
274275

276+
// We generally split along spaces while keeping words intact, but some cases have
277+
// specific splitting behavior:
278+
//
279+
// - Any format specifier (starting with %) will be treated as an atom even if embedded
280+
// within a word
281+
// - Any escape sequence will also be treated as an atom
282+
// - We split along punctuation like commas, colons, periods, and semicolons, grouping
283+
// trailing punctuation together.
275284
while (curEnd < tail)
276285
{
277286
char c = src[curEnd];
@@ -332,8 +341,10 @@ static vector<Item> CreateStringGroups(const vector<Item>& items)
332341
bool hasStrings = false;
333342
for (auto& i : items)
334343
{
335-
if (i.type == StringSeparator && !i.tokens.empty())
344+
if ((i.type == StringSeparator) && !i.tokens.empty())
336345
{
346+
// We try to push separators onto a preceding word; otherwise we treat them as
347+
// a single atom
337348
if (pending.empty())
338349
{
339350
result.push_back(Item {Atom, {}, {i.tokens}, 0});
@@ -347,6 +358,16 @@ static vector<Item> CreateStringGroups(const vector<Item>& items)
347358
pending.clear();
348359
hasStrings = true;
349360
}
361+
else if (i.type == StringWhitespace)
362+
{
363+
// Special case because we let whitespace trail even if over width
364+
if (!pending.empty())
365+
{
366+
result.push_back(Item {StringComponent, pending, {}, 0});
367+
pending.clear();
368+
}
369+
result.push_back(Item {StringWhitespace, i.items, i.tokens, i.width});
370+
}
350371
else if (i.type == FormatSpecifier || i.type == EscapeSequence)
351372
{
352373
if (!pending.empty())
@@ -795,17 +816,25 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
795816
break;
796817
case StringToken:
797818
{
798-
vector<InstructionTextToken> stringTokens = ParseStringToken(token, 512);
819+
vector<InstructionTextToken> stringTokens = ParseStringToken(token, settings.maximumAnnotationLength);
799820
for (size_t k = 0; k < stringTokens.size(); k++)
800821
{
801822
InstructionTextToken subToken = stringTokens[k];
802823
string trimmedSubText = TrimString(subToken.text);
803824
if (trimmedSubText.empty())
804-
items.push_back(Item {StringSeparator, {}, {subToken}, 0});
825+
items.push_back(Item {StringWhitespace, {}, {subToken}, 0});
805826
if (trimmedSubText[0] == '%')
806827
items.push_back(Item {FormatSpecifier, {}, {subToken}, 0});
807828
else if (!trimmedSubText.empty() && trimmedSubText[0] == '\\')
829+
{
830+
if (trimmedSubText.size() > 1)
831+
{
832+
if (trimmedSubText[1] == 'n')
833+
items.push_back(Item {StringNewline, {}, {subToken}, 0});
834+
continue;
835+
}
808836
items.push_back(Item {EscapeSequence, {}, {subToken}, 0});
837+
}
809838
else if (trimmedSubText[0] == ',' || trimmedSubText[0] == '.' || trimmedSubText[0] == ':' || trimmedSubText[0] == ';')
810839
items.push_back(Item {StringSeparator, {}, {subToken}, 0});
811840
else
@@ -908,7 +937,7 @@ vector<DisassemblyTextLine> GenericLineFormatter::FormatLines(
908937

909938
for (auto item = items.begin(); item != items.end();)
910939
{
911-
if (currentWidth + item->width > desiredWidth)
940+
if (currentWidth + item->width > desiredWidth && item->type != StringWhitespace)
912941
{
913942
// Current item is too wide to fit on the current line, will need to start a new line.
914943
auto next = item;

lineformatter.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ LineFormatterSettings LineFormatterSettings::FromAPIObject(const BNLineFormatter
5353
result.desiredLineLength = settings->desiredLineLength;
5454
result.minimumContentLength = settings->minimumContentLength;
5555
result.tabWidth = settings->tabWidth;
56+
result.maximumAnnotationLength = settings->maximumAnnotationLength;
5657
result.languageName = settings->languageName;
5758
result.commentStartString = settings->commentStartString;
5859
result.commentEndString = settings->commentEndString;
@@ -69,6 +70,7 @@ BNLineFormatterSettings LineFormatterSettings::ToAPIObject() const
6970
result.desiredLineLength = desiredLineLength;
7071
result.minimumContentLength = minimumContentLength;
7172
result.tabWidth = tabWidth;
73+
result.maximumAnnotationLength = maximumAnnotationLength;
7274
result.languageName = (char*)languageName.c_str();
7375
result.commentStartString = (char*)commentStartString.c_str();
7476
result.commentEndString = (char*)commentEndString.c_str();

0 commit comments

Comments
 (0)