Skip to content

Commit 9317717

Browse files
committed
Rework Export Trie parser to avoid recursion, improve error checking
1 parent 811fb85 commit 9317717

File tree

3 files changed

+241
-80
lines changed

3 files changed

+241
-80
lines changed

view/macho/machoview.cpp

Lines changed: 149 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2498,58 +2498,171 @@ bool MachoView::GetSectionPermissions(MachOHeader& header, uint64_t address, uin
24982498
return false;
24992499
}
25002500

2501-
void MachoView::ParseExportTrie(BinaryReader& reader, linkedit_data_command exportTrie)
2501+
2502+
bool MachoView::AddExportTerminalSymbol(
2503+
const std::string& symbolName, uint64_t symbolFlags, uint64_t imageOffset)
25022504
{
2503-
try {
2504-
uint32_t endGuard = exportTrie.datasize;
2505-
DataBuffer buffer = GetParentView()->ReadBuffer(m_universalImageOffset + exportTrie.dataoff, exportTrie.datasize);
2505+
if (symbolFlags & EXPORT_SYMBOL_FLAGS_REEXPORT)
2506+
{
2507+
m_logger->LogTrace("Export symbol is a re-export, not supported: %s", symbolName.c_str());
2508+
return false;
2509+
}
25062510

2507-
ReadExportNode(GetStart(), buffer, "", 0, endGuard);
2511+
uint64_t symbolAddress = GetStart() + imageOffset;
2512+
if (symbolName.empty() || symbolAddress == 0)
2513+
{
2514+
m_logger->LogTrace("Export symbol is malformed: %s", symbolName.c_str());
2515+
return false;
25082516
}
2509-
catch (ReadException&)
2517+
2518+
// Tries to get the symbol type based off the section containing it.
2519+
auto sectionSymbolType = [&]() -> BNSymbolType {
2520+
uint32_t sectionFlags = 0;
2521+
for (const auto& section : m_allSections)
2522+
{
2523+
if (symbolAddress >= section.addr && symbolAddress < section.addr + section.size)
2524+
{
2525+
// Take the flags from the first containing section.
2526+
sectionFlags = section.flags;
2527+
break;
2528+
}
2529+
}
2530+
2531+
// TODO: Is this enough to determine a function symbol?
2532+
// TODO: Might be the cause of https://github.com/Vector35/binaryninja-api/issues/6526
2533+
// Check the sections flags to see if we actually have a function symbol instead.
2534+
if (sectionFlags & S_ATTR_PURE_INSTRUCTIONS || sectionFlags & S_ATTR_SOME_INSTRUCTIONS)
2535+
return FunctionSymbol;
2536+
2537+
// FIXME: See above, no it is not. Fallback on old logic here to avoid breaking export symbols in __text on regular Mach-Os.
2538+
auto symbolType = GetAnalysisFunctionsForAddress(GetStart() + imageOffset).size() ? FunctionSymbol : DataSymbol;
2539+
return symbolType;
2540+
};
2541+
2542+
switch (symbolFlags & EXPORT_SYMBOL_FLAGS_KIND_MASK)
25102543
{
2511-
m_logger->LogError("Error while parsing Export Trie");
2544+
case EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
2545+
case EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
2546+
m_logger->LogTrace("Export symbol is a regular or thread local symbol: %d %s", sectionSymbolType(), symbolName.c_str());
2547+
DefineMachoSymbol(sectionSymbolType(), symbolName, symbolAddress, GlobalBinding, false);
2548+
break;
2549+
case EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE:
2550+
m_logger->LogTrace("Export symbol is an absolute symbol: %s", symbolName.c_str());
2551+
DefineMachoSymbol(DataSymbol, symbolName, symbolAddress, GlobalBinding, false);
2552+
break;
2553+
default:
2554+
m_logger->LogWarn("Unhandled export symbol kind: %llx", symbolFlags & EXPORT_SYMBOL_FLAGS_KIND_MASK);
2555+
return false;
25122556
}
2557+
2558+
m_logger->LogTrace("Successfully added export symbol: %s", symbolName.c_str());
2559+
2560+
return true;
25132561
}
25142562

2515-
void MachoView::ReadExportNode(uint64_t viewStart, DataBuffer& buffer, const std::string& currentText, size_t cursor, uint32_t endGuard)
2563+
void MachoView::ParseExportTrie(BinaryReader& reader, linkedit_data_command exportTrie)
25162564
{
2517-
if (cursor > endGuard)
2518-
throw ReadException();
2565+
try {
2566+
uint32_t endGuard = exportTrie.datasize;
2567+
DataBuffer buffer = GetParentView()
2568+
->ReadBuffer(m_universalImageOffset + exportTrie.dataoff, exportTrie.datasize);
25192569

2520-
uint64_t terminalSize = readValidULEB128(buffer, cursor);
2521-
uint64_t childOffset = cursor + terminalSize;
2522-
if (terminalSize != 0) {
2523-
uint64_t imageOffset = 0;
2524-
uint64_t flags = readValidULEB128(buffer, cursor);
2525-
if (!(flags & EXPORT_SYMBOL_FLAGS_REEXPORT))
2570+
struct Node
2571+
{
2572+
uint64_t cursor;
2573+
std::string text;
2574+
};
2575+
std::vector<Node> stack;
2576+
stack.reserve(64);
2577+
stack.push_back({ /* cursor */ 0, /* text */ "" });
2578+
2579+
while (!stack.empty())
25262580
{
2527-
imageOffset = readValidULEB128(buffer, cursor);
2528-
auto symbolType = GetAnalysisFunctionsForAddress(viewStart + imageOffset).size() ? FunctionSymbol : DataSymbol;
2529-
DefineMachoSymbol(symbolType, currentText, imageOffset + viewStart, GlobalBinding, true);
2581+
m_logger->LogTrace("Export Trie: Processing node %s with cursor %llu", stack.back().text.c_str(), stack.back().cursor);
2582+
Node node = std::move(stack.back());
2583+
stack.pop_back();
2584+
2585+
uint64_t cursor = node.cursor;
2586+
const std::string currentText = std::move(node.text);
2587+
2588+
if (cursor > endGuard)
2589+
{
2590+
m_logger->LogError("Export Trie: Cursor left trie during initial bounds check");
2591+
throw ReadException();
2592+
}
2593+
2594+
size_t localCursor = cursor;
2595+
uint64_t terminalSize = readValidULEB128(buffer, localCursor);
2596+
uint64_t childOffset = localCursor + terminalSize;
2597+
2598+
// If there's terminal data, define the symbol
2599+
if (terminalSize != 0)
2600+
{
2601+
uint64_t flags = readValidULEB128(buffer, localCursor);
2602+
uint64_t imageOffset = readValidULEB128(buffer, localCursor);
2603+
m_logger->LogTrace("Export Trie: Found terminal node %s with flags %llx and image offset %llx", currentText.c_str(), flags, imageOffset);
2604+
2605+
AddExportTerminalSymbol(currentText, flags, imageOffset);
2606+
}
2607+
2608+
localCursor = childOffset;
2609+
if (localCursor > endGuard)
2610+
{
2611+
m_logger->LogError("Export Trie: Cursor left trie while moving to child offset");
2612+
throw ReadException();
2613+
}
2614+
2615+
uint8_t childCount = buffer[localCursor++];
2616+
if (localCursor > endGuard)
2617+
{
2618+
m_logger->LogError("Export Trie: Cursor left trie while reading child count");
2619+
throw ReadException();
2620+
}
2621+
2622+
std::vector<Node> children;
2623+
children.reserve(childCount);
2624+
for (uint8_t i = 0; i < childCount; ++i)
2625+
{
2626+
if (localCursor > endGuard)
2627+
{
2628+
m_logger->LogError("Export Trie: Cursor left trie while reading child count");
2629+
throw ReadException();
2630+
}
2631+
2632+
std::string childText;
2633+
while (localCursor <= endGuard && buffer[localCursor] != 0) {
2634+
childText.push_back(buffer[localCursor++]);
2635+
}
2636+
localCursor++; // skip the `\0`
2637+
if (localCursor > endGuard)
2638+
{
2639+
m_logger->LogError("Export Trie: Cursor left trie while reading child text");
2640+
throw ReadException();
2641+
}
2642+
2643+
uint64_t nextOffset = readValidULEB128(buffer, localCursor);
2644+
if (nextOffset == 0)
2645+
{
2646+
m_logger->LogError("Export Trie: Child offset is zero");
2647+
throw ReadException();
2648+
}
2649+
2650+
children.push_back({ nextOffset, currentText + childText });
2651+
}
2652+
2653+
// Push in reverse so that the first child is processed next
2654+
for (auto it = children.rbegin(); it != children.rend(); ++it)
2655+
{
2656+
stack.push_back(*it);
2657+
}
25302658
}
25312659
}
2532-
cursor = childOffset;
2533-
uint8_t childCount = buffer[cursor];
2534-
cursor++;
2535-
if (cursor > endGuard)
2536-
throw ReadException();
2537-
for (uint8_t i = 0; i < childCount; ++i)
2660+
catch (ReadException&)
25382661
{
2539-
std::string childText;
2540-
while (buffer[cursor] != 0 & cursor <= endGuard)
2541-
childText.push_back(buffer[cursor++]);
2542-
cursor++;
2543-
if (cursor > endGuard)
2544-
throw ReadException();
2545-
auto next = readValidULEB128(buffer, cursor);
2546-
if (next == 0)
2547-
throw ReadException();
2548-
ReadExportNode(viewStart, buffer, currentText + childText, next, endGuard);
2662+
m_logger->LogError("Export trie is malformed. Could not load Exported symbol names.");
25492663
}
25502664
}
25512665

2552-
25532666
void MachoView::ParseRebaseTable(BinaryReader& reader, MachOHeader& header, uint32_t tableOffset, uint32_t tableSize)
25542667
{
25552668
if (tableSize == 0 || tableOffset == 0)

view/macho/machoview.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,6 +1483,7 @@ namespace BinaryNinja
14831483
void ParseFunctionStarts(Platform* platform, uint64_t textBase, function_starts_command functionStarts);
14841484
bool ParseRelocationEntry(const relocation_info& info, uint64_t start, BNRelocationInfo& result);
14851485

1486+
bool AddExportTerminalSymbol(const std::string& symbolName, uint64_t symbolFlags, uint64_t imageOffset);
14861487
void ParseExportTrie(BinaryReader& reader, linkedit_data_command exportTrie);
14871488
void ReadExportNode(uint64_t viewStart, DataBuffer& buffer, const std::string& currentText,
14881489
size_t cursor, uint32_t endGuard);

view/sharedcache/core/MachO.cpp

Lines changed: 91 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -606,59 +606,106 @@ bool SharedCacheMachOHeader::AddExportTerminalSymbol(
606606
return true;
607607
}
608608

609-
// TODO: This is like 90% of the runtime.
610-
bool SharedCacheMachOHeader::ProcessLinkEditTrie(std::vector<CacheSymbol>& symbols, const std::string& currentText,
611-
const uint8_t* begin, const uint8_t* current, const uint8_t* end) const
609+
std::vector<CacheSymbol> SharedCacheMachOHeader::ReadExportSymbolTrie(VirtualMemory& vm) const
612610
{
613-
if (current >= end)
614-
return false;
611+
// nothing to do if there’s no export‐trie
612+
if (exportTrie.datasize == 0 || exportTrie.dataoff == 0)
613+
return {};
614+
std::vector<CacheSymbol> symbols = {};
615+
try {
616+
auto [begin, end] = vm.ReadSpan(GetLinkEditFileBase() + exportTrie.dataoff, exportTrie.datasize);
617+
const uint8_t *cursor = begin;
615618

616-
uint64_t terminalSize = readValidULEB128(current, end);
617-
const uint8_t* child = current + terminalSize;
619+
struct Node
620+
{
621+
const uint8_t* cursor;
622+
std::string text;
623+
};
624+
std::vector<Node> stack;
625+
stack.reserve(64);
626+
stack.push_back({ /* cursor */ begin, /* text */ "" });
627+
628+
while (!stack.empty())
629+
{
630+
Node node = std::move(stack.back());
631+
stack.pop_back();
618632

619-
// The terminal is an export symbol.
620-
if (terminalSize != 0)
621-
AddExportTerminalSymbol(symbols, currentText, current, end);
633+
cursor = node.cursor;
634+
const std::string currentText = std::move(node.text);
622635

623-
// TODO: Make this look better
624-
current = child;
625-
uint8_t childCount = *current++;
626-
std::string childText = currentText;
627-
for (uint8_t i = 0; i < childCount; ++i)
628-
{
629-
if (current >= end)
630-
return false;
631-
const auto it = std::find(current, end, 0);
632-
childText.append(current, it);
633-
current = it + 1;
634-
if (current >= end)
635-
return false;
636-
const auto next = readValidULEB128(current, end);
637-
if (next == 0)
638-
return false;
639-
if (!ProcessLinkEditTrie(symbols, childText, begin, begin + next, end))
640-
return false;
641-
childText.resize(currentText.size());
642-
}
636+
if (cursor > end)
637+
{
638+
LogError("Export Trie: Cursor left trie during initial bounds check");
639+
throw ReadException();
640+
}
643641

644-
return true;
645-
}
642+
uint64_t terminalSize = readValidULEB128(cursor, end);
643+
const uint8_t* childCursor = cursor + terminalSize;
646644

647-
std::vector<CacheSymbol> SharedCacheMachOHeader::ReadExportSymbolTrie(VirtualMemory& vm) const
648-
{
649-
if (exportTrie.datasize == 0)
650-
return {};
645+
// If there's terminal data, define the symbol
646+
if (terminalSize != 0)
647+
{
648+
AddExportTerminalSymbol(symbols, currentText, cursor, end);
649+
}
651650

652-
uint64_t exportTrieAddress = GetLinkEditFileBase() + exportTrie.dataoff;
653-
std::vector<CacheSymbol> symbols = {};
654-
try
655-
{
656-
auto [begin, end] = vm.ReadSpan(exportTrieAddress, exportTrie.datasize);
657-
ProcessLinkEditTrie(symbols, "", begin, begin, end);
651+
cursor = childCursor;
652+
if (cursor > end)
653+
{
654+
LogError("Export Trie: Cursor left trie while moving to child offset");
655+
throw ReadException();
656+
}
657+
658+
uint8_t childCount = *cursor;
659+
cursor++;
660+
if (cursor > end)
661+
{
662+
LogError("Export Trie: Cursor left trie while reading child count");
663+
throw ReadException();
664+
}
665+
666+
std::vector<Node> children;
667+
children.reserve(childCount);
668+
for (uint8_t i = 0; i < childCount; ++i)
669+
{
670+
if (cursor > end)
671+
{
672+
LogError("Export Trie: Cursor left trie while reading children");
673+
throw ReadException();
674+
}
675+
676+
std::string childText;
677+
while (cursor <= end && *cursor != 0) {
678+
childText.push_back(*cursor);
679+
cursor++;
680+
}
681+
cursor++; // skip the `\0`
682+
if (cursor > end)
683+
{
684+
LogError("Export Trie: Cursor left trie while reading child text");
685+
throw ReadException();
686+
}
687+
688+
uint64_t nextOffset = readValidULEB128(cursor, end);
689+
if (nextOffset == 0)
690+
{
691+
LogError("Export Trie: Child offset is zero");
692+
throw ReadException();
693+
}
694+
695+
children.push_back({ begin + nextOffset, currentText + childText });
696+
}
697+
698+
// Push in reverse so that the first child is processed next
699+
for (auto it = children.rbegin(); it != children.rend(); ++it)
700+
{
701+
stack.push_back(*it);
702+
}
703+
}
658704
}
659-
catch (std::exception& e)
705+
catch (ReadException&)
660706
{
661-
BNLogError("Failed to read Export Trie: %s", e.what());
707+
LogError("Export trie is malformed. Could not load Exported symbol names.");
662708
}
709+
663710
return symbols;
664711
}

0 commit comments

Comments
 (0)