Skip to content

Commit ea47d4c

Browse files
committed
[SharedCache] Apply .symbols file information when applying an image
This improves symbol recovery drastically on newer shared caches. Related PR: #6210
1 parent 83dc0ef commit ea47d4c

File tree

8 files changed

+93
-46
lines changed

8 files changed

+93
-46
lines changed

view/sharedcache/core/MachO.cpp

Lines changed: 10 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -457,32 +457,19 @@ std::optional<SharedCacheMachOHeader> SharedCacheMachOHeader::ParseHeaderForAddr
457457
return header;
458458
}
459459

460-
// TODO: Support reading from .symbols file.
461-
// TODO: Replace view with address size?
462-
std::vector<CacheSymbol> SharedCacheMachOHeader::ReadSymbolTable(BinaryView& view, VirtualMemory& vm) const
460+
std::vector<CacheSymbol> SharedCacheMachOHeader::ReadSymbolTable(VirtualMemory& vm, const TableInfo &symbolInfo, const TableInfo &stringInfo) const
463461
{
464-
auto addressSize = view.GetAddressSize();
465-
// NOTE: The symbol table will exist within the link edit segment, the table offsets are relative to the file not
466-
// the linkedit segment.
467-
uint64_t symbolsAddress = GetLinkEditFileBase() + symtab.symoff;
468-
uint64_t stringsAddress = GetLinkEditFileBase() + symtab.stroff;
469-
470-
// TODO: This needs to be passed in as an optional argument.
471-
// TODO: Sometimes symbol tables are shared and we have to offset into the table for a specific header.
472-
// TODO: The "shared" symbol tables are stored in .symbols files.
473-
int nlistStartIndex = 0;
474-
475462
std::vector<CacheSymbol> symbolList;
476-
for (uint64_t i = 0; i < symtab.nsyms; i++)
463+
// TODO: This assumes that 95% (or more) are going to be added.
464+
symbolList.reserve(symbolInfo.entries);
465+
for (uint64_t entryIndex = 0; entryIndex < symbolInfo.entries; entryIndex++)
477466
{
478-
uint64_t entryIndex = (nlistStartIndex + i);
479-
480467
nlist_64 nlist = {};
481-
if (addressSize == 4)
468+
if (vm.GetAddressSize() == 4)
482469
{
483470
// 32-bit DSC
484471
struct nlist nlist32 = {};
485-
vm.Read(&nlist, symbolsAddress + (entryIndex * sizeof(nlist32)), sizeof(nlist32));
472+
vm.Read(&nlist, symbolInfo.address + (entryIndex * sizeof(nlist32)), sizeof(nlist32));
486473
nlist.n_strx = nlist32.n_strx;
487474
nlist.n_type = nlist32.n_type;
488475
nlist.n_sect = nlist32.n_sect;
@@ -492,24 +479,24 @@ std::vector<CacheSymbol> SharedCacheMachOHeader::ReadSymbolTable(BinaryView& vie
492479
else
493480
{
494481
// 64-bit DSC
495-
vm.Read(&nlist, symbolsAddress + (entryIndex * sizeof(nlist)), sizeof(nlist));
482+
vm.Read(&nlist, symbolInfo.address + (entryIndex * sizeof(nlist)), sizeof(nlist));
496483
}
497484

498485
auto symbolAddress = nlist.n_value;
499486
if (((nlist.n_type & N_TYPE) == N_INDR) || symbolAddress == 0)
500487
continue;
501488

502-
if (nlist.n_strx >= symtab.strsize)
489+
if (nlist.n_strx >= stringInfo.entries)
503490
{
504491
// TODO: where logger?
505492
LogError(
506493
"Symbol entry at index %llu has a string offset of %u which is outside the strings buffer of size %u "
507494
"for symbol table %x",
508-
entryIndex, nlist.n_strx, symtab.strsize, symtab.stroff);
495+
entryIndex, nlist.n_strx, stringInfo.address, stringInfo.entries);
509496
continue;
510497
}
511498

512-
std::string symbolName = vm.ReadCString(stringsAddress + nlist.n_strx);
499+
std::string symbolName = vm.ReadCString(stringInfo.address + nlist.n_strx);
513500
if (symbolName == "<redacted>")
514501
continue;
515502

view/sharedcache/core/MachO.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,15 @@
77

88
struct CacheSymbol;
99

10+
// Used when reading symbol/string table info.
11+
struct TableInfo
12+
{
13+
// VM address where the reading will begin.
14+
uint64_t address;
15+
// Number of entries in the table.
16+
uint32_t entries;
17+
};
18+
1019
struct SharedCacheMachOHeader
1120
{
1221
uint64_t textBase = 0;
@@ -61,8 +70,7 @@ struct SharedCacheMachOHeader
6170
static std::optional<SharedCacheMachOHeader> ParseHeaderForAddress(
6271
std::shared_ptr<VirtualMemory> vm, uint64_t address, const std::string& imagePath);
6372

64-
// TODO: Replace view with address size?
65-
std::vector<CacheSymbol> ReadSymbolTable(BinaryNinja::BinaryView& view, VirtualMemory& vm) const;
73+
std::vector<CacheSymbol> ReadSymbolTable(VirtualMemory& vm, const TableInfo &symbolInfo, const TableInfo &stringInfo) const;
6674

6775
bool AddExportTerminalSymbol(
6876
std::vector<CacheSymbol>& symbols, const std::string& symbolName, const uint8_t* current,

view/sharedcache/core/MachOProcessor.cpp

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ SharedCacheMachOProcessor::SharedCacheMachOProcessor(Ref<BinaryView> view, std::
1717
}
1818
}
1919

20-
void SharedCacheMachOProcessor::ApplyHeader(SharedCacheMachOHeader& header)
20+
void SharedCacheMachOProcessor::ApplyHeader(const SharedCache& cache, SharedCacheMachOHeader& header)
2121
{
2222
auto typeLibraryFromName = [&](const std::string& name) -> Ref<TypeLibrary> {
2323
// Check to see if we have already loaded the type library.
@@ -32,13 +32,16 @@ void SharedCacheMachOProcessor::ApplyHeader(SharedCacheMachOHeader& header)
3232

3333
// Add a section for the header itself.
3434
std::string headerSection = fmt::format("{}::__macho_header", header.identifierPrefix);
35-
// TODO: Support mach_header (non 64bit)
36-
uint64_t headerSectionSize = sizeof(mach_header_64) + header.ident.sizeofcmds;
35+
uint64_t machHeaderSize = m_vm->GetAddressSize() == 8 ? sizeof(mach_header_64) : sizeof(mach_header);
36+
uint64_t headerSectionSize = machHeaderSize + header.ident.sizeofcmds;
3737
m_view->AddUserSection(headerSection, header.textBase, headerSectionSize, ReadOnlyDataSectionSemantics);
3838

3939
ApplyHeaderSections(header);
4040
ApplyHeaderDataVariables(header);
4141

42+
// Pull the available type library for the image we are loading, so we can apply known types.
43+
auto typeLib = typeLibraryFromName(header.installName);
44+
4245
if (header.linkeditPresent && m_vm->IsAddressMapped(header.linkeditSegment.vmaddr))
4346
{
4447
if (m_applyFunctions && header.functionStartsPresent)
@@ -49,18 +52,16 @@ void SharedCacheMachOProcessor::ApplyHeader(SharedCacheMachOHeader& header)
4952
m_view->AddFunctionForAnalysis(targetPlatform, func, false);
5053
}
5154

52-
// Pull the available type library for the image we are loading, so we can apply known types.
53-
auto typeLib = typeLibraryFromName(header.installName);
5455
m_view->BeginBulkModifySymbols();
5556

56-
// TODO: Why does this need to only happen in linkeditSegment?
5757
// Apply symbols from symbol table.
5858
if (header.symtab.symoff != 0)
5959
{
60-
// Mach-O View symtab processing with
61-
// a ton of stuff cut out so it can work
6260
// NOTE: This table is read relative to the link edit segment file base.
63-
const auto symbols = header.ReadSymbolTable(*m_view, *m_vm);
61+
// NOTE: This does not handle the shared .symbols cache entry symbols, that is the responsibility of the caller.
62+
TableInfo symbolInfo = { header.GetLinkEditFileBase() + header.symtab.symoff, header.symtab.nsyms };
63+
TableInfo stringInfo = { header.GetLinkEditFileBase() + header.symtab.stroff, header.symtab.strsize };
64+
const auto symbols = header.ReadSymbolTable(*m_vm, symbolInfo, stringInfo);
6465
for (const auto& sym : symbols)
6566
{
6667
auto [symbol, symbolType] = sym.GetBNSymbolAndType(*m_view);
@@ -72,7 +73,6 @@ void SharedCacheMachOProcessor::ApplyHeader(SharedCacheMachOHeader& header)
7273
if (header.exportTriePresent)
7374
{
7475
// NOTE: This table is read relative to the link edit segment file base.
75-
// TODO: Remove this and use the m_symbols in the cache?
7676
const auto exportSymbols = header.ReadExportSymbolTrie(*m_vm);
7777
for (const auto& sym : exportSymbols)
7878
{
@@ -82,6 +82,53 @@ void SharedCacheMachOProcessor::ApplyHeader(SharedCacheMachOHeader& header)
8282
}
8383
m_view->EndBulkModifySymbols();
8484
}
85+
86+
// Apply symbols from the .symbols cache files.
87+
for (const auto &entry: cache.GetEntries())
88+
{
89+
// NOTE: We check addr size as we only support 64bit .symbols files currently.
90+
if (entry.GetType() != CacheEntryType::Symbols && m_vm->GetAddressSize() == 8)
91+
continue;
92+
const auto& entryHeader = entry.GetHeader();
93+
94+
// This is where we get the symbol and string table information from in the .symbols file.
95+
dyld_cache_local_symbols_info localSymbolsInfo = {};
96+
auto localSymbolsInfoAddr = entry.GetMappedAddress(entryHeader.localSymbolsOffset);
97+
if (!localSymbolsInfoAddr.has_value())
98+
continue;
99+
m_vm->Read(&localSymbolsInfo, *localSymbolsInfoAddr, sizeof(dyld_cache_local_symbols_info));
100+
101+
// Read each symbols entry, looking for the current image entry.
102+
uint64_t localEntriesAddr = *localSymbolsInfoAddr + localSymbolsInfo.entriesOffset;
103+
uint64_t localSymbolsAddr = *localSymbolsInfoAddr + localSymbolsInfo.nlistOffset;
104+
uint64_t localStringsAddr = *localSymbolsInfoAddr + localSymbolsInfo.stringsOffset;
105+
106+
dyld_cache_local_symbols_entry_64 localSymbolsEntry = {};
107+
for (uint32_t i = 0; i < localSymbolsInfo.entriesCount; i++)
108+
{
109+
m_vm->Read(&localSymbolsEntry, localEntriesAddr + i * sizeof(dyld_cache_local_symbols_entry_64),
110+
sizeof(dyld_cache_local_symbols_entry_64));
111+
// The dylibOffset is the offset from the cache base address to the image header.
112+
const auto imageAddr = cache.GetBaseAddress() + localSymbolsEntry.dylibOffset;
113+
if (imageAddr == header.textBase)
114+
{
115+
// We have found the entry to read!
116+
// TODO: Support 32bit nlist
117+
uint64_t symbolTableStart = localSymbolsAddr + (localSymbolsEntry.nlistStartIndex * sizeof(nlist_64));
118+
TableInfo symbolInfo = {symbolTableStart, localSymbolsEntry.nlistCount};
119+
TableInfo stringInfo = {localStringsAddr, localSymbolsInfo.stringsSize};
120+
m_view->BeginBulkModifySymbols();
121+
const auto symbols = header.ReadSymbolTable(*m_vm, symbolInfo, stringInfo);
122+
for (const auto &sym: symbols)
123+
{
124+
auto [symbol, symbolType] = sym.GetBNSymbolAndType(*m_view);
125+
ApplySymbol(m_view, typeLib, symbol, symbolType);
126+
}
127+
m_view->EndBulkModifySymbols();
128+
break;
129+
}
130+
}
131+
}
85132
}
86133

87134
uint64_t SharedCacheMachOProcessor::ApplyHeaderSections(SharedCacheMachOHeader& header)

view/sharedcache/core/MachOProcessor.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22
#include "MachO.h"
3+
#include "SharedCache.h"
34

45
// Process `SharedCacheMachOHeader`.
56
class SharedCacheMachOProcessor
@@ -15,7 +16,7 @@ class SharedCacheMachOProcessor
1516
BinaryNinja::Ref<BinaryNinja::BinaryView> view, std::shared_ptr<VirtualMemory> vm);
1617

1718
// Initialize header information such as sections and symbols.
18-
void ApplyHeader(SharedCacheMachOHeader& header);
19+
void ApplyHeader(const SharedCache& cache, SharedCacheMachOHeader& header);
1920

2021
uint64_t ApplyHeaderSections(SharedCacheMachOHeader& header);
2122

view/sharedcache/core/SharedCache.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,14 @@ std::optional<CacheEntry> CacheEntry::FromFile(const std::string& filePath, cons
8080
// We found a single dyld data cache entry file. Mark it as such!
8181
type = CacheEntryType::DyldData;
8282
}
83-
else if (fileName.find(".symbols") != std::string::npos)
83+
else if (fileName.find(".symbols") != std::string::npos && mappings.size() == 1)
8484
{
8585
// We found a single symbols cache entry file. Mark it as such!
8686
type = CacheEntryType::Symbols;
87+
// Adjust the mapping for the symbol file, as its mapping seems to cover only the header.
88+
// If we do not adjust the mapping then we will not be able to read the symbol table through the virtual memory.
89+
mappings[0].fileOffset = 0;
90+
mappings[0].size = file->Length();
8791
}
8892
else if (mappings.size() == 1 && header.imagesCountOld == 0 && header.imagesCount == 0
8993
&& header.imagesTextOffset == 0)
@@ -154,8 +158,7 @@ std::optional<uint64_t> CacheEntry::GetMappedAddress(uint64_t fileOffset) const
154158

155159
SharedCache::SharedCache(uint64_t addressSize)
156160
{
157-
m_addressSize = addressSize;
158-
m_vm = std::make_shared<VirtualMemory>();
161+
m_vm = std::make_shared<VirtualMemory>(addressSize);
159162
m_namedSymMutex = std::make_unique<std::shared_mutex>();
160163
}
161164

@@ -321,7 +324,7 @@ void SharedCache::ProcessEntryRegions(const CacheEntry& entry)
321324
// Collect pool addresses as non image memory regions.
322325
for (size_t i = 0; i < entryHeader.branchPoolsCount; i++)
323326
{
324-
auto branchPoolAddr = entryHeader.branchPoolsOffset + (i * m_addressSize);
327+
auto branchPoolAddr = entryHeader.branchPoolsOffset + (i * m_vm->GetAddressSize());
325328
auto header = SharedCacheMachOHeader::ParseHeaderForAddress(
326329
m_vm, branchPoolAddr, "dyld_shared_cache_branch_islands_" + std::to_string(i));
327330
// Stop processing branch pools if a header fails to parse.

view/sharedcache/core/SharedCache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ typedef uint32_t CacheEntryId;
173173
// once every time the database is open.
174174
class SharedCache
175175
{
176-
uint64_t m_addressSize = 8;
176+
// Calculated within `AddEntry`, this indicates where the shared cache image is based at.
177177
uint64_t m_baseAddress = 0;
178178
// TODO: Figure out when to lock the mutex on this shit lmfao
179179
// The shared cache can own the virtual memory, this is fine...

view/sharedcache/core/SharedCacheController.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
#include "SharedCacheController.h"
2-
3-
#include <utility>
4-
52
#include "MachOProcessor.h"
63
#include "ObjC.h"
7-
#include "SlideInfo.h"
84

95
using namespace BinaryNinja;
106
using namespace BinaryNinja::DSC;
@@ -211,7 +207,7 @@ bool SharedCacheController::ApplyImage(BinaryView& view, const CacheImage& image
211207
// analyzed functions as updated.
212208
auto prevDisabledState = view.GetFunctionAnalysisUpdateDisabled();
213209
view.SetFunctionAnalysisUpdateDisabled(true);
214-
machoProcessor.ApplyHeader(*image.header);
210+
machoProcessor.ApplyHeader(GetCache(), *image.header);
215211
view.SetFunctionAnalysisUpdateDisabled(prevDisabledState);
216212

217213
// Load objective-c information.

view/sharedcache/core/VirtualMemory.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,13 @@ class VirtualMemory
4242
{
4343
std::shared_mutex m_regionMutex;
4444
AddressRangeMap<VirtualMemoryRegion> m_regions;
45+
uint64_t m_addressSize = 8;
4546

4647
public:
48+
explicit VirtualMemory(uint64_t addressSize = 8) : m_addressSize(addressSize) {}
49+
50+
uint64_t GetAddressSize() const { return m_addressSize; }
51+
4752
// At no point do we ever store a strong pointer to a file accessor, that is the job of the `FileAccessorCache`.
4853
void MapRegion(WeakFileAccessor fileAccessor, AddressRange mappedRange, uint64_t fileOffset);
4954

0 commit comments

Comments
 (0)