Skip to content

Commit 0711738

Browse files
committed
wip
1 parent fdaff2c commit 0711738

21 files changed

+377
-281
lines changed

view/sharedcache/core/Dyld.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ struct PACKED_STRUCT dyld_cache_image_info
117117
uint64_t inode;
118118
uint32_t pathFileOffset;
119119
uint32_t pad;
120+
121+
bool operator==(const dyld_cache_image_info & image) const;
120122
};
121123

122124
union dyld_cache_slide_pointer5

view/sharedcache/core/MachO.cpp

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <utility>
55

66
#include "SharedCache.h"
7+
#include "SlideInfo.h"
78
#include "VirtualMemory.h"
89

910
using namespace BinaryNinja;
@@ -25,7 +26,7 @@ std::optional<SharedCacheMachOHeader> SharedCacheMachOProcessor::ParseHeaderForA
2526
header.textBase = address;
2627
header.installName = imagePath;
2728
// The identifierPrefix is used for the display of the image name in the sections and segments.
28-
header.identifierPrefix = ImageName(imagePath);
29+
header.identifierPrefix = FileName(imagePath);
2930

3031
std::string errorMsg;
3132
VirtualMemoryReader reader(m_vm);
@@ -510,22 +511,8 @@ void SharedCacheMachOProcessor::ApplyHeader(Ref<BinaryView> view, SharedCacheMac
510511
uint64_t SharedCacheMachOHeader::ApplyHeaderSections(Ref<BinaryView> view)
511512
{
512513
auto initSection = [&](const section_64& section, const std::string& sectionName) {
513-
bool skip = false;
514-
// TODO: The memory region for the header needs to be available.
515-
// Find out if we even need to add the section, i.e. is the region available?
516-
// TODO: What does this _actually_ do???
517-
// for (const auto& region : regionsToLoad)
518-
// {
519-
// if (section.addr >= region->start && section.addr < region->start + region->size)
520-
// {
521-
// if (MemoryRegionIsHeaderInitialized(lock, *region))
522-
// skip = true;
523-
// break;
524-
// }
525-
// }
526-
527514
auto existingSections = view->GetSectionsAt(section.addr);
528-
if (!section.size || skip || !existingSections.empty())
515+
if (!section.size || !existingSections.empty())
529516
return false;
530517

531518
std::string type;

view/sharedcache/core/MappedFileAccessor.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ std::shared_ptr<MappedFileAccessor> MappedFileAccessor::Open(const std::string&
1515

1616
// Stores `pointer` as a `size_t` at index `address` of the mapped file (`m_file._mmap`) and
// marks this accessor as dirty; the dirty flag is what `IsDirty()` reports.
// NOTE(review): no bounds or alignment check is done here — presumably callers guarantee that
// `address` plus `sizeof(size_t)` lies inside the mapping; confirm.
void MappedFileAccessor::WritePointer(size_t address, size_t pointer)
1717
{
18+
m_dirty = true;
1819
*reinterpret_cast<size_t *>(&m_file._mmap[address]) = pointer;
1920
}
2021

view/sharedcache/core/MappedFileAccessor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class UnmappedAccessException : public std::exception {
1515
// std::enable_shared_from_this allows weak pointers to "revive" the shared pointer in the FileAccessorCache.
1616
class MappedFileAccessor : public std::enable_shared_from_this<MappedFileAccessor> {
1717
MappedFile m_file;
18+
bool m_dirty = false;
1819

1920
public:
2021
explicit MappedFileAccessor(MappedFile file) : m_file(std::move(file)) {}
@@ -33,6 +34,8 @@ class MappedFileAccessor : public std::enable_shared_from_this<MappedFileAccesso
3334

3435
void *Data() const { return m_file._mmap; };
3536

37+
bool IsDirty() const { return m_dirty; }
38+
3639
/**
3740
* Writes to files are implemented for performance reasons and should be treated with utmost care
3841
*

view/sharedcache/core/ObjC.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ std::optional<ObjCOptimizationHeader> DSCObjCProcessor::GetObjCOptimizationHeade
122122

123123
ObjCOptimizationHeader header = {};
124124
// Ignoring `objcOptsSize` in favor of `sizeof(ObjCOptimizationHeader)` matches dyld's behavior.
125-
// TODO: base address is wild... why does that work?????
126125
reader.Read(&header, m_cache->GetBaseAddress() + primaryCacheHeader->objcOptsOffset, sizeof(ObjCOptimizationHeader));
127126

128127
return header;

view/sharedcache/core/SharedCache.cpp

Lines changed: 54 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
#include <regex>
44
#include <__filesystem/directory_iterator.h>
5-
#include <__filesystem/operations.h>
65

76
#include "MachO.h"
7+
#include "SlideInfo.h"
88

99
using namespace BinaryNinja;
1010

@@ -99,6 +99,11 @@ std::optional<CacheEntry> CacheEntry::FromFile(const std::string& filePath, Cach
9999
return CacheEntry(filePath, type, header, mappings, images);
100100
}
101101

102+
// Returns the weak file accessor for this entry's backing file, obtained by asking the global
// `FileAccessorCache` to open `m_filePath`.
// `SharedCache::ProcessEntry` calls `.lock()` on the returned accessor and holds the result
// while it processes the entry.
WeakFileAccessor CacheEntry::GetAccessor() const
103+
{
104+
return FileAccessorCache::Global().Open(m_filePath);
105+
}
106+
102107
SharedCache::SharedCache()
103108
{
104109
m_vm = std::make_shared<VirtualMemory>();
@@ -171,8 +176,14 @@ CacheEntryId SharedCache::AddEntry(CacheEntry entry)
171176
// read the memory of the mapped regions of the cache entry file.
172177
const auto& mappings = entry.GetMappings();
173178
for (const auto& mapping: mappings)
179+
{
174180
m_vm->MapRegion(fileAccessor, {mapping.address, mapping.address + mapping.size}, mapping.fileOffset);
175181

182+
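// Recalculate the base address. NOTE(review): m_baseAddress is declared as `uint64_t m_baseAddress = 0;` in SharedCache.h, so unless it is assigned elsewhere this unsigned `<` comparison can never be true and the base address stays 0 — confirm.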
183+
if (mapping.address < m_baseAddress)
184+
m_baseAddress = mapping.address;
185+
}
186+
176187
// We are done and can make the entry visible to the entire cache.
177188
m_entries.insert({id, std::move(entry)});
178189
return id;
@@ -192,6 +203,13 @@ void SharedCache::ProcessEntry(BinaryView &view, const CacheEntry &entry)
192203
auto machoProcessor = SharedCacheMachOProcessor(m_vm);
193204

194205
auto entryHeader = entry.GetHeader();
206+
207+
// Apply slide info while we are processing.
208+
// TODO: If this file gets evicted, our writes go away; move this to some place visible to the weak file accessor.
209+
auto fileLock = entry.GetAccessor().lock();
210+
auto slideInfoProcessor = SlideInfoProcessor(GetBaseAddress());
211+
slideInfoProcessor.ProcessEntryInfo(entry);
212+
195213
// Collect pool addresses as non image memory regions.
196214
for (size_t i = 0; i < entryHeader.branchPoolsCount; i++)
197215
{
@@ -222,8 +240,6 @@ void SharedCache::ProcessEntry(BinaryView &view, const CacheEntry &entry)
222240

223241
// Get the mapping.
224242
const auto& entryMappings = entry.GetMappings();
225-
// TODO: We probably just want to get the file _name_
226-
const auto& entryFilePath = entry.GetFilePath();
227243

228244
// DyldData, we should take all the mappings and make some regions for them!
229245
if (entry.GetType() == CacheEntryType::DyldData)
@@ -234,10 +250,7 @@ void SharedCache::ProcessEntry(BinaryView &view, const CacheEntry &entry)
234250
CacheRegion mappingRegion;
235251
mappingRegion.start = mapping.address;
236252
mappingRegion.size = mapping.size;
237-
// auto pathBasename = subCachePath.substr(subCachePath.find_last_of("/\\") + 1);
238-
// TODO: Is the filepath here the entire disk path?
239-
// TODO: We probably just want something else...
240-
mappingRegion.name = fmt::format("{}::_data{}", entryFilePath, i);
253+
mappingRegion.name = fmt::format("{}::_data{}", entry.GetFileName(), i);
241254
mappingRegion.flags = SegmentReadable;
242255
mappingRegion.type = CacheRegionType::DyldData;
243256

@@ -253,9 +266,7 @@ void SharedCache::ProcessEntry(BinaryView &view, const CacheEntry &entry)
253266
CacheRegion stubIslandRegion;
254267
stubIslandRegion.start = stubMapping.address;
255268
stubIslandRegion.size = stubMapping.size;
256-
// auto pathBasename = subCachePath.substr(subCachePath.find_last_of("/\\") + 1);
257-
// TODO: Is the filepath here the entire disk path?
258-
stubIslandRegion.name = fmt::format("{}::_stubs", entryFilePath);
269+
stubIslandRegion.name = fmt::format("{}::_stubs", entry.GetFileName());
259270
stubIslandRegion.flags = static_cast<BNSegmentFlag>(SegmentReadable | SegmentExecutable);
260271
stubIslandRegion.type = CacheRegionType::StubIsland;
261272

@@ -329,16 +340,37 @@ void SharedCache::ProcessEntry(BinaryView &view, const CacheEntry &entry)
329340

330341
AddImage(std::move(image));
331342
}
343+
344+
// TODO: See note above about slide info writes.
345+
fileLock.get();
332346
}
333347

334-
uint64_t SharedCache::GetBaseAddress() const
348+
// Returns a copy of the first cache entry that has a mapping whose half-open range
// [mapping.address, mapping.address + mapping.size) contains `address`, or std::nullopt
// when no mapping of any entry contains it.
// NOTE(review): `m_entries` is exposed as an unordered_map by GetEntries(), so "first" follows
// an unspecified iteration order; this presumably only matters if mappings overlap — confirm.
std::optional<CacheEntry> SharedCache::GetEntryContaining(const uint64_t address) const
335349
{
336-
uint64_t lowestAddress = std::numeric_limits<uint64_t>::max();
337-
for (const auto &[_, entry]: m_entries)
338-
for (const auto &mapping: entry.GetMappings())
339-
if (mapping.address < lowestAddress)
340-
lowestAddress = mapping.address;
341-
return lowestAddress != std::numeric_limits<uint64_t>::max() ? lowestAddress : 0;
350+
for (const auto& entry : m_entries)
351+
{
352+
for (const auto& mapping : entry.second.GetMappings())
353+
{
354+
if (address >= mapping.address && address < mapping.address + mapping.size)
355+
return entry.second;
356+
}
357+
}
358+
359+
return std::nullopt;
360+
}
361+
362+
// Returns a copy of the first cache entry whose image table (GetImages()) holds an image with
// `address` equal to `image.headerAddress`, or std::nullopt when no entry matches.
// Only the header address is compared; the string key of the image table is ignored.
std::optional<CacheEntry> SharedCache::GetEntryWithImage(const CacheImage &image) const
363+
{
364+
for (const auto& entry : m_entries)
365+
{
366+
for (const auto& [_, currentImage] : entry.second.GetImages())
367+
{
368+
if (currentImage.address == image.headerAddress)
369+
return entry.second;
370+
}
371+
}
372+
373+
return std::nullopt;
342374
}
343375

344376
std::optional<CacheRegion> SharedCache::GetRegionAt(const uint64_t address) const
@@ -415,41 +447,6 @@ bool CacheProcessor::ProcessCache(SharedCache& cache)
415447
if (!baseEntry.has_value())
416448
return false;
417449

418-
// Identify if we are dealing with multiple files or just this one.
419-
auto baseHeader = baseEntry->GetHeader();
420-
421-
enum CacheFormat
422-
{
423-
RegularCacheFormat = 0,
424-
// TODO: Why do these need to be separate ugh.
425-
LargeCacheFormat,
426-
SplitCacheFormat,
427-
iOS16CacheFormat,
428-
};
429-
430-
// TODO: The only real difference we need is if imagesCountOld is not zero dont search for other files I think?
431-
CacheFormat cacheFormat = RegularCacheFormat;
432-
if (baseHeader.imagesCountOld != 0)
433-
cacheFormat = RegularCacheFormat;
434-
435-
size_t subCacheOff = offsetof(struct dyld_cache_header, subCacheArrayOffset);
436-
size_t headerEnd = baseHeader.mappingOffset;
437-
if (headerEnd > subCacheOff)
438-
{
439-
if (baseHeader.cacheType != 2)
440-
{
441-
if (std::filesystem::exists(ResolveFilePath(m_view, baseFilePath + ".01")))
442-
cacheFormat = LargeCacheFormat;
443-
else
444-
cacheFormat = SplitCacheFormat;
445-
}
446-
else
447-
cacheFormat = iOS16CacheFormat;
448-
}
449-
450-
// TODO: Make this debug.
451-
LogInfo("Cache Format: %d\n", cacheFormat);
452-
453450
// Before we do anything else, add this to the cache so it's available to other entries.
454451
cache.AddEntry(std::move(*baseEntry));
455452

@@ -459,19 +456,20 @@ bool CacheProcessor::ProcessCache(SharedCache& cache)
459456
// NOTE: This is extremely error-prone!
460457
// We are going to start trying to find files next to this one on disk!
461458
std::filesystem::path basePath = std::filesystem::path(baseFilePath).parent_path();
459+
std::string pattern = fmt::format(".*{}\\.([0-9]+|symbols|dylddata)$", FileName(baseFilePath));
460+
auto subCachePattern = std::regex(pattern);
462461
for (const auto &entry : std::filesystem::directory_iterator(basePath))
463462
{
464463
if (!entry.is_regular_file())
465464
continue;
466465

466+
// Skip the base file itself
467467
auto currentFilePath = entry.path().string();
468468
if (currentFilePath == baseFilePath)
469-
continue; // Skip the base file itself
469+
continue;
470470

471471
// Filter files that don't end with the base file name followed by .NUMBER, .symbols or .dylddata
472-
// TODO: Running this regex on a directory with 10000 files aint gonna go well.
473-
// TODO: Remove this LMAO
474-
if (!std::regex_match(currentFilePath, std::regex(".*\\.([0-9]+|symbols|dylddata)$")))
472+
if (!std::regex_match(currentFilePath, subCachePattern))
475473
continue;
476474

477475
auto additionalEntry = CacheEntry::FromFile(currentFilePath, CacheEntryType::Secondary);

view/sharedcache/core/SharedCache.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ struct CacheImage
9999
CacheImage& operator=(CacheImage&& other) noexcept = default;
100100

101101
// Get the file name from the path.
102-
std::string GetName() const { return ImageName(path); }
102+
std::string GetName() const { return FileName(path); }
103103
};
104104

105105
enum class CacheEntryType
@@ -143,8 +143,13 @@ class CacheEntry
143143
static std::optional<CacheEntry> FromFile(const std::string& filePath, CacheEntryType type);
144144
// TODO: From Project file?
145145

146+
WeakFileAccessor GetAccessor() const;
147+
146148
CacheEntryType GetType() const { return m_type; }
149+
// Ex. "/myuser/mypath/dyld_shared_cache_arm64e"
147150
const std::string& GetFilePath() const { return m_filePath; }
151+
// Ex. "dyld_shared_cache_arm64e"
152+
const std::string GetFileName() const { return FileName(m_filePath); }
148153
const dyld_cache_header& GetHeader() const { return m_header; }
149154
const std::vector<dyld_cache_mapping_info>& GetMappings() const { return m_mappings; }
150155
const std::unordered_map<std::string, dyld_cache_image_info>& GetImages() const { return m_images; }
@@ -161,6 +166,7 @@ typedef uint32_t CacheEntryId;
161166
// Creating this is expensive, both in actual processing and just copying, so we only generate this
162167
// once every time the database is open.
163168
class SharedCache {
169+
uint64_t m_baseAddress = 0;
164170
// TODO: Figure out when to lock the mutex on this.
165171
// The shared cache can own the virtual memory, this is fine...
166172
std::shared_ptr<VirtualMemory> m_vm;
@@ -177,6 +183,7 @@ class SharedCache {
177183
public:
178184
explicit SharedCache();
179185

186+
uint64_t GetBaseAddress() const { return m_baseAddress; }
180187
std::shared_ptr<VirtualMemory> GetVirtualMemory() { return m_vm; }
181188
const std::unordered_map<CacheEntryId, CacheEntry>& GetEntries() const { return m_entries; }
182189
const AddressRangeMap<CacheRegion>& GetRegions() const { return m_regions; }
@@ -208,8 +215,8 @@ class SharedCache {
208215
// TODO: We need a separate "get all objc sections" and then just a ProcessObjCSection
209216
void ProcessAllObjCSections();
210217

211-
// Get the lowest mapped address, this is useful for some reason?
212-
uint64_t GetBaseAddress() const;
218+
std::optional<CacheEntry> GetEntryContaining(uint64_t address) const;
219+
std::optional<CacheEntry> GetEntryWithImage(const CacheImage& image) const;
213220

214221
std::optional<CacheRegion> GetRegionAt(uint64_t address) const;
215222
std::optional<CacheRegion> GetRegionContaining(uint64_t address) const;

0 commit comments

Comments
 (0)