Skip to content

Commit c22d54c

Browse files
committed
[SharedCache] Replace write log with callback registration for reapplying slide info
This improves performance by ~10x with the only real issue being a copy of the cache entry existing for each weak ref. Also fixes - Some old TODO's that are no longer relevant - Some function signatures not being const - FileAccessorCache not evicting enough accessors to fit under an adjusted cache size - Added a warning if we are over the global cache size in view initialization - Logger name not having a `.` in `SlideInfoProcessor::SlideInfoProcessor` - Re-added the accessor dirty check before applying slide info to fix #6570
1 parent 4f22a94 commit c22d54c

File tree

11 files changed

+137
-157
lines changed

11 files changed

+137
-157
lines changed

view/sharedcache/core/FileAccessorCache.cpp

Lines changed: 7 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ FileAccessorCache& FileAccessorCache::Global()
3232
return cache;
3333
}
3434

35-
3635
WeakFileAccessor FileAccessorCache::Open(const std::string& filePath)
3736
{
3837
const auto id = GetCacheAccessorID(filePath);
@@ -51,7 +50,7 @@ WeakFileAccessor FileAccessorCache::Open(const std::string& filePath)
5150
}
5251

5352
// Evict if we are going to go above the limit.
54-
if (m_cache.size() >= m_cacheSize)
53+
while (m_cache.size() >= m_cacheSize)
5554
EvictLastUsed();
5655

5756
// Create a new file accessor and add it to the cache.
@@ -69,20 +68,6 @@ WeakFileAccessor FileAccessorCache::Open(const std::string& filePath)
6968
return WeakFileAccessor(sharedAccessor, filePath);
7069
}
7170

72-
void FileAccessorWriteLog::AddPointer(const size_t address, const size_t pointer)
73-
{
74-
// TODO: A sharded map here would be better. see: rust dashmap
75-
std::unique_lock<std::shared_mutex> lock(m_persistedMutex);
76-
m_persistedPointers.emplace_back(address, pointer);
77-
}
78-
79-
void FileAccessorWriteLog::ApplyWrites(MappedFileAccessor& accessor)
80-
{
81-
std::shared_lock<std::shared_mutex> lock(m_persistedMutex);
82-
for (const auto& [address, pointer] : m_persistedPointers)
83-
accessor.WritePointer(address, pointer);
84-
}
85-
8671
std::shared_ptr<MappedFileAccessor> WeakFileAccessor::lock()
8772
{
8873
auto sharedPtr = m_weakPtr.lock();
@@ -93,20 +78,12 @@ std::shared_ptr<MappedFileAccessor> WeakFileAccessor::lock()
9378
m_weakPtr = FileAccessorCache::Global().Open(m_filePath).m_weakPtr;
9479
sharedPtr = m_weakPtr.lock();
9580

96-
// Apply any previously written pointers back.
97-
if (sharedPtr)
98-
m_writeLog->ApplyWrites(*sharedPtr);
81+
// Call the function registered with `RegisterReviveCallback`.
82+
// TODO: This races if two functions cannot acquire and revive the same file at the same time.
83+
// TODO: This will be called twice.
84+
if (m_reviveCallback.has_value())
85+
(*m_reviveCallback)(*sharedPtr);
9986
}
10087

10188
return sharedPtr;
102-
}
103-
104-
void WeakFileAccessor::WritePointer(const size_t address, const size_t pointer)
105-
{
106-
// Persist the pointer after the file accessor is revived.
107-
m_writeLog->AddPointer(address, pointer);
108-
109-
// And then actually apply the written pointer...
110-
if (auto sharedPtr = m_weakPtr.lock())
111-
sharedPtr->WritePointer(address, pointer);
112-
}
89+
}

view/sharedcache/core/FileAccessorCache.h

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -37,48 +37,36 @@ class FileAccessorCache
3737
// Adjust the cache size limit.
3838
// This will NOT evict current cache entries, as they are already available.
3939
// Any subsequent call to `Open` will assume this cache size, evicting until the size is equal to the cache size.
40-
void SetCacheSize(uint64_t size) { m_cacheSize = size; };
41-
};
42-
43-
// Write log to be used in conjunction with `WeakFileAccessor` to re-apply written data to a "revived" file.
44-
struct FileAccessorWriteLog
45-
{
46-
// To persist writes to a file accessor being revived (within the lock() function)
47-
// we keep a list of writes that will be re-applied in the lock function.
48-
std::shared_mutex m_persistedMutex;
49-
std::vector<std::pair<size_t, uint64_t>> m_persistedPointers;
50-
51-
FileAccessorWriteLog() = default;
40+
void SetCacheSize(const uint64_t size) { m_cacheSize = size; };
5241

53-
// Add the pointer to the persisted pointers.
54-
void AddPointer(size_t address, size_t pointer);
55-
56-
// Apply all logged writes to the given accessor.
57-
void ApplyWrites(MappedFileAccessor& accessor);
42+
size_t GetCacheSize() const { return m_cacheSize; }
5843
};
5944

6045
class WeakFileAccessor
6146
{
47+
using ReviveCallback = std::function<void(MappedFileAccessor&)>;
48+
6249
// Weak pointer to the mapped file accessor, once this is expired we will re-open.
6350
std::weak_ptr<MappedFileAccessor> m_weakPtr;
6451
// File path for re-opening if needed
6552
std::string m_filePath;
6653

6754
// Used to re-add writes once the file accessor is "revived".
68-
std::shared_ptr<FileAccessorWriteLog> m_writeLog;
55+
std::optional<ReviveCallback> m_reviveCallback;
6956

7057
// TODO: Store a weak_ptr/shared_ptr to FileAccessorCache? That way we dont access Global()
7158
// TODO: Only need to do the above if we want multiple caches.
7259

7360
public:
7461
explicit WeakFileAccessor(std::weak_ptr<MappedFileAccessor> weakPtr, std::string filePath) :
75-
m_weakPtr(std::move(weakPtr)), m_filePath(std::move(filePath)),
76-
m_writeLog(std::make_shared<FileAccessorWriteLog>())
62+
m_weakPtr(std::move(weakPtr)), m_filePath(std::move(filePath))
7763
{}
7864

79-
std::shared_ptr<MappedFileAccessor> lock();
65+
// Register the function to be called once the file accessor is revived, this is typically
66+
// used to re-apply writes such as from slide info.
67+
void RegisterReviveCallback(const ReviveCallback& callback) {
68+
m_reviveCallback = callback;
69+
}
8070

81-
// Persists the written pointer within this weak file accessor.
82-
// This works as we expect the weak file accessor to be stored per virtual memory region.
83-
void WritePointer(size_t address, size_t pointer);
71+
std::shared_ptr<MappedFileAccessor> lock();
8472
};

view/sharedcache/core/MappedFileAccessor.cpp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#include "MappedFileAccessor.h"
22

3-
43
std::shared_ptr<MappedFileAccessor> MappedFileAccessor::Open(const std::string& filePath)
54
{
65
auto file = MappedFile::OpenFile(filePath);
@@ -40,48 +39,48 @@ std::string MappedFileAccessor::ReadNullTermString(size_t address, const size_t
4039
return str;
4140
}
4241

43-
uint8_t MappedFileAccessor::ReadUInt8(size_t address)
42+
uint8_t MappedFileAccessor::ReadUInt8(size_t address) const
4443
{
4544
return Read<uint8_t>(address);
4645
}
4746

48-
int8_t MappedFileAccessor::ReadInt8(size_t address)
47+
int8_t MappedFileAccessor::ReadInt8(size_t address) const
4948
{
5049
return Read<int8_t>(address);
5150
}
5251

53-
uint16_t MappedFileAccessor::ReadUInt16(size_t address)
52+
uint16_t MappedFileAccessor::ReadUInt16(size_t address) const
5453
{
5554
return Read<uint16_t>(address);
5655
}
5756

58-
int16_t MappedFileAccessor::ReadInt16(size_t address)
57+
int16_t MappedFileAccessor::ReadInt16(size_t address) const
5958
{
6059
return Read<int16_t>(address);
6160
}
6261

6362

64-
uint32_t MappedFileAccessor::ReadUInt32(size_t address)
63+
uint32_t MappedFileAccessor::ReadUInt32(size_t address) const
6564
{
6665
return Read<uint32_t>(address);
6766
}
6867

69-
int32_t MappedFileAccessor::ReadInt32(size_t address)
68+
int32_t MappedFileAccessor::ReadInt32(size_t address) const
7069
{
7170
return Read<int32_t>(address);
7271
}
7372

74-
uint64_t MappedFileAccessor::ReadUInt64(size_t address)
73+
uint64_t MappedFileAccessor::ReadUInt64(size_t address) const
7574
{
7675
return Read<uint64_t>(address);
7776
}
7877

79-
int64_t MappedFileAccessor::ReadInt64(size_t address)
78+
int64_t MappedFileAccessor::ReadInt64(size_t address) const
8079
{
8180
return Read<int64_t>(address);
8281
}
8382

84-
BinaryNinja::DataBuffer MappedFileAccessor::ReadBuffer(size_t addr, size_t length)
83+
BinaryNinja::DataBuffer MappedFileAccessor::ReadBuffer(size_t addr, size_t length) const
8584
{
8685
if (addr + length > Length())
8786
throw UnmappedAccessException(addr + length, Length());
@@ -104,7 +103,7 @@ void MappedFileAccessor::Read(void* dest, size_t addr, size_t length) const
104103
}
105104

106105
template <typename T>
107-
T MappedFileAccessor::Read(size_t address)
106+
T MappedFileAccessor::Read(size_t address) const
108107
{
109108
T result;
110109
Read(&result, address, sizeof(T));

view/sharedcache/core/MappedFileAccessor.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -70,23 +70,23 @@ class MappedFileAccessor : public std::enable_shared_from_this<MappedFileAccesso
7070

7171
std::string ReadNullTermString(size_t address, size_t maxLength = -1) const;
7272

73-
uint8_t ReadUInt8(size_t address);
73+
uint8_t ReadUInt8(size_t address) const;
7474

75-
int8_t ReadInt8(size_t address);
75+
int8_t ReadInt8(size_t address) const;
7676

77-
uint16_t ReadUInt16(size_t address);
77+
uint16_t ReadUInt16(size_t address) const;
7878

79-
int16_t ReadInt16(size_t address);
79+
int16_t ReadInt16(size_t address) const;
8080

81-
uint32_t ReadUInt32(size_t address);
81+
uint32_t ReadUInt32(size_t address) const;
8282

83-
int32_t ReadInt32(size_t address);
83+
int32_t ReadInt32(size_t address) const;
8484

85-
uint64_t ReadUInt64(size_t address);
85+
uint64_t ReadUInt64(size_t address) const;
8686

87-
int64_t ReadInt64(size_t address);
87+
int64_t ReadInt64(size_t address) const;
8888

89-
BinaryNinja::DataBuffer ReadBuffer(size_t addr, size_t length);
89+
BinaryNinja::DataBuffer ReadBuffer(size_t addr, size_t length) const;
9090

9191
// Returns a range of pointers within the mapped memory region corresponding to
9292
// {addr, length}.
@@ -99,5 +99,5 @@ class MappedFileAccessor : public std::enable_shared_from_this<MappedFileAccesso
9999
void Read(void* dest, size_t addr, size_t length) const;
100100

101101
template <typename T>
102-
T Read(size_t address);
102+
T Read(size_t address) const;
103103
};

view/sharedcache/core/SharedCache.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -397,10 +397,42 @@ void SharedCache::ProcessEntryRegions(const CacheEntry& entry)
397397
}
398398
}
399399

400-
void SharedCache::ProcessEntrySlideInfo(const CacheEntry& entry)
400+
void SharedCache::ProcessEntrySlideInfo(const CacheEntry& entry) const
401401
{
402402
auto slideInfoProcessor = SlideInfoProcessor(GetBaseAddress());
403-
slideInfoProcessor.ProcessEntry(*m_vm, entry);
403+
404+
// This will be set for every associated `VirtualMemoryRegion` so that any accesses though the VM will be always be slid.
405+
// NOTE: This MUST be called on the CacheEntry object owned by SharedCache, otherwise persistence through the `SharedCacheController` will not occur.
406+
// NOTE: This will keep a copy of a processor in the `WeakFileAccessor` until that object is destroyed (likely view destruction).
407+
// NOTE: This will keep a copy of the cache entry in the `WeakFileAccessor` until that object is destroyed (likely view destruction).
408+
auto reviveCallback = [slideInfoProcessor, entry](MappedFileAccessor& revivedAccessor) {
409+
slideInfoProcessor.ProcessEntry(revivedAccessor, entry);
410+
};
411+
412+
// Use the current entry accessor, don't register the callback for this one as we want calls through the VM to be slid only.
413+
// Actually process the slide info for this entry, everything else besides this is to support revived file accessors.
414+
auto slideMappings = slideInfoProcessor.ProcessEntry(*entry.GetAccessor().lock(), entry);
415+
416+
// Register the revive callback for all virtual memory regions that have been slid.
417+
// The reason we don't just set this on the entry accessor is that accessor is not consulted for anything really after
418+
// this point, everything else will be going through the virtual memory, and because the callback is on the weak accessor
419+
// reference and not the file accessor cache itself this matters.
420+
auto vm = GetVirtualMemory();
421+
for (const auto& mapping : slideMappings)
422+
{
423+
// Because the mapping address is a file offset for us to consult the virtual memory we must first call `GetMappedAddress`.
424+
if (auto mappedMappingAddr = entry.GetMappedAddress(mapping.address))
425+
{
426+
if (auto vmRegion = vm->GetRegionAtAddress(*mappedMappingAddr))
427+
{
428+
// Ok we have the virtual memory region, lets register the callback on its accessor.
429+
vmRegion->fileAccessor.RegisterReviveCallback(reviveCallback);
430+
continue;
431+
}
432+
}
433+
434+
LogWarn("Failed to register revive callback for slide mapping %llx in entry '%s'", mapping.address, entry.GetFileName().c_str());
435+
}
404436
}
405437

406438
void SharedCache::ProcessSymbols()

view/sharedcache/core/SharedCache.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,8 @@ class CacheEntry
134134
// Mappings tell us _where_ to map the regions within the flat address space.
135135
// Without this we wouldn't know where the entry is supposed to exist in the address space.
136136
std::vector<dyld_cache_mapping_info> m_mappings {};
137-
// TODO: We really should remove this methinks.
138-
// TODO: Storing this here is basically useless? IDK
139137
// Mapping of image path to image info, used within ProcessImagesAndRegions to add them to the cache.
138+
// Also used to retrieve the image dependencies.
140139
std::unordered_map<std::string, dyld_cache_image_info> m_images {};
141140

142141
public:
@@ -221,7 +220,7 @@ class SharedCache
221220
SharedCache &operator=(SharedCache &&) noexcept = default;
222221

223222
uint64_t GetBaseAddress() const { return m_baseAddress; }
224-
std::shared_ptr<VirtualMemory> GetVirtualMemory() { return m_vm; }
223+
std::shared_ptr<VirtualMemory> GetVirtualMemory() const { return m_vm; }
225224
const std::unordered_map<CacheEntryId, CacheEntry>& GetEntries() const { return m_entries; }
226225
const AddressRangeMap<CacheRegion>& GetRegions() const { return m_regions; }
227226
const std::unordered_map<uint64_t, CacheImage>& GetImages() const { return m_images; }
@@ -245,7 +244,7 @@ class SharedCache
245244

246245
void ProcessEntryRegions(const CacheEntry& entry);
247246

248-
void ProcessEntrySlideInfo(const CacheEntry& entry);
247+
void ProcessEntrySlideInfo(const CacheEntry& entry) const;
249248

250249
// Construct the named symbols lookup map for use with `GetSymbolWithName`.
251250
void ProcessSymbols();

view/sharedcache/core/SharedCacheView.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -787,7 +787,8 @@ bool SharedCacheView::Init()
787787
bool result = cacheProcessor.ProcessCache(sharedCache);
788788
auto endTime = std::chrono::high_resolution_clock::now();
789789
std::chrono::duration<double> elapsed = endTime - startTime;
790-
logger->LogInfo("Processing %zu entries took %.3f seconds", sharedCache.GetEntries().size(), elapsed.count());
790+
auto entryCount = sharedCache.GetEntries().size();
791+
logger->LogInfo("Processing %zu entries took %.3f seconds", entryCount, elapsed.count());
791792

792793
if (!result)
793794
{
@@ -800,6 +801,10 @@ bool SharedCacheView::Init()
800801
// Skip all the other shared cache stuff.
801802
return true;
802803
}
804+
805+
// If we can't store all of our files for this cache in the accessor cache we might run into issues, warn the user.
806+
if (entryCount > FileAccessorCache::Global().GetCacheSize())
807+
logger->LogWarn("Cache contains more entries than the allowed number of opened file handles, this may impact reliability.");
803808
}
804809

805810
{

0 commit comments

Comments
 (0)