Skip to content

Commit 09a617a

Browse files
committed
[SharedCache] Improve the gathering of cache entries in view init
This commit addresses some of the issues with retrieving cache entries during view init. We now try to store information about the shared cache entries in the database so that we can better alert the user to possible issues when loading a saved shared cache database. We also simplified the logic for retrieving the primary cache file so the flow is easier to follow, and users are now prompted to select the primary cache file when we cannot determine it ourselves. More errors are also shown to give users more information about the shared cache and any issues it might have. For example, we now warn users when they open a shared cache with fewer entries than the shared cache reports having. We also warn users when opening a database if a previously associated secondary cache file is no longer available. In the future we can also use the shared cache UUIDs of individual files to do a more aggressive search across directories in a project. This would potentially make the file name lookup unnecessary, at least when opening a database with all file UUIDs stored.
1 parent e311e60 commit 09a617a

File tree

2 files changed

+162
-83
lines changed

2 files changed

+162
-83
lines changed

view/sharedcache/core/SharedCacheView.cpp

Lines changed: 141 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,13 @@ bool SharedCacheViewType::IsTypeValidForData(BinaryView* data)
145145
SharedCacheView::SharedCacheView(const std::string& typeName, BinaryView* data, bool parseOnly) :
146146
BinaryView(typeName, data->GetFile(), data), m_parseOnly(parseOnly)
147147
{
148-
CreateLogger("SharedCache");
149-
CreateLogger("SharedCache.ObjC");
148+
// By default, we will assume the primary file name from the original file path.
149+
// This is subject to be overridden via `LoadMetadata`.
150+
m_primaryFileName = BaseFileName(GetFile()->GetOriginalFilename());
151+
152+
// Load up the metadata from the parent view (because our metadata is hilariously not available during view init)
153+
if (const auto metadata = GetParentView()->QueryMetadata(VIEW_METADATA_KEY))
154+
LoadMetadata(*metadata);
150155
}
151156

152157
enum DSCPlatform
@@ -796,72 +801,25 @@ bool SharedCacheView::Init()
796801
return InitController();
797802
}
798803

799-
// Get the file path and file name for the primary cache entry.
800-
// We need this to support BNDB and projects.
801-
std::optional<std::pair<std::string, std::string>> GetPrimaryFileInfo(BinaryView& view)
802-
{
803-
auto viewFile = view.GetFile();
804-
// Add the primary file, for a regular view this is the original path.
805-
auto primaryFilePath = viewFile->GetOriginalFilename();
806-
807-
// If we don't have an original file path, prompt the user to select one.
808-
if (primaryFilePath.empty())
809-
{
810-
if (!GetOpenFileNameInput(primaryFilePath, "Please select the primary shared cache file"))
811-
return std::nullopt;
812-
// Update so next load we don't need to prompt the user.
813-
viewFile->SetOriginalFilename(primaryFilePath);
814-
}
815-
816-
// The primary file name for which we will use when adding files.
817-
// NOTE: For projects the file name here is a UUID, we will need to traverse the project to get the name.
818-
auto primaryFileName = BaseFileName(primaryFilePath);
819-
820-
// If we are a project file, we need to grab the actual name of the file.
821-
if (auto primaryProjectFile = viewFile->GetProjectFile())
822-
{
823-
primaryFileName = primaryProjectFile->GetName();
824-
825-
// If we are a BNDB in a project, we need to change the `primaryFilePath` as well.
826-
// Because our shared cache processing only works through our mapped file accessor we need to original
827-
// file to map in, this can be relaxed if we ever support Binary Ninja file accessors.
828-
if (primaryFileName.find(".bndb") != std::string::npos)
829-
{
830-
primaryFileName = primaryFileName.substr(0, primaryFileName.size() - 5);
831-
for (const auto& pj : primaryProjectFile->GetProject()->GetFiles())
832-
{
833-
auto projectFilePath = pj->GetPathOnDisk();
834-
auto projectFileName = pj->GetName();
835-
if (projectFileName == primaryFileName || projectFilePath == primaryFilePath)
836-
{
837-
primaryFilePath = projectFilePath;
838-
primaryFileName = projectFileName;
839-
break;
840-
}
841-
}
842-
}
843-
}
844-
845-
return {{primaryFilePath, primaryFileName}};
846-
}
847-
848804
bool SharedCacheView::InitController()
849805
{
850-
auto primaryFileInfo = GetPrimaryFileInfo(*this);
851-
if (!primaryFileInfo.has_value())
806+
auto primaryFilePath = GetPrimaryFilePath();
807+
if (!primaryFilePath.has_value())
808+
{
809+
m_logger->LogError("Failed to get primary file path!");
852810
return false;
853-
auto [primaryFilePath, primaryFileName] = primaryFileInfo.value();
854-
std::string primaryFileDir = std::filesystem::path(primaryFilePath).parent_path().string();
811+
}
812+
std::string primaryFileDir = std::filesystem::path(*primaryFilePath).parent_path().string();
855813

856814
// OK, we have the primary shared cache file, now let's add the entries.
857815
auto sharedCacheBuilder = SharedCacheBuilder(this);
858-
sharedCacheBuilder.SetPrimaryFileName(primaryFileName);
816+
sharedCacheBuilder.SetPrimaryFileName(m_primaryFileName);
859817

860818
// Add the primary file. If we fail log alert that the primary cache file is invalid.
861819
// We process the primary cache entry first as it might be consulted in the processing of later entries.
862-
if (!sharedCacheBuilder.AddFile(primaryFilePath, primaryFileName, CacheEntryType::Primary))
820+
if (!sharedCacheBuilder.AddFile(*primaryFilePath, m_primaryFileName, CacheEntryType::Primary))
863821
{
864-
m_logger->LogAlertF("Failed to add primary cache file: '{}'", primaryFileName);
822+
m_logger->LogAlertF("Failed to add primary cache file: '{}'", m_primaryFileName);
865823
return false;
866824
}
867825

@@ -871,39 +829,35 @@ bool SharedCacheView::InitController()
871829
sharedCacheBuilder.AddDirectory(primaryFileDir);
872830
if (auto projectFile = GetFile()->GetProjectFile())
873831
sharedCacheBuilder.AddProjectFolder(projectFile->GetFolder());
874-
auto entryCount = sharedCacheBuilder.GetCache().GetEntries().size();
832+
auto totalEntries = sharedCacheBuilder.GetCache().GetEntries().size();
875833
auto endTime = std::chrono::high_resolution_clock::now();
876834
std::chrono::duration<double> elapsed = endTime - startTime;
877-
m_logger->LogInfo("Processing %zu entries took %.3f seconds", entryCount, elapsed.count());
835+
m_logger->LogInfo("Processing %zu entries took %.3f seconds", totalEntries, elapsed.count());
878836

879837
// If we can't store all of our files for this cache in the accessor cache we might run into issues, warn the user.
880-
if (entryCount > FileAccessorCache::Global().GetCacheSize())
838+
if (totalEntries > FileAccessorCache::Global().GetCacheSize())
881839
m_logger->LogWarn("Cache contains more entries than the allowed number of opened file handles, this may impact reliability.");
882840

883-
// If we have less entries than the primary header subcache array count than let the user add another directory.
884-
const auto& cacheEntries = sharedCacheBuilder.GetCache().GetEntries();
885-
for (const auto& [_, entry] : cacheEntries)
841+
// Verify that we are not missing any entries that were stored in the metadata.
842+
// If we are that means we should alert the user that a previously associated cache entry is missing.
843+
std::set<std::string> missingCacheEntries = m_secondaryFileNames;
844+
uint64_t expectedFileCount = 1;
845+
for (const auto& entry : sharedCacheBuilder.GetCache().GetEntries())
886846
{
887-
if (entry.GetType() != CacheEntryType::Primary)
888-
continue;
889-
890-
auto requiredCount = entry.GetHeader().subCacheArrayCount;
891-
if (requiredCount <= entryCount)
892-
continue;
893-
894-
m_logger->LogWarnF("Opening with {} entries when shared cache header says there are {}, likely missing files, prompting user to select a directory containing the rest.", entryCount, requiredCount);
895-
// We don't have enough entries, prompt the user to select a directory with the rest.
896-
std::string supplementaryDir;
897-
if (GetDirectoryNameInput(supplementaryDir, "Directory with associated shared cache files"))
898-
{
899-
auto additionalEntries = sharedCacheBuilder.AddDirectory(primaryFileDir);
900-
m_logger->LogInfoF("Processed an additional {} entries...", additionalEntries);
901-
entryCount += additionalEntries;
902-
// If we are still below the count, just let the user know and continue.
903-
if (entryCount < requiredCount)
904-
m_logger->LogWarnF("Provided entry files still below the reported entry count in the shared cache header. Some functionality may be lost.");
905-
}
847+
missingCacheEntries.erase(entry.GetFileName());
848+
LogSecondaryFileName(entry.GetFileName());
849+
// Set the number of sub-caches so we can verify it later.
850+
if (entry.GetType() == CacheEntryType::Primary)
851+
expectedFileCount += entry.GetHeader().subCacheArrayCount;
906852
}
853+
for (const auto& missingFileName: missingCacheEntries)
854+
m_logger->LogErrorF("Secondary cache file '{}' is missing!", missingFileName);
855+
856+
// Verify that we have the required amount of sub-caches, if not alert the user.
857+
if (expectedFileCount == 1)
858+
m_logger->LogAlertF("Primary cache file '{}' has no sub-caches! You are likely opening a secondary cache file instead of a primary one.", m_primaryFileName);
859+
else if (totalEntries < expectedFileCount)
860+
m_logger->LogAlertF("Insufficient cache files in dyld header ({}/{}), loading as partial shared cache...", totalEntries, expectedFileCount);
907861
}
908862

909863
auto sharedCache = sharedCacheBuilder.Finalize();
@@ -987,3 +941,107 @@ bool SharedCacheView::InitController()
987941

988942
return true;
989943
}
944+
945+
void SharedCacheView::SetPrimaryFileName(std::string primaryFileName)
946+
{
947+
m_primaryFileName = std::move(primaryFileName);
948+
GetParentView()->StoreMetadata(VIEW_METADATA_KEY, GetMetadata());
949+
}
950+
951+
void SharedCacheView::LogSecondaryFileName(std::string secondaryFileName)
952+
{
953+
m_secondaryFileNames.insert(std::move(secondaryFileName));
954+
GetParentView()->StoreMetadata(VIEW_METADATA_KEY, GetMetadata());
955+
}
956+
957+
std::optional<std::string> SharedCacheView::GetPrimaryFilePath()
958+
{
959+
auto viewFile = GetFile();
960+
// 1. Try and get the primary file path using `GetOriginalFilename`.
961+
auto primaryFilePath = viewFile->GetOriginalFilename();
962+
963+
// 2. If the original file name is not a usable file path then prompt the user to select one.
964+
if (primaryFilePath.empty() || !std::filesystem::exists(primaryFilePath))
965+
{
966+
if (!GetOpenFileNameInput(primaryFilePath, "Please select the primary shared cache file"))
967+
return std::nullopt;
968+
SetPrimaryFileName(BaseFileName(primaryFilePath));
969+
// Update so next load we don't need to prompt the user.
970+
viewFile->SetOriginalFilename(primaryFilePath);
971+
}
972+
973+
// 3. If we are not in a project, we can go ahead and return the file path, it does not need to be resolved from project.
974+
auto primaryProjectFile = viewFile->GetProjectFile();
975+
if (!primaryProjectFile)
976+
return primaryFilePath;
977+
978+
auto project = primaryProjectFile->GetProject();
979+
auto primaryProjectFileName = primaryProjectFile->GetName();
980+
auto primaryProjectFilePath = primaryProjectFile->GetPathOnDisk();
981+
982+
// 4. If we are not a BNDB project file than we can return the path on disk as we are the primary file.
983+
if (primaryProjectFileName.find(".bndb") == std::string::npos)
984+
{
985+
// Set the primary file name to the project file name so on subsequent loads we can pick it up.
986+
SetPrimaryFileName(primaryProjectFileName);
987+
return primaryProjectFilePath;
988+
}
989+
990+
// 5. If we are a BNDB project file the path must be resolved from the file name.
991+
auto primaryProjectFileFolder = primaryProjectFile->GetFolder();
992+
for (const auto& pj : project->GetFiles())
993+
{
994+
// Skip files not in the same folder.
995+
if (!IsSameFolder(pj->GetFolder(), primaryProjectFileFolder))
996+
continue;
997+
// We are looking for the file with file name we stored in metadata.
998+
if (pj->GetName() != m_primaryFileName)
999+
continue;
1000+
return pj->GetPathOnDisk();
1001+
}
1002+
1003+
// 6. If we fail to resolve the project file given the `m_primaryFileName` than we fall back to asking the user.
1004+
std::string newPrimaryFilePath;
1005+
if (!GetOpenFileNameInput(newPrimaryFilePath, "Please select the primary shared cache file"))
1006+
return std::nullopt;
1007+
1008+
// TODO: We likely want to verify that the project file exists in the same directory as the BNDB.
1009+
// TODO: We currently require the database to exist in the same directory as the files.
1010+
// Update the primary file name for later loads, otherwise we would keep prompting to select a file.
1011+
primaryProjectFile = project->GetFileByPathOnDisk(newPrimaryFilePath);
1012+
SetPrimaryFileName(primaryProjectFile->GetName());
1013+
return newPrimaryFilePath;
1014+
}
1015+
1016+
// Builds the metadata describing the saved state of this shared cache view.
//
// The result is a key-value store with two entries:
//   "primaryFileName"    - `m_primaryFileName`
//   "secondaryFileNames" - the contents of `m_secondaryFileNames` as a string list
Ref<Metadata> SharedCacheView::GetMetadata() const
{
	// Copy the set of names into a vector to hand to the Metadata constructor.
	std::vector<std::string> secondaryFileNames(m_secondaryFileNames.begin(), m_secondaryFileNames.end());

	// TODO: Refactor this to just "cache files" which is a new struct of:
	// TODO: cache file name
	// TODO: cache file UUID
	// TODO: cache file entry type?
	std::map<std::string, Ref<Metadata>> viewMeta;
	viewMeta["primaryFileName"] = new Metadata(m_primaryFileName);
	viewMeta["secondaryFileNames"] = new Metadata(secondaryFileNames);
	return new Metadata(viewMeta);
}
1034+
1035+
void SharedCacheView::LoadMetadata(const Metadata &metadata)
1036+
{
1037+
auto viewMeta = metadata.GetKeyValueStore();
1038+
if (viewMeta.find("secondaryFileNames") != viewMeta.end())
1039+
{
1040+
const auto secondaryFileNames = viewMeta["secondaryFileNames"]->GetStringList();
1041+
for (const auto& secondaryFileName : secondaryFileNames)
1042+
m_secondaryFileNames.insert(secondaryFileName);
1043+
}
1044+
1045+
if (viewMeta.find("primaryFileName") != viewMeta.end())
1046+
m_primaryFileName = viewMeta["primaryFileName"]->GetString();
1047+
}

view/sharedcache/core/SharedCacheView.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,20 @@
77

88
#include <binaryninjaapi.h>
99

10+
// Key under which the shared cache view's state is stored in the parent view's metadata.
// `inline constexpr` gives one immutable definition shared by every translation unit that
// includes this header, rather than a separate, reassignable pointer per translation unit.
inline constexpr const char* VIEW_METADATA_KEY = "shared_cache_view";
1011

1112
class SharedCacheView : public BinaryNinja::BinaryView
1213
{
1314
bool m_parseOnly;
1415
BinaryNinja::Ref<BinaryNinja::Logger> m_logger;
1516

17+
// Restored primary file name from metadata, or the file name on first open.
18+
std::string m_primaryFileName;
19+
20+
// Restored associated file names from metadata, this is all the associated cache entries.
21+
// NOTE: Currently this is just used to alert the user to supposed missing files.
22+
std::set<std::string> m_secondaryFileNames;
23+
1624
public:
1725
SharedCacheView(const std::string& typeName, BinaryView* data, bool parseOnly = false);
1826

@@ -22,6 +30,19 @@ class SharedCacheView : public BinaryNinja::BinaryView
2230

2331
// Initialized the shared cache controller for this view. This is what allows us to load images and regions.
2432
bool InitController();
33+
34+
void SetPrimaryFileName(std::string primaryFileName);
35+
36+
// Logs the secondary file name to `m_secondaryFileNames`, see the note on the field about usage.
37+
void LogSecondaryFileName(std::string associatedFileName);
38+
39+
// Get the path to the primary file.
40+
std::optional<std::string> GetPrimaryFilePath();
41+
42+
// Get the metadata for saving the state of the shared cache.
43+
BinaryNinja::Ref<BinaryNinja::Metadata> GetMetadata() const;
44+
45+
void LoadMetadata(const BinaryNinja::Metadata& metadata);
2546
};
2647

2748

0 commit comments

Comments
 (0)