From 933daf279045ac043ef8c3ecec8af7e907403a5d Mon Sep 17 00:00:00 2001 From: Oliver Hunt Date: Fri, 1 Aug 2025 16:59:45 -0700 Subject: [PATCH] [llvm][cas] Fan out persistent storage to reduce filesystem load The CAS OnDiskGraphDB backend currently uses a single directory for persistent storage. Over time this can lead to a very large number of files accumulating in one directory. While the point at which this overhead becomes significant varies from one file system to another, all of them eventually reach a point where the number of entries in a single directory starts to seriously degrade the performance of a wide array of operations performed on the entries within that directory or the directory itself. In this PR we introduce an intermediate layer of subdirectories in order to spread the individual persistent files over a large number of different directories. This is the same general approach that has been taken by a wide array of other applications for exactly the same reasons. The PR currently uses 2 radix-36 characters giving an approximately 10 bit fan out. There does not seem to be a consistent bias in favour of wider or deeper fan outs, but for now a relatively wide and shallow approach seems reasonable. We currently choose not to use a fan out for temporary files, mostly for practical reasons. When fanning out across subdirs we need to ensure that the subdirectories exist, but LLVM's current APIs for constructing temporary files do not provide a mechanism for automatically building the required directories. While we could add such functionality, that may actually hinder performance rather than helping: temporary files created by llvm are just that, and so are cleaned up at the end of execution. As a result the risk of large numbers of files accumulating is relatively low. At the same time the additional file system work needed to check for and then create new directory entries is relatively high. 
--- llvm/include/llvm/CAS/OnDiskGraphDB.h | 3 +- llvm/lib/CAS/OnDiskGraphDB.cpp | 81 ++++++++++++++++++++------- 2 files changed, 62 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h index 483c867730e9c..72f975d627c52 100644 --- a/llvm/include/llvm/CAS/OnDiskGraphDB.h +++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h @@ -391,7 +391,8 @@ class OnDiskGraphDB { static ObjectID getExternalReference(const IndexProxy &I); void getStandalonePath(StringRef FileSuffix, const IndexProxy &I, - SmallVectorImpl &Path) const; + SmallVectorImpl &PersistentPath, + SmallVectorImpl *TempPath = nullptr) const; ArrayRef getDigest(InternalRef Ref) const; ArrayRef getDigest(const IndexProxy &I) const; diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp index 99c9dd5f681c6..dc8c35d8a05fc 100644 --- a/llvm/lib/CAS/OnDiskGraphDB.cpp +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -50,6 +50,7 @@ #include "llvm/CAS/OnDiskGraphDB.h" #include "OnDiskCommon.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StableHashing.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CAS/OnDiskCASLogger.h" #include "llvm/CAS/OnDiskHashMappedTrie.h" @@ -596,7 +597,17 @@ Error OnDiskGraphDB::TempFile::keep(const Twine &Name) { assert(!Done); Done = true; // Always try to close and rename. - std::error_code RenameEC = sys::fs::rename(TmpName, Name); + std::error_code RenameEC = sys::fs::create_directories( + sys::path::parent_path(Name.str()) + ); + + // We treat a failure to create the directory for the persistent storage and + // a failure of the final rename itself as a failure to rename. This is + // consistent with other filesystem APIs whose file manipulation operations + // automatically construct directories as needed, so it seems like + // a reasonable choice. 
+ if (!RenameEC) + RenameEC = sys::fs::rename(TmpName, Name); if (Logger) Logger->log_TempFile_keep(TmpName, Name.str(), RenameEC); @@ -607,6 +618,7 @@ Error OnDiskGraphDB::TempFile::keep(const Twine &Name) { sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); if (std::error_code EC = sys::fs::closeFile(File)) return errorCodeToError(EC); + FD = -1; return errorCodeToError(RenameEC); @@ -935,19 +947,22 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { case TrieRecord::StorageKind::Standalone: case TrieRecord::StorageKind::StandaloneLeaf: case TrieRecord::StorageKind::StandaloneLeaf0: - SmallString<256> Path; - getStandalonePath(TrieRecord::getStandaloneFileSuffix(D.SK), I, Path); + SmallString<256> PersistentPath; + getStandalonePath(TrieRecord::getStandaloneFileSuffix(D.SK), I, + PersistentPath); // If need to validate the content of the file later, just load the // buffer here. Otherwise, just check the existance of the file. if (Deep) { - auto File = MemoryBuffer::getFile(Path, /*IsText=*/false, + auto File = MemoryBuffer::getFile(PersistentPath, /*IsText=*/false, /*RequiresNullTerminator=*/false); if (!File || !*File) - return formatError("record file \'" + Path + "\' does not exist"); + return formatError("record file \'" + PersistentPath + + "\' does not exist"); FileBuffer = std::move(*File); - } else if (!llvm::sys::fs::exists(Path)) - return formatError("record file \'" + Path + "\' does not exist"); + } else if (!llvm::sys::fs::exists(PersistentPath)) + return formatError("record file \'" + PersistentPath + + "\' does not exist"); } if (!Deep) @@ -1203,10 +1218,11 @@ OnDiskGraphDB::load(ObjectID ExternalRef) { // Note: Creation logic guarantees that data that needs null-termination is // suitably 0-padded. Requiring null-termination here would be too expensive // for extremely large objects that happen to be page-aligned. 
- SmallString<256> Path; - getStandalonePath(TrieRecord::getStandaloneFileSuffix(Object.SK), I, Path); + SmallString<256> PersistentPath; + getStandalonePath(TrieRecord::getStandaloneFileSuffix(Object.SK), I, + PersistentPath); ErrorOr> OwnedBuffer = MemoryBuffer::getFile( - Path, /*IsText=*/false, /*RequiresNullTerminator=*/false); + PersistentPath, /*IsText=*/false, /*RequiresNullTerminator=*/false); if (!OwnedBuffer) return createCorruptObjectError(getDigest(I)); @@ -1249,9 +1265,29 @@ InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) { } void OnDiskGraphDB::getStandalonePath(StringRef Suffix, const IndexProxy &I, - SmallVectorImpl &Path) const { - Path.assign(RootPath.begin(), RootPath.end()); - sys::path::append(Path, FilePrefix + Twine(I.Offset.get()) + Suffix); + SmallVectorImpl &PersistentPath, + SmallVectorImpl *TempPath) const { + PersistentPath.assign(RootPath.begin(), RootPath.end()); + if (TempPath) + TempPath->assign(RootPath.begin(), RootPath.end()); + SmallVector FileNameBuffer; + StringRef FileName = + (FilePrefix + Twine(I.Offset.get()) + Suffix).toStringRef(FileNameBuffer); + + unsigned FileNameHash = stable_hash_name(FileName); + + // This is around 10 bits of entropy given we're using radix-36 + static const unsigned IntermediateDirNameLength = 2; + static const char Radix36Chars[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + static_assert(sizeof(Radix36Chars) == 36 + /* null */ 1); + SmallString IntermediateDirName; + for (unsigned Ch = 0; Ch < IntermediateDirNameLength; ++Ch) { + IntermediateDirName.push_back(Radix36Chars[FileNameHash % 36]); + FileNameHash /= 36; + } + sys::path::append(PersistentPath, IntermediateDirName, FileName); + if (TempPath) + sys::path::append(*TempPath, FileName); } OnDiskContent OnDiskGraphDB::getContentFromHandle(ObjectHandle OH) const { @@ -1339,13 +1375,15 @@ Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef Data) { TrieRecord::StorageKind SK = Leaf0 ? 
TrieRecord::StorageKind::StandaloneLeaf0 : TrieRecord::StorageKind::StandaloneLeaf; - SmallString<256> Path; + SmallString<256> PersistentPath; + SmallString<256> TempPath; int64_t FileSize = Data.size() + Leaf0; - getStandalonePath(TrieRecord::getStandaloneFileSuffix(SK), I, Path); + getStandalonePath(TrieRecord::getStandaloneFileSuffix(SK), I, PersistentPath, + &TempPath); // Write the file. Don't reuse this mapped_file_region, which is read/write. // Let load() pull up one that's read-only. - Expected File = createTempFile(Path, FileSize); + Expected File = createTempFile(TempPath, FileSize); if (!File) return File.takeError(); assert(File->size() == (uint64_t)FileSize); @@ -1353,7 +1391,7 @@ Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef Data) { if (Leaf0) File->data()[Data.size()] = 0; assert(File->data()[Data.size()] == 0); - if (Error E = File->keep(Path)) + if (Error E = File->keep(PersistentPath)) return E; // Store the object reference. @@ -1402,14 +1440,15 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef Refs, // Compute the storage kind, allocate it, and create the record. TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown; FileOffset PoolOffset; - SmallString<256> Path; + SmallString<256> PersistentPath; + SmallString<256> TempPath; std::optional File; std::optional FileSize; auto AllocStandaloneFile = [&](size_t Size) -> Expected { getStandalonePath(TrieRecord::getStandaloneFileSuffix( TrieRecord::StorageKind::Standalone), - I, Path); - if (Error E = createTempFile(Path, Size).moveInto(File)) + I, PersistentPath, &TempPath); + if (Error E = createTempFile(TempPath, Size).moveInto(File)) return std::move(E); assert(File->size() == Size); FileSize = Size; @@ -1463,7 +1502,7 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef Refs, if (File) { if (Existing.SK == TrieRecord::StorageKind::Unknown) { // Keep the file! 
- if (Error E = File->keep(Path)) + if (Error E = File->keep(PersistentPath)) return E; } else { File.reset();