diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h index 483c867730e9c..72f975d627c52 100644 --- a/llvm/include/llvm/CAS/OnDiskGraphDB.h +++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h @@ -391,7 +391,8 @@ class OnDiskGraphDB { static ObjectID getExternalReference(const IndexProxy &I); void getStandalonePath(StringRef FileSuffix, const IndexProxy &I, - SmallVectorImpl &Path) const; + SmallVectorImpl &PersistentPath, + SmallVectorImpl *TempPath = nullptr) const; ArrayRef getDigest(InternalRef Ref) const; ArrayRef getDigest(const IndexProxy &I) const; diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp index 99c9dd5f681c6..dc8c35d8a05fc 100644 --- a/llvm/lib/CAS/OnDiskGraphDB.cpp +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -50,6 +50,7 @@ #include "llvm/CAS/OnDiskGraphDB.h" #include "OnDiskCommon.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StableHashing.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CAS/OnDiskCASLogger.h" #include "llvm/CAS/OnDiskHashMappedTrie.h" @@ -596,7 +597,17 @@ Error OnDiskGraphDB::TempFile::keep(const Twine &Name) { assert(!Done); Done = true; // Always try to close and rename. - std::error_code RenameEC = sys::fs::rename(TmpName, Name); + std::error_code RenameEC = sys::fs::create_directories( + sys::path::parent_path(Name.str()) + ); + + // We consider both the creation of the directory for the persistent storage + // and the final rename itself to be a failure to rename. This is consistent + // with other filesystem APIs that provide file manipulation APIs that will + // automatically construct directories as needed, so it seems like + // a reasonable choice. + if (!RenameEC) + RenameEC = sys::fs::rename(TmpName, Name); if (Logger) Logger->log_TempFile_keep(TmpName, Name.str(), RenameEC); @@ -607,6 +618,7 @@ Error OnDiskGraphDB::TempFile::keep(const Twine &Name) { sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); if (std::error_code EC = sys::fs::closeFile(File)) return errorCodeToError(EC); + FD = -1; return errorCodeToError(RenameEC); @@ -935,19 +947,22 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { case TrieRecord::StorageKind::Standalone: case TrieRecord::StorageKind::StandaloneLeaf: case TrieRecord::StorageKind::StandaloneLeaf0: - SmallString<256> Path; - getStandalonePath(TrieRecord::getStandaloneFileSuffix(D.SK), I, Path); + SmallString<256> PersistentPath; + getStandalonePath(TrieRecord::getStandaloneFileSuffix(D.SK), I, + PersistentPath); // If need to validate the content of the file later, just load the // buffer here. Otherwise, just check the existance of the file. if (Deep) { - auto File = MemoryBuffer::getFile(Path, /*IsText=*/false, + auto File = MemoryBuffer::getFile(PersistentPath, /*IsText=*/false, /*RequiresNullTerminator=*/false); if (!File || !*File) - return formatError("record file \'" + Path + "\' does not exist"); + return formatError("record file \'" + PersistentPath + + "\' does not exist"); FileBuffer = std::move(*File); - } else if (!llvm::sys::fs::exists(Path)) - return formatError("record file \'" + Path + "\' does not exist"); + } else if (!llvm::sys::fs::exists(PersistentPath)) + return formatError("record file \'" + PersistentPath + + "\' does not exist"); } if (!Deep) @@ -1203,10 +1218,11 @@ OnDiskGraphDB::load(ObjectID ExternalRef) { // Note: Creation logic guarantees that data that needs null-termination is // suitably 0-padded. Requiring null-termination here would be too expensive // for extremely large objects that happen to be page-aligned. - SmallString<256> Path; - getStandalonePath(TrieRecord::getStandaloneFileSuffix(Object.SK), I, Path); + SmallString<256> PersistentPath; + getStandalonePath(TrieRecord::getStandaloneFileSuffix(Object.SK), I, + PersistentPath); ErrorOr> OwnedBuffer = MemoryBuffer::getFile( - Path, /*IsText=*/false, /*RequiresNullTerminator=*/false); + PersistentPath, /*IsText=*/false, /*RequiresNullTerminator=*/false); if (!OwnedBuffer) return createCorruptObjectError(getDigest(I)); @@ -1249,9 +1265,29 @@ InternalRef OnDiskGraphDB::makeInternalRef(FileOffset IndexOffset) { } void OnDiskGraphDB::getStandalonePath(StringRef Suffix, const IndexProxy &I, - SmallVectorImpl &Path) const { - Path.assign(RootPath.begin(), RootPath.end()); - sys::path::append(Path, FilePrefix + Twine(I.Offset.get()) + Suffix); + SmallVectorImpl &PersistentPath, + SmallVectorImpl *TempPath) const { + PersistentPath.assign(RootPath.begin(), RootPath.end()); + if (TempPath) + TempPath->assign(RootPath.begin(), RootPath.end()); + SmallVector FileNameBuffer; + StringRef FileName = + (FilePrefix + Twine(I.Offset.get()) + Suffix).toStringRef(FileNameBuffer); + + unsigned FileNameHash = stable_hash_name(FileName); + + // This is around 10 bits of entropy given we're using radix-36 + static const unsigned IntermediateDirNameLength = 2; + static const char Radix36Chars[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + static_assert(sizeof(Radix36Chars) == 36 + /* null */ 1); + SmallString IntermediateDirName; + for (unsigned Ch = 0; Ch < IntermediateDirNameLength; ++Ch) { + IntermediateDirName.push_back(Radix36Chars[FileNameHash % 36]); + FileNameHash /= 36; + } + sys::path::append(PersistentPath, IntermediateDirName, FileName); + if (TempPath) + sys::path::append(*TempPath, FileName); } OnDiskContent OnDiskGraphDB::getContentFromHandle(ObjectHandle OH) const { @@ -1339,13 +1375,15 @@ Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef Data) { TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0 : TrieRecord::StorageKind::StandaloneLeaf; - SmallString<256> Path; + SmallString<256> PersistentPath; + SmallString<256> TempPath; int64_t FileSize = Data.size() + Leaf0; - getStandalonePath(TrieRecord::getStandaloneFileSuffix(SK), I, Path); + getStandalonePath(TrieRecord::getStandaloneFileSuffix(SK), I, PersistentPath, + &TempPath); // Write the file. Don't reuse this mapped_file_region, which is read/write. // Let load() pull up one that's read-only. - Expected File = createTempFile(Path, FileSize); + Expected File = createTempFile(TempPath, FileSize); if (!File) return File.takeError(); assert(File->size() == (uint64_t)FileSize); @@ -1353,7 +1391,7 @@ Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef Data) { if (Leaf0) File->data()[Data.size()] = 0; assert(File->data()[Data.size()] == 0); - if (Error E = File->keep(Path)) + if (Error E = File->keep(PersistentPath)) return E; // Store the object reference. @@ -1402,14 +1440,15 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef Refs, // Compute the storage kind, allocate it, and create the record. TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown; FileOffset PoolOffset; - SmallString<256> Path; + SmallString<256> PersistentPath; + SmallString<256> TempPath; std::optional File; std::optional FileSize; auto AllocStandaloneFile = [&](size_t Size) -> Expected { getStandalonePath(TrieRecord::getStandaloneFileSuffix( TrieRecord::StorageKind::Standalone), - I, Path); - if (Error E = createTempFile(Path, Size).moveInto(File)) + I, PersistentPath, &TempPath); + if (Error E = createTempFile(TempPath, Size).moveInto(File)) return std::move(E); assert(File->size() == Size); FileSize = Size; @@ -1463,7 +1502,7 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef Refs, if (File) { if (Existing.SK == TrieRecord::StorageKind::Unknown) { // Keep the file! - if (Error E = File->keep(Path)) + if (Error E = File->keep(PersistentPath)) return E; } else { File.reset();