From 843042139e3732e862e29db0ff32a169e3a5f471 Mon Sep 17 00:00:00 2001 From: Amr Hesham Date: Thu, 6 Feb 2025 13:11:11 +0100 Subject: [PATCH 1/8] [LLVM][Support] Add new CreateFileError functions (#125906) Add new CreateFileError functions to create a StringError with the specified error code and prepend the file path to it Needed for: #125345 (cherry picked from commit 2464f4ba6e0e50bb30c31b6526fa0bdd5a531217) --- llvm/include/llvm/Support/Error.h | 17 +++++++++++++++++ llvm/unittests/Support/ErrorTest.cpp | 11 +++++++++++ 2 files changed, 28 insertions(+) diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h index cb06ac19f0bb7..cb02967655cca 100644 --- a/llvm/include/llvm/Support/Error.h +++ b/llvm/include/llvm/Support/Error.h @@ -1399,6 +1399,23 @@ inline Error createFileError(const Twine &F, size_t Line, std::error_code EC) { return createFileError(F, Line, errorCodeToError(EC)); } +/// Create a StringError with the specified error code and prepend the file path +/// to it. +inline Error createFileError(const Twine &F, std::error_code EC, + const Twine &S) { + Error E = createStringError(EC, S); + return createFileError(F, std::move(E)); +} + +/// Create a StringError with the specified error code and prepend the file path +/// to it. +template +inline Error createFileError(const Twine &F, std::error_code EC, + char const *Fmt, const Ts &...Vals) { + Error E = createStringError(EC, Fmt, Vals...); + return createFileError(F, std::move(E)); +} + Error createFileError(const Twine &F, ErrorSuccess) = delete; /// Helper for check-and-exit error handling. diff --git a/llvm/unittests/Support/ErrorTest.cpp b/llvm/unittests/Support/ErrorTest.cpp index bd098a4988dc5..b5b1c70d3e532 100644 --- a/llvm/unittests/Support/ErrorTest.cpp +++ b/llvm/unittests/Support/ErrorTest.cpp @@ -976,6 +976,17 @@ TEST(Error, FileErrorTest) { handleAllErrors(std::move(FE6), [](std::unique_ptr F) { EXPECT_EQ(F->messageWithoutFileInfo(), "CustomError {6}"); }); + + Error FE7 = + createFileError("file.bin", make_error_code(std::errc::invalid_argument), + "invalid argument"); + EXPECT_EQ(toString(std::move(FE7)), "'file.bin': invalid argument"); + + StringRef Argument = "arg"; + Error FE8 = + createFileError("file.bin", make_error_code(std::errc::invalid_argument), + "invalid argument '%s'", Argument.str().c_str()); + EXPECT_EQ(toString(std::move(FE8)), "'file.bin': invalid argument 'arg'"); } TEST(Error, FileErrorErrorCode) { From d2b9a1b63d868860abf022b5ddb0189138172ce4 Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Wed, 2 Apr 2025 09:25:38 -0700 Subject: [PATCH 2/8] [llvm][cas] Extend on-disk CAS validation to ActionCache Validate the ActionCache hash-mapped trie structure and sanity check the resulting values. Unlike the CAS itself there is no direct way to check the values are "correct", but at least we can check for invalid zero offsets, which is what we would get if we dropped page writes or truncated the file. (cherry picked from commit 2966de47a493297f4ebc8dd1380fd53d9aed9944) --- llvm/include/llvm/CAS/ActionCache.h | 3 +++ llvm/include/llvm/CAS/OnDiskKeyValueDB.h | 3 +++ llvm/include/llvm/CAS/UnifiedOnDiskCache.h | 2 ++ llvm/lib/CAS/ActionCaches.cpp | 18 ++++++++++++++++ llvm/lib/CAS/OnDiskKeyValueDB.cpp | 22 +++++++++++++++++++ llvm/lib/CAS/PluginAPI.h | 2 ++ llvm/lib/CAS/PluginAPI_functions.def | 1 + llvm/lib/CAS/PluginCAS.cpp | 12 +++++++++++ llvm/lib/CAS/UnifiedOnDiskCache.cpp | 25 ++++++++++++++++++++++ llvm/test/tools/llvm-cas/validation.test | 14 ++++++++++++ llvm/tools/llvm-cas/llvm-cas.cpp | 7 +++--- 11 files changed, 106 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/CAS/ActionCache.h b/llvm/include/llvm/CAS/ActionCache.h index f548fb0522480..42236c9b5b45f 100644 --- a/llvm/include/llvm/CAS/ActionCache.h +++ b/llvm/include/llvm/CAS/ActionCache.h @@ -114,6 +114,9 @@ class ActionCache { Globally, std::move(Callback), CancelObj); } + /// Validate the ActionCache contents. + virtual Error validate() const = 0; + virtual ~ActionCache() = default; protected: diff --git a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h index 94ccd728519c2..35c574aacb573 100644 --- a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h +++ b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h @@ -60,6 +60,9 @@ class OnDiskKeyValueDB { StringRef ValueName, size_t ValueSize, std::shared_ptr Logger = nullptr); + using CheckValueT = function_ref)>; + Error validate(CheckValueT CheckValue) const; + private: OnDiskKeyValueDB(size_t ValueSize, OnDiskHashMappedTrie Cache) : ValueSize(ValueSize), Cache(std::move(Cache)) {} diff --git a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h index be1880d6b3023..0e658b4dd4090 100644 --- a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h +++ b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h @@ -124,6 +124,8 @@ class UnifiedOnDiskCache { ~UnifiedOnDiskCache(); + Error validateActionCache(); + private: UnifiedOnDiskCache(); diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp index 7f66ad843ea78..83891b4215954 100644 --- a/llvm/lib/CAS/ActionCaches.cpp +++ b/llvm/lib/CAS/ActionCaches.cpp @@ -52,6 +52,10 @@ class InMemoryActionCache final : public ActionCache { Expected> getImpl(ArrayRef ActionKey, bool Globally) const final; + Error validate() const final { + return createStringError("InMemoryActionCache doesn't support validate()"); + } + private: using DataT = CacheEntry; using InMemoryCacheT = ThreadSafeHashMappedTrie; @@ -68,6 +72,8 @@ class OnDiskActionCache final : public ActionCache { static Expected> create(StringRef Path); + Error validate() const final; + private: static StringRef getHashName() { return "BLAKE3"; } @@ -86,6 +92,8 @@ class UnifiedOnDiskActionCache final : public ActionCache { UnifiedOnDiskActionCache(std::shared_ptr UniDB); + Error validate() const final; + private: std::shared_ptr UniDB; }; @@ -198,6 +206,12 @@ Error OnDiskActionCache::putImpl(ArrayRef Key, const CASID &Result, ArrayRef((const uint8_t *)Observed.data(), Observed.size())); } +Error OnDiskActionCache::validate() const { + // FIXME: without the matching CAS there is nothing we can check about the + // cached values. The hash size is already validated by the DB validator. + return DB->validate(nullptr); +} + UnifiedOnDiskActionCache::UnifiedOnDiskActionCache( std::shared_ptr UniDB) : ActionCache(builtin::BuiltinCASContext::getDefaultContext()), @@ -233,6 +247,10 @@ Error UnifiedOnDiskActionCache::putImpl(ArrayRef Key, UniDB->getGraphDB().getDigest(*Observed)); } +Error UnifiedOnDiskActionCache::validate() const { + return UniDB->validateActionCache(); +} + Expected> cas::createOnDiskActionCache(StringRef Path) { #if LLVM_ENABLE_ONDISK_CAS diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp index ec410fdc4de1b..f3a0e863d1dab 100644 --- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp +++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp @@ -81,3 +81,25 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, return std::unique_ptr( new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache))); } + +Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { + return Cache.validate( + [&](FileOffset Offset, + OnDiskHashMappedTrie::ConstValueProxy Record) -> Error { + auto formatError = [&](Twine Msg) { + return createStringError( + llvm::errc::illegal_byte_sequence, + "bad cache value at 0x" + + utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + + Msg.str()); + }; + + if (Record.Data.size() != ValueSize) + return formatError("wrong cache value size"); + if (!isAligned(Align(8), Record.Data.size())) + return formatError("wrong cache value alignment"); + if (CheckValue) + return CheckValue(Offset, Record.Data); + return Error::success(); + }); +} \ No newline at end of file diff --git a/llvm/lib/CAS/PluginAPI.h b/llvm/lib/CAS/PluginAPI.h index bf019748da829..ec5413a9834a8 100644 --- a/llvm/lib/CAS/PluginAPI.h +++ b/llvm/lib/CAS/PluginAPI.h @@ -108,6 +108,8 @@ struct llcas_functions_t { bool globally, void *ctx_cb, llcas_actioncache_put_cb, llcas_cancellable_t *); + + bool (*actioncache_validate)(llcas_cas_t, char **error); }; #endif // LLVM_LIB_CAS_PLUGINAPI_H diff --git a/llvm/lib/CAS/PluginAPI_functions.def b/llvm/lib/CAS/PluginAPI_functions.def index 3dca709689d4a..f242611bdd603 100644 --- a/llvm/lib/CAS/PluginAPI_functions.def +++ b/llvm/lib/CAS/PluginAPI_functions.def @@ -7,6 +7,7 @@ CASPLUGINAPI_FUNCTION(actioncache_get_for_digest, true) CASPLUGINAPI_FUNCTION(actioncache_get_for_digest_async, true) CASPLUGINAPI_FUNCTION(actioncache_put_for_digest, true) CASPLUGINAPI_FUNCTION(actioncache_put_for_digest_async, true) +CASPLUGINAPI_FUNCTION(actioncache_validate, false) CASPLUGINAPI_FUNCTION(cancellable_cancel, false) CASPLUGINAPI_FUNCTION(cancellable_dispose, false) CASPLUGINAPI_FUNCTION(cas_contains_object, true) diff --git a/llvm/lib/CAS/PluginCAS.cpp b/llvm/lib/CAS/PluginCAS.cpp index f27be05711495..9d9553160bdd1 100644 --- a/llvm/lib/CAS/PluginCAS.cpp +++ b/llvm/lib/CAS/PluginCAS.cpp @@ -461,6 +461,8 @@ class PluginActionCache : public ActionCache { PluginActionCache(std::shared_ptr); + Error validate() const final; + private: std::shared_ptr Ctx; }; @@ -596,6 +598,16 @@ void PluginActionCache::putImplAsync(ArrayRef ResolvedKey, PluginActionCache::PluginActionCache(std::shared_ptr CASCtx) : ActionCache(*CASCtx), Ctx(std::move(CASCtx)) {} +Error PluginActionCache::validate() const { + if (Ctx->Functions.actioncache_validate) { + char *c_err = nullptr; + if (Ctx->Functions.actioncache_validate(Ctx->c_cas, &c_err)) + return Ctx->errorAndDispose(c_err); + return Error::success(); + } + return createStringError("plugin action cache doesn't support validation"); +} + //===----------------------------------------------------------------------===// // createPluginCASDatabases API //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp index 632456f8f7c03..9f86e53ac84d7 100644 --- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -52,6 +52,7 @@ #include "llvm/CAS/UnifiedOnDiskCache.h" #include "OnDiskCommon.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CAS/OnDiskCASLogger.h" #include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/Support/Compiler.h" @@ -123,6 +124,30 @@ UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef Key) { return KVPut(Key, *PrimaryID); } +Error UnifiedOnDiskCache::validateActionCache() { + auto ValidateRef = [&](FileOffset Offset, ArrayRef Value) -> Error { + assert(Value.size() == sizeof(uint64_t) && "should be validated already"); + auto ID = ObjectID::fromOpaqueData(support::endian::read64le(Value.data())); + auto formatError = [&](Twine Msg) { + return createStringError( + llvm::errc::illegal_byte_sequence, + "bad record at 0x" + + utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + + Msg.str()); + }; + if (ID.getOpaqueData() == 0) + return formatError("zero is not a valid ref"); + if (!PrimaryGraphDB->containsObject(ID)) + return formatError("cas does not contain ref"); + return Error::success(); + }; + if (Error E = PrimaryKVDB->validate(ValidateRef)) + return E; + if (UpstreamKVDB) + return UpstreamKVDB->validate(ValidateRef); + return Error::success(); +} + /// \returns all the 'v.' names of sub-directories, sorted with /// ascending order of the integer after the dot. static Error getAllDBDirs(StringRef Path, diff --git a/llvm/test/tools/llvm-cas/validation.test b/llvm/test/tools/llvm-cas/validation.test index b29d37f49422b..a60ee05c8d569 100644 --- a/llvm/test/tools/llvm-cas/validation.test +++ b/llvm/test/tools/llvm-cas/validation.test @@ -9,3 +9,17 @@ RUN: rm %t/cas/v1.1/v8.data RUN: not llvm-cas --cas %t/cas --validate RUN: not llvm-cas --cas %t/cas --validate --check-hash +RUN: mkdir %t/ac + +RUN: llvm-cas --cas %t/ac --make-blob \ +RUN: --data /dev/null > %t/empty.casid +RUN: echo "abc" | \ +RUN: llvm-cas --cas %t/ac --make-blob \ +RUN: --data - >%t/abc.casid + +RUN: llvm-cas --cas %t/ac --put-cache-key @%t/abc.casid @%t/empty.casid +RUN: llvm-cas --cas %t/ac --validate +# Note: records are 40 bytes (32 hash bytes + 8 byte value), so trim the last +# allocated record, leaving it invalid. +RUN: truncate -s -40 %t/ac/v1.1/v3.actions +RUN: not llvm-cas --cas %t/ac --validate diff --git a/llvm/tools/llvm-cas/llvm-cas.cpp b/llvm/tools/llvm-cas/llvm-cas.cpp index b804edc07a783..64245eb1342f5 100644 --- a/llvm/tools/llvm-cas/llvm-cas.cpp +++ b/llvm/tools/llvm-cas/llvm-cas.cpp @@ -64,7 +64,7 @@ static int putCacheKey(ObjectStore &CAS, ActionCache &AC, ArrayRef Objects); static int getCacheResult(ObjectStore &CAS, ActionCache &AC, const CASID &ID); static int validateObject(ObjectStore &CAS, const CASID &ID); -static int validate(ObjectStore &CAS, bool CheckHash); +static int validate(ObjectStore &CAS, ActionCache &AC, bool CheckHash); static int ingestCasIDFile(cas::ObjectStore &CAS, ArrayRef CASIDs); static int checkLockFiles(StringRef CASPath); @@ -184,7 +184,7 @@ int main(int Argc, char **Argv) { return dump(*CAS); if (Command == Validate) - return validate(*CAS, CheckHash); + return validate(*CAS, *AC, CheckHash); if (Command == MakeBlob) return makeBlob(*CAS, DataPath); @@ -722,9 +722,10 @@ int validateObject(ObjectStore &CAS, const CASID &ID) { return 0; } -int validate(ObjectStore &CAS, bool CheckHash) { +int validate(ObjectStore &CAS, ActionCache &AC, bool CheckHash) { ExitOnError ExitOnErr("llvm-cas: validate: "); ExitOnErr(CAS.validate(CheckHash)); + ExitOnErr(AC.validate()); outs() << "validated successfully\n"; return 0; } From 9fd0c15c161ec401082fc77376a8d4cb1c290997 Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Mon, 7 Apr 2025 09:22:45 -0700 Subject: [PATCH 3/8] [llvm][cas] Fix build with LLVM_CAS_ENABLE_REMOTE_CACHE (ActionCache::validate) (cherry picked from commit eb2d1eaa96fa17c8626d19eab8973eee8fb851c3) --- llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp b/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp index 6665ad728162f..607414c92430d 100644 --- a/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp +++ b/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp @@ -220,6 +220,11 @@ class GRPCActionCache : public ActionCache { Error putImpl(ArrayRef ResolvedKey, const CASID &Result, bool Globally) final; + Error validate() const final { + // Not supported yet. Always return success. + return Error::success(); + } + private: std::unique_ptr KVDB; }; From a316c4eea85839e5a8261ed18664787ee014d8c5 Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Wed, 2 Apr 2025 15:11:16 -0700 Subject: [PATCH 4/8] [llvm][cas] Add validate-if-needed to recover from invalid data Introduce a new validate-if-needed API to the UnifiedOnDiskCache and llvm-cas tool that triggers out-of-process validation of the CAS once for every machine boot, and optionally recovers from invalid data by marking it for garbage collection. This fixes a hole in the CAS data coherence when a power loss or similar failure causes the OS to not flush all of the pages in the mmaped on-disk CAS files. The intent is that clients such as the clang scanning daemon or a build system should trigger this validation at least once before using the CAS. rdar://123542312 (cherry picked from commit 17aa9cf45731f7e8f143105dac2ed1fde1c838e8) --- .../llvm/CAS/BuiltinUnifiedCASDatabases.h | 33 ++ llvm/include/llvm/CAS/OnDiskCASLogger.h | 5 + llvm/include/llvm/CAS/UnifiedOnDiskCache.h | 29 ++ llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp | 13 + llvm/lib/CAS/OnDiskCASLogger.cpp | 19 ++ llvm/lib/CAS/UnifiedOnDiskCache.cpp | 294 +++++++++++++++++- llvm/test/CAS/logging.test | 6 + llvm/test/CAS/validate-if-needed.test | 43 +++ llvm/tools/llvm-cas/llvm-cas.cpp | 64 +++- 9 files changed, 495 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CAS/validate-if-needed.test diff --git a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h index 969d097b6ceca..c374a3aaec04f 100644 --- a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h +++ b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h @@ -21,6 +21,39 @@ class ObjectStore; Expected, std::unique_ptr>> createOnDiskUnifiedCASDatabases(StringRef Path); +/// Represents the result of validating the contents using +/// \c validateOnDiskUnifiedCASDatabasesIfNeeded. +/// +/// Note: invalid results are handled as an \c Error. +enum class ValidationResult { + /// The data is already valid. + Valid, + /// The data was invalid, but was recovered. + RecoveredValid, + /// Validation was skipped, as it was not needed. + Skipped, +}; + +/// Validate the data in \p Path, if needed to ensure correctness. +/// +/// \param Path directory for the on-disk database. +/// \param CheckHash Whether to validate hashes match the data. +/// \param AllowRecovery Whether to automatically recover from invalid data by +/// marking the files for garbage collection. +/// \param ForceValidation Whether to force validation to occur even if it +/// should not be necessary. +/// \param LLVMCasBinary If provided, validation is performed out-of-process +/// using the given \c llvm-cas executable which protects against crashes +/// during validation. Otherwise validation is performed in-process. +/// +/// \returns \c Valid if the data is already valid, \c RecoveredValid if data +/// was invalid but has been cleared, \c Skipped if validation is not needed, +/// or an \c Error if validation cannot be performed or if the data is left +/// in an invalid state because \p AllowRecovery is false. +Expected validateOnDiskUnifiedCASDatabasesIfNeeded( + StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation, + std::optional LLVMCasBinary); + } // namespace llvm::cas #endif // LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H diff --git a/llvm/include/llvm/CAS/OnDiskCASLogger.h b/llvm/include/llvm/CAS/OnDiskCASLogger.h index 0ceb4d9ad7836..e0c21b771b11d 100644 --- a/llvm/include/llvm/CAS/OnDiskCASLogger.h +++ b/llvm/include/llvm/CAS/OnDiskCASLogger.h @@ -62,6 +62,11 @@ class OnDiskCASLogger { void log_MappedFileRegionBumpPtr_allocate(void *Region, TrieOffset Off, size_t Size); void log_UnifiedOnDiskCache_collectGarbage(StringRef Path); + void log_UnifiedOnDiskCache_validateIfNeeded( + StringRef Path, uint64_t BootTime, uint64_t ValidationTime, + bool CheckHash, bool AllowRecovery, bool Force, + std::optional LLVMCas, StringRef ValidationError, bool Skipped, + bool Recovered); void log_TempFile_create(StringRef Name); void log_TempFile_keep(StringRef TmpName, StringRef Name, std::error_code EC); void log_TempFile_remove(StringRef TmpName, std::error_code EC); diff --git a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h index 0e658b4dd4090..4fa239710cecd 100644 --- a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h +++ b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h @@ -9,6 +9,7 @@ #ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H #define LLVM_CAS_UNIFIEDONDISKCACHE_H +#include "llvm/CAS/BuiltinUnifiedCASDatabases.h" #include "llvm/CAS/OnDiskGraphDB.h" namespace llvm::cas::ondisk { @@ -82,6 +83,34 @@ class UnifiedOnDiskCache { OnDiskGraphDB::FaultInPolicy FaultInPolicy = OnDiskGraphDB::FaultInPolicy::FullTree); + /// Validate the data in \p Path, if needed to ensure correctness. + /// + /// Note: if invalid data is detected and \p AllowRecovery is true, then + /// recovery requires exclusive access to the CAS and it is an error to + /// attempt recovery if there is concurrent use of the CAS. + /// + /// \param Path directory for the on-disk database. + /// \param HashName Identifier name for the hashing algorithm that is going to + /// be used. + /// \param HashByteSize Size for the object digest hash bytes. + /// \param CheckHash Whether to validate hashes match the data. + /// \param AllowRecovery Whether to automatically recover from invalid data by + /// marking the files for garbage collection. + /// \param ForceValidation Whether to force validation to occur even if it + /// should not be necessary. + /// \param LLVMCasBinary If provided, validation is performed out-of-process + /// using the given \c llvm-cas executable which protects against crashes + /// during validation. Otherwise validation is performed in-process. + /// + /// \returns \c Valid if the data is already valid, \c RecoveredValid if data + /// was invalid but has been cleared, \c Skipped if validation is not needed, + /// or an \c Error if validation cannot be performed or if the data is left + /// in an invalid state because \p AllowRecovery is false. + static Expected + validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize, + bool CheckHash, bool AllowRecovery, bool ForceValidation, + std::optional LLVMCasBinary); + /// This is called implicitly at destruction time, so it is not required for a /// client to call this. After calling \p close the only method that is valid /// to call is \p needsGarbaseCollection. diff --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp index 87073cf2b4f23..40d898e4b7f56 100644 --- a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp +++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp @@ -23,3 +23,16 @@ cas::createOnDiskUnifiedCASDatabases(StringRef Path) { auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB)); return std::make_pair(std::move(CAS), std::move(AC)); } + +Expected cas::validateOnDiskUnifiedCASDatabasesIfNeeded( + StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation, + std::optional LLVMCasBinary) { +#if LLVM_ENABLE_ONDISK_CAS + return ondisk::UnifiedOnDiskCache::validateIfNeeded( + Path, builtin::BuiltinCASContext::getHashName(), + sizeof(builtin::HashType), CheckHash, AllowRecovery, ForceValidation, + LLVMCasBinary); +#else + return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled"); +#endif +} \ No newline at end of file diff --git a/llvm/lib/CAS/OnDiskCASLogger.cpp b/llvm/lib/CAS/OnDiskCASLogger.cpp index e9a68a548edad..a39cbbd3e0b58 100644 --- a/llvm/lib/CAS/OnDiskCASLogger.cpp +++ b/llvm/lib/CAS/OnDiskCASLogger.cpp @@ -211,6 +211,25 @@ void OnDiskCASLogger::log_UnifiedOnDiskCache_collectGarbage(StringRef Path) { Log << "collect garbage '" << Path << "'"; } +void OnDiskCASLogger::log_UnifiedOnDiskCache_validateIfNeeded( + StringRef Path, uint64_t BootTime, uint64_t ValidationTime, bool CheckHash, + bool AllowRecovery, bool Force, std::optional LLVMCas, + StringRef ValidationError, bool Skipped, bool Recovered) { + TextLogLine Log(OS); + Log << "validate-if-needed '" << Path << "'"; + Log << " boot=" << BootTime << " last-valid=" << ValidationTime; + Log << " check-hash=" << CheckHash << " allow-recovery=" << AllowRecovery; + Log << " force=" << Force; + if (LLVMCas) + Log << " llvm-cas=" << *LLVMCas; + if (Skipped) + Log << " skipped"; + if (Recovered) + Log << " recovered"; + if (!ValidationError.empty()) + Log << " data was invalid " << ValidationError; +} + void OnDiskCASLogger::log_TempFile_create(StringRef Name) { TextLogLine Log(OS); Log << "standalone file create '" << Name << "'"; diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp index 9f86e53ac84d7..c5e9bcccd1833 100644 --- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -47,18 +47,48 @@ // without affecting any active readers/writers in the same process or other // processes. // +// The \c UnifiedOnDiskCache also provides validation and recovery on top of the +// underlying on-disk storage. The low-level storage is designed to remain +// coherent across regular process crashes, but may be invalid after power loss +// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows +// validating the contents once per boot and can recover by marking invalid +// data for garbage collection. +// +// The data recovery described above requires exclusive access to the CAS, and +// it is an error to attempt recovery if the CAS is open in any process/thread. +// In order to maximize backwards compatibility with tools that do not perform +// validation before opening the CAS, we do not attempt to get exclusive access +// until recovery is actually performed, meaning as long as the data is valid +// it will not conflict with concurrent use. +// //===----------------------------------------------------------------------===// #include "llvm/CAS/UnifiedOnDiskCache.h" +#include "BuiltinCAS.h" #include "OnDiskCommon.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CAS/OnDiskCASLogger.h" +#include "llvm/CAS/OnDiskGraphDB.h" #include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include + +#if __has_include() +#include +#endif using namespace llvm; using namespace llvm::cas; @@ -69,6 +99,9 @@ using namespace llvm::cas::ondisk; /// the \p UnifiedOnDiskCache::collectGarbage function. static constexpr StringLiteral DBDirPrefix = "v1."; +static constexpr StringLiteral ValidationFilename = "v1.validation"; +static constexpr StringLiteral CorruptPrefix = "corrupt."; + Expected UnifiedOnDiskCache::KVPut(ObjectID Key, ObjectID Value) { return KVPut(PrimaryGraphDB->getDigest(Key), Value); } @@ -149,9 +182,10 @@ Error UnifiedOnDiskCache::validateActionCache() { } /// \returns all the 'v.' names of sub-directories, sorted with -/// ascending order of the integer after the dot. -static Error getAllDBDirs(StringRef Path, - SmallVectorImpl &DBDirs) { +/// ascending order of the integer after the dot. Corrupt directories, if +/// included, will come first. +static Error getAllDBDirs(StringRef Path, SmallVectorImpl &DBDirs, + bool IncludeCorrupt = false) { struct DBDir { uint64_t Order; std::string Name; @@ -164,6 +198,10 @@ static Error getAllDBDirs(StringRef Path, if (DirI->type() != sys::fs::file_type::directory_file) continue; StringRef SubDir = sys::path::filename(DirI->path()); + if (IncludeCorrupt && SubDir.starts_with(CorruptPrefix)) { + FoundDBDirs.push_back({0, std::string(SubDir)}); + continue; + } if (!SubDir.starts_with(DBDirPrefix)) continue; uint64_t Order; @@ -183,6 +221,23 @@ static Error getAllDBDirs(StringRef Path, return Error::success(); } +static Error getAllGarbageDirs(StringRef Path, + SmallVectorImpl &DBDirs) { + if (Error E = getAllDBDirs(Path, DBDirs, /*IncludeCorrupt=*/true)) + return E; + + // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure + // out how to handle the leftover sub-directories of the previous version. + + for (unsigned Keep = 2; Keep > 0 && !DBDirs.empty(); --Keep) { + StringRef Back(DBDirs.back()); + if (Back.starts_with(CorruptPrefix)) + break; + DBDirs.pop_back(); + } + return Error::success(); +} + /// \returns Given a sub-directory named 'v.', it outputs the /// 'v.' name. static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) { @@ -194,6 +249,230 @@ static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) { OS << DBDirPrefix << Count + 1; } +static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath, + bool CheckHash) { + SmallVector Args{LLVMCasBinary, "-cas", RootPath, "-validate"}; + if (CheckHash) + Args.push_back("-check-hash"); + + llvm::SmallString<128> StdErrPath; + int StdErrFD = -1; + if (std::error_code EC = sys::fs::createTemporaryFile( + "llvm-cas-validate-stderr", "txt", StdErrFD, StdErrPath, + llvm::sys::fs::OF_Text)) + return createStringError(EC, "failed to create temporary file"); + FileRemover OutputRemover(StdErrPath.c_str()); + + std::optional Redirects[] = { + {""}, // stdin = /dev/null + {""}, // stdout = /dev/null + StdErrPath.str(), + }; + + std::string ErrMsg; + int Result = + sys::ExecuteAndWait(LLVMCasBinary, Args, /*Env=*/std::nullopt, Redirects, + /*SecondsToWait=*/120, /*MemoryLimit=*/0, &ErrMsg); + + if (Result == -1) + return createStringError("failed to exec " + join(Args, " ") + ": " + + ErrMsg); + if (Result != 0) { + llvm::SmallString<64> Err("cas contents invalid"); + if (!ErrMsg.empty()) { + Err += ": "; + Err += ErrMsg; + } + auto StdErrBuf = MemoryBuffer::getFile(StdErrPath.c_str()); + if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) { + Err += ": "; + Err += (*StdErrBuf)->getBuffer(); + } + return createStringError(Err); + } + return Error::success(); +} + +static Error validateInProcess(StringRef RootPath, StringRef HashName, + unsigned HashByteSize, bool CheckHash) { + std::shared_ptr UniDB; + if (Error E = UnifiedOnDiskCache::open(RootPath, std::nullopt, HashName, + HashByteSize) + .moveInto(UniDB)) + return E; + auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB); + if (Error E = CAS->validate(CheckHash)) + return E; + if (Error E = UniDB->validateActionCache()) + return E; + return Error::success(); +} + +static Expected getBootTime() { +#if __has_include() && defined(KERN_BOOTTIME) + struct timeval TV; + size_t TVLen = sizeof(TV); + int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME}; + if (sysctl(KernBoot, 2, &TV, &TVLen, nullptr, 0) < 0) + return createStringError(llvm::errnoAsErrorCode(), + "failed to get boottime"); + if (TVLen != sizeof(TV)) + return createStringError("sysctl kern.boottime unexpected format"); + return TV.tv_sec; +#elif defined(__linux__) + // Use the mtime for /proc, which is recreated during system boot. + // We could also read /proc/stat and search for 'btime'. + sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status("/proc", Status)) + return createFileError("/proc", EC); + return Status.getLastModificationTime().time_since_epoch().count(); +#else + llvm::report_fatal_error("unimplemented"); +#endif +} + +Expected +UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, + unsigned HashByteSize, bool CheckHash, + bool AllowRecovery, bool ForceValidation, + std::optional LLVMCasBinary) { + if (std::error_code EC = sys::fs::create_directories(RootPath)) + return createFileError(RootPath, EC); + + SmallString<256> PathBuf(RootPath); + sys::path::append(PathBuf, ValidationFilename); + int FD = -1; + if (std::error_code EC = sys::fs::openFileForReadWrite( + PathBuf, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) + return createFileError(PathBuf, EC); + assert(FD != -1); + + sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); + auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(File); }); + + if (std::error_code EC = lockFileThreadSafe(FD, /*Exclusive=*/true)) + return createFileError(PathBuf, EC); + auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(FD); }); + + std::shared_ptr Logger; + if (Error E = + ondisk::OnDiskCASLogger::openIfEnabled(RootPath).moveInto(Logger)) + return std::move(E); + + SmallString<8> Bytes; + if (Error E = sys::fs::readNativeFileToEOF(File, Bytes)) + return createFileError(PathBuf, std::move(E)); + + uint64_t ValidationBootTime = 0; + if (!Bytes.empty() && + StringRef(Bytes).trim().getAsInteger(10, ValidationBootTime)) + return createFileError(PathBuf, errc::illegal_byte_sequence, + "expected integer"); + + static uint64_t BootTime = 0; + if (BootTime == 0) + if (Error E = getBootTime().moveInto(BootTime)) + return std::move(E); + + bool Recovered = false; + bool Skipped = false; + std::string LogValidationError; + + auto Log = llvm::make_scope_exit([&] { + if (!Logger) + return; + Logger->log_UnifiedOnDiskCache_validateIfNeeded( + RootPath, BootTime, ValidationBootTime, CheckHash, AllowRecovery, + ForceValidation, LLVMCasBinary, LogValidationError, Skipped, Recovered); + }); + + if (ValidationBootTime == BootTime && !ForceValidation) { + Skipped = true; + return ValidationResult::Skipped; + } + + // Validate! + bool NeedsRecovery = false; + Error E = + LLVMCasBinary + ? validateOutOfProcess(*LLVMCasBinary, RootPath, CheckHash) + : validateInProcess(RootPath, HashName, HashByteSize, CheckHash); + if (E) { + if (Logger) + LogValidationError = toStringWithoutConsuming(E); + if (AllowRecovery) { + consumeError(std::move(E)); + NeedsRecovery = true; + } else { + return std::move(E); + } + } + + if (NeedsRecovery) { + sys::path::remove_filename(PathBuf); + sys::path::append(PathBuf, "lock"); + + int LockFD = -1; + if (std::error_code EC = sys::fs::openFileForReadWrite( + PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) + return createFileError(PathBuf, EC); + sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD); + auto CloseLock = make_scope_exit([&]() { sys::fs::closeFile(LockFile); }); + if (std::error_code EC = tryLockFileThreadSafe(LockFD)) { + if (EC == std::errc::no_lock_available) + return createFileError( + PathBuf, EC, + "CAS validation requires exclusive access but CAS was in use"); + return createFileError(PathBuf, EC); + } + auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); + + SmallVector DBDirs; + if (Error E = getAllDBDirs(RootPath, DBDirs)) + return std::move(E); + + for (StringRef DBDir : DBDirs) { + sys::path::remove_filename(PathBuf); + sys::path::append(PathBuf, DBDir); + std::error_code EC; + int Attempt = 0, MaxAttempts = 100; + SmallString<128> GCPath; + for (; Attempt < MaxAttempts; ++Attempt) { + GCPath.assign(RootPath); + sys::path::append(GCPath, CorruptPrefix + std::to_string(Attempt) + + "." + DBDir); + EC = sys::fs::rename(PathBuf, GCPath); + if (EC != errc::directory_not_empty) + break; + } + if (Attempt == MaxAttempts) + return createStringError( + EC, "rename " + PathBuf + + " failed: too many CAS directories awaiting pruning"); + if (EC) + return createStringError(EC, "rename " + PathBuf + " to " + GCPath + + " failed"); + } + Recovered = true; + } + + if (ValidationBootTime != BootTime) { + // Fix filename in case we have error to report. + sys::path::remove_filename(PathBuf); + sys::path::append(PathBuf, ValidationFilename); + if (std::error_code EC = sys::fs::resize_file(FD, 0)) + return createFileError(PathBuf, EC); + raw_fd_ostream OS(FD, /*shouldClose=*/false); + OS.seek(0); // resize does not reset position + OS << BootTime << '\n'; + if (OS.has_error()) + return createFileError(PathBuf, OS.error()); + } + + return NeedsRecovery ? ValidationResult::RecoveredValid + : ValidationResult::Valid; +} + Expected> UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, StringRef HashName, unsigned HashByteSize, @@ -384,16 +663,11 @@ UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); } Error UnifiedOnDiskCache::collectGarbage(StringRef Path, ondisk::OnDiskCASLogger *Logger) { SmallVector DBDirs; - if (Error E = getAllDBDirs(Path, DBDirs)) + if (Error E = getAllGarbageDirs(Path, DBDirs)) return E; - if (DBDirs.size() <= 2) - return Error::success(); // no unused directories. - - // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure - // out how to handle the leftover sub-directories of the previous version. SmallString<256> PathBuf(Path); - for (StringRef UnusedSubDir : ArrayRef(DBDirs).drop_back(2)) { + for (StringRef UnusedSubDir : DBDirs) { sys::path::append(PathBuf, UnusedSubDir); if (Logger) Logger->log_UnifiedOnDiskCache_collectGarbage(PathBuf); diff --git a/llvm/test/CAS/logging.test b/llvm/test/CAS/logging.test index 2c2d72fbd4035..98dd6cd9ff001 100644 --- a/llvm/test/CAS/logging.test +++ b/llvm/test/CAS/logging.test @@ -1,8 +1,11 @@ RUN: rm -rf %t RUN: split-file %s %t RUN: env LLVM_CAS_LOG=2 llvm-cas --cas %t/cas --ingest %t/input +RUN: env LLVM_CAS_LOG=2 llvm-cas --cas %t/cas --validate-if-needed -check-hash +RUN: env LLVM_CAS_LOG=2 llvm-cas --cas %t/cas --validate-if-needed -force -allow-recovery RUN: FileCheck %s --input-file %t/cas/v1.log + // CHECK: resize mapped file '{{.*}}v8.index' // CHECK: mmap '{{.*}}v8.index' [[INDEX:0x[0-9a-f]+]] // CHECK: resize mapped file '{{.*}}v8.data' @@ -22,6 +25,9 @@ RUN: FileCheck %s --input-file %t/cas/v1.log // CHECK: resize mapped file '{{.*}}v8.index' // CHECK: close mmap '{{.*}}v8.index' +// CHECK: validate-if-needed '{{.*}}cas' boot=[[BOOT:[0-9]+]] last-valid=0 check-hash=1 allow-recovery=0 force=0 llvm-cas={{.*}}llvm-cas +// CHECK: validate-if-needed '{{.*}}cas' boot=[[BOOT]] last-valid=[[BOOT]] check-hash=0 allow-recovery=1 force=1 llvm-cas={{.*}}llvm-cas + //--- input/a Input 1 diff --git a/llvm/test/CAS/validate-if-needed.test b/llvm/test/CAS/validate-if-needed.test new file mode 100644 index 0000000000000..c2d359c153849 --- /dev/null +++ b/llvm/test/CAS/validate-if-needed.test @@ -0,0 +1,43 @@ +RUN: rm -rf %t && mkdir %t +RUN: llvm-cas --cas %t/cas --ingest %S/Inputs > %t/cas.id +RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak + +# INVALID: bad record +# VALID: validated successfully +# SKIPPED: validation skipped +# RECOVERED: recovered from invalid data + +# Validation failures are not saved. +RUN: not llvm-cas --cas %t/cas --validate-if-needed 2>&1 | FileCheck %s -check-prefix=INVALID +RUN: not llvm-cas --cas %t/cas --validate-if-needed 2>&1 | FileCheck %s -check-prefix=INVALID + +# Validation happens once per boot. +RUN: mv %t/cas/v1.1/v8.data.bak %t/cas/v1.1/v8.data +RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=VALID +RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED +# Wrong timestamp triggers re-validation. +RUN: echo '123' > %t/cas/v1.validation +RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=VALID +RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED +# Skipped validation does not catch errors. +RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak +RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED + +# Unless forced. +RUN: not llvm-cas --cas %t/cas --validate-if-needed --force 2>&1 | FileCheck %s -check-prefix=INVALID + +# Recovering from invalid data. +RUN: llvm-cas --cas %t/cas --validate-if-needed --allow-recovery --force | FileCheck %s -check-prefix=RECOVERED +RUN: ls %t/cas/corrupt.0.v1.1 +RUN: llvm-cas --cas %t/cas --validate-if-needed --allow-recovery | FileCheck %s -check-prefix=SKIPPED +RUN: llvm-cas --cas %t/cas --validate-if-needed --force | FileCheck %s -check-prefix=VALID +RUN: rm -rf %t/cas/v1.1 +RUN: cp -r %t/cas/corrupt.0.v1.1 %t/cas/v1.1 +RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak +RUN: llvm-cas --cas %t/cas --validate-if-needed --allow-recovery --force | FileCheck %s -check-prefix=RECOVERED +RUN: ls %t/cas/corrupt.1.v1.1 + +# Corrupt data is pruned. +RUN: llvm-cas --cas %t/cas --prune +RUN: not ls %t/cas/corrupt.0.v1.1 +RUN: not ls %t/cas/corrupt.1.v1.1 diff --git a/llvm/tools/llvm-cas/llvm-cas.cpp b/llvm/tools/llvm-cas/llvm-cas.cpp index 64245eb1342f5..a633b9361c09e 100644 --- a/llvm/tools/llvm-cas/llvm-cas.cpp +++ b/llvm/tools/llvm-cas/llvm-cas.cpp @@ -13,6 +13,7 @@ #include "llvm/CAS/HierarchicalTreeBuilder.h" #include "llvm/CAS/ObjectStore.h" #include "llvm/CAS/TreeSchema.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/CAS/Utils.h" #include "llvm/RemoteCachingService/RemoteCachingService.h" #include "llvm/Support/CommandLine.h" @@ -65,8 +66,13 @@ static int putCacheKey(ObjectStore &CAS, ActionCache &AC, static int getCacheResult(ObjectStore &CAS, ActionCache &AC, const CASID &ID); static int validateObject(ObjectStore &CAS, const CASID &ID); static int validate(ObjectStore &CAS, ActionCache &AC, bool CheckHash); +static int validateIfNeeded(StringRef Path, StringRef PluginPath, + ArrayRef PluginOpts, bool CheckHash, + bool Force, bool AllowRecovery, bool InProcess, + const char *Argv0); static int ingestCasIDFile(cas::ObjectStore &CAS, ArrayRef CASIDs); static int checkLockFiles(StringRef CASPath); +static int prune(cas::ObjectStore &CAS); int main(int Argc, char **Argv) { InitLLVM X(Argc, Argv); @@ -86,6 +92,11 @@ int main(int Argc, char **Argv) { cl::value_desc("path")); cl::opt CheckHash("check-hash", cl::desc("check all hashes during validation")); + cl::opt AllowRecovery("allow-recovery", + cl::desc("allow recovery of cas data")); + cl::opt Force("force", + cl::desc("force validation even if unnecessary")); + cl::opt InProcess("in-process", cl::desc("validate in-process")); enum CommandKind { Invalid, @@ -109,6 +120,8 @@ int main(int Argc, char **Argv) { CheckLockFiles, Validate, ValidateObject, + ValidateIfNeeded, + Prune, }; cl::opt Command( cl::desc("choose command action:"), @@ -137,7 +150,10 @@ int main(int Argc, char **Argv) { "Test file locking behaviour of on-disk CAS"), clEnumValN(Validate, "validate", "validate ObjectStore"), clEnumValN(ValidateObject, "validate-object", - "validate the object for CASID")), + "validate the object for CASID"), + clEnumValN(ValidateIfNeeded, "validate-if-needed", + "validate cas contents if needed"), + clEnumValN(Prune, "prune", "prune local cas storage")), cl::init(CommandKind::Invalid)); cl::ParseCommandLineOptions(Argc, Argv, "llvm-cas CAS tool\n"); @@ -155,6 +171,10 @@ int main(int Argc, char **Argv) { if (Command == CheckLockFiles) return checkLockFiles(CASPath); + if (Command == ValidateIfNeeded) + return validateIfNeeded(CASPath, CASPluginPath, CASPluginOpts, CheckHash, + Force, AllowRecovery, InProcess, Argv[0]); + std::shared_ptr CAS; std::shared_ptr AC; std::optional CASFilePath; @@ -210,6 +230,9 @@ int main(int Argc, char **Argv) { if (Command == MergeTrees) return mergeTrees(*CAS, Inputs); + if (Command == Prune) + return prune(*CAS); + if (Inputs.empty()) ExitOnErr(createStringError(inconvertibleErrorCode(), "missing to operate on")); @@ -729,3 +752,42 @@ int validate(ObjectStore &CAS, ActionCache &AC, bool CheckHash) { outs() << "validated successfully\n"; return 0; } + +int validateIfNeeded(StringRef Path, StringRef PluginPath, + ArrayRef PluginOpts, bool CheckHash, + bool Force, bool AllowRecovery, bool InProcess, + const char *Argv0) { + ExitOnError ExitOnErr("llvm-cas: validate-if-needed: "); + std::string ExecStorage; + std::optional Exec; + if (!InProcess) { + ExecStorage = sys::fs::getMainExecutable(Argv0, (void *)validateIfNeeded); + Exec = ExecStorage; + } + ValidationResult Result; + if (PluginPath.empty()) { + Result = ExitOnErr(validateOnDiskUnifiedCASDatabasesIfNeeded( + Path, CheckHash, AllowRecovery, Force, Exec)); + } else { + // FIXME: add a hook for plugin validation + Result = ValidationResult::Skipped; + } + switch (Result) { + case ValidationResult::Valid: + outs() << "validated successfully\n"; + break; + case ValidationResult::RecoveredValid: + outs() << "recovered from invalid data\n"; + break; + case ValidationResult::Skipped: + outs() << "validation skipped\n"; + break; + } + return 0; +} + +static int prune(cas::ObjectStore &CAS) { + ExitOnError ExitOnErr("llvm-cas: prune: "); + ExitOnErr(CAS.pruneStorageData()); + return 0; +} \ No newline at end of file From 46e43bbaab19d40b8f203bad8a7d30ed5e1364e7 Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Wed, 26 Mar 2025 14:05:54 -0700 Subject: [PATCH 5/8] [clang][cas] Pass through LLVM_CAS_LOG env var to the depscan daemon Ensure that the scanning daemon modifications to the CAS are captured properly by the log. (cherry picked from commit f6a775f6c7b23f606654b1b07928ee36ca64d399) --- clang/test/CAS/depscan-cas-log.c | 18 ++++++++++++++++++ clang/tools/driver/cc1depscanProtocol.cpp | 12 +++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 clang/test/CAS/depscan-cas-log.c diff --git a/clang/test/CAS/depscan-cas-log.c b/clang/test/CAS/depscan-cas-log.c new file mode 100644 index 0000000000000..85111a5a9efc1 --- /dev/null +++ b/clang/test/CAS/depscan-cas-log.c @@ -0,0 +1,18 @@ +// Ensure both the first clang process and the daemon have logging enabled. +// It's hard to check this exhaustively, but in practice if the daemon does not +// enable logging there are currently zero records in the log. + +// RUN: rm -rf %t && mkdir %t +// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_LOG=1 %clang \ +// RUN: -cc1depscan -fdepscan=daemon -fdepscan-include-tree -o - \ +// RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas +// RUN: FileCheck %s --input-file %t/cas/v1.log + +// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v8.index' +// CHECK: [[PID1]] {{[0-9]*}}: create subtrie + +// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v8.index' +// Even a minimal compilation involves at least 9 records for the cache key. +// CHECK-COUNT-9: [[PID2]] {{[0-9]*}}: create record + +// CHECK: [[PID1]] {{[0-9]*}}: close mmap '{{.*}}v8.index' diff --git a/clang/tools/driver/cc1depscanProtocol.cpp b/clang/tools/driver/cc1depscanProtocol.cpp index 438bb79384ba2..b29ef162a3ee6 100644 --- a/clang/tools/driver/cc1depscanProtocol.cpp +++ b/clang/tools/driver/cc1depscanProtocol.cpp @@ -17,6 +17,7 @@ #include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include "llvm/Support/StringSaver.h" +#include #if LLVM_ON_UNIX #include // FIXME: Unix-only. Not portable. @@ -186,10 +187,19 @@ Expected ScanDaemon::launchDaemon(StringRef BasePath, return llvm::errorCodeToError(std::error_code(EC, std::generic_category())); #endif + static constexpr const char *PassThroughEnv[] = { + "LLVM_CAS_LOG", + }; + SmallVector EnvP; + for (const char *Name : PassThroughEnv) + if (const char *Value = getenv(Name)) + EnvP.push_back(Saver.save(llvm::Twine(Name) + "=" + Value).data()); + EnvP.push_back(nullptr); + ::pid_t Pid; int EC = ::posix_spawn(&Pid, Args[0], /*file_actions=*/nullptr, &Attrs, const_cast(LaunchArgs.data()), - /*envp=*/nullptr); + const_cast(EnvP.data())); if (EC) return llvm::errorCodeToError(std::error_code(EC, std::generic_category())); From 87b9f401f130722fd38d69412273914d9f139fe7 Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Tue, 29 Apr 2025 10:43:49 -0700 Subject: [PATCH 6/8] [clang][cas] Adopt validate-if-needed in clang scanning daemon Use the new validate-if-needed functionality to ensure the clang scanning daemon's CAS data is valid. (cherry picked from commit 1101d73128f7d87a899ea01adcbc838997997f19) --- clang/include/clang/CAS/CASOptions.h | 2 + clang/lib/CAS/CASOptions.cpp | 9 +++ clang/test/CAS/depscan-cas-log.c | 2 +- clang/test/CAS/validate-once.c | 18 +++++ clang/tools/driver/cc1depscanProtocol.cpp | 3 +- clang/tools/driver/cc1depscan_main.cpp | 80 +++++++++++++++++------ 6 files changed, 93 insertions(+), 21 deletions(-) create mode 100644 clang/test/CAS/validate-once.c diff --git a/clang/include/clang/CAS/CASOptions.h b/clang/include/clang/CAS/CASOptions.h index 03fb00a7fbff0..a40e158d4a914 100644 --- a/clang/include/clang/CAS/CASOptions.h +++ b/clang/include/clang/CAS/CASOptions.h @@ -117,6 +117,8 @@ class CASOptions : public CASConfiguration { /// default on-disk CAS, otherwise this is a noop. void ensurePersistentCAS(); + std::string getResolvedCASPath() const; + private: /// Initialize Cached CAS and ActionCache. llvm::Error initCache() const; diff --git a/clang/lib/CAS/CASOptions.cpp b/clang/lib/CAS/CASOptions.cpp index a2d30095424ac..bee4f8be41855 100644 --- a/clang/lib/CAS/CASOptions.cpp +++ b/clang/lib/CAS/CASOptions.cpp @@ -119,3 +119,12 @@ llvm::Error CASOptions::initCache() const { std::tie(Cache.CAS, Cache.AC) = std::move(DBs); return llvm::Error::success(); } + +std::string CASOptions::getResolvedCASPath() const { + if (CASPath != "auto") + return CASPath; + + SmallString<256> PathBuf; + getDefaultOnDiskCASPath(PathBuf); + return std::string(PathBuf); +} diff --git a/clang/test/CAS/depscan-cas-log.c b/clang/test/CAS/depscan-cas-log.c index 85111a5a9efc1..6d85d7a2c5e18 100644 --- a/clang/test/CAS/depscan-cas-log.c +++ b/clang/test/CAS/depscan-cas-log.c @@ -3,7 +3,7 @@ // enable logging there are currently zero records in the log. // RUN: rm -rf %t && mkdir %t -// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_LOG=1 %clang \ +// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_LOG=1 LLVM_CAS_DISABLE_VALIDATION=1 %clang \ // RUN: -cc1depscan -fdepscan=daemon -fdepscan-include-tree -o - \ // RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas // RUN: FileCheck %s --input-file %t/cas/v1.log diff --git a/clang/test/CAS/validate-once.c b/clang/test/CAS/validate-once.c new file mode 100644 index 0000000000000..774727fc64581 --- /dev/null +++ b/clang/test/CAS/validate-once.c @@ -0,0 +1,18 @@ +// RUN: rm -rf %t + +// RUN: llvm-cas --cas %t/cas --ingest %s +// RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak + +// RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \ +// RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \ +// RUN: -Xclang -fcas-path -Xclang %t/cas \ +// RUN: -fdepscan=daemon -fdepscan-daemon=%{clang-daemon-dir}/%basename_t -fsyntax-only -x c %s + +// RUN: ls %t/cas/corrupt.0.v1.1 + +// RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED +// SKIPPED: validation skipped + +#include "test.h" + +int func(void); diff --git a/clang/tools/driver/cc1depscanProtocol.cpp b/clang/tools/driver/cc1depscanProtocol.cpp index b29ef162a3ee6..83a1556b1abae 100644 --- a/clang/tools/driver/cc1depscanProtocol.cpp +++ b/clang/tools/driver/cc1depscanProtocol.cpp @@ -188,7 +188,8 @@ Expected ScanDaemon::launchDaemon(StringRef BasePath, #endif static constexpr const char *PassThroughEnv[] = { - "LLVM_CAS_LOG", + "LLVM_CAS_LOG", + "LLVM_CAS_DISABLE_VALIDATION", }; SmallVector EnvP; for (const char *Name : PassThroughEnv) diff --git a/clang/tools/driver/cc1depscan_main.cpp b/clang/tools/driver/cc1depscan_main.cpp index c26aff93cee77..66f4ae2d1109d 100644 --- a/clang/tools/driver/cc1depscan_main.cpp +++ b/clang/tools/driver/cc1depscan_main.cpp @@ -28,9 +28,11 @@ #include "clang/Tooling/DependencyScanning/ScanAndUpdateArgs.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Bitstream/BitstreamReader.h" #include "llvm/CAS/ActionCache.h" +#include "llvm/CAS/BuiltinUnifiedCASDatabases.h" #include "llvm/CAS/CASProvidingFileSystem.h" #include "llvm/CAS/CachingOnDiskFileSystem.h" #include "llvm/CAS/HierarchicalTreeBuilder.h" @@ -41,6 +43,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Path.h" #include "llvm/Support/PrefixMapper.h" @@ -52,6 +55,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include #include #if LLVM_ON_UNIX @@ -631,8 +635,8 @@ namespace { struct ScanServer { const char *Argv0 = nullptr; SmallString<128> BasePath; - /// List of cas options. - ArrayRef CASArgs; + CASOptions CASOpts; + bool ProduceIncludeTree = true; int PidFD = -1; int ListenSocket = -1; /// \p std::nullopt means it runs indefinitely. @@ -641,7 +645,7 @@ struct ScanServer { ~ScanServer() { shutdown(); } - void start(bool Exclusive); + void start(bool Exclusive, ArrayRef CASArgs); int listen(); /// Tear down the socket and bind file immediately but wait till all existing @@ -706,13 +710,13 @@ int cc1depscand_main(ArrayRef Argv, const char *Argv0, // particular "build session", to shutdown, then have it stay alive until the // session is finished. bool LongRunning = false; - + ArrayRef CASArgs; for (const auto *A = Argv.begin() + 2; A != Argv.end(); ++A) { StringRef Arg(*A); if (Arg == "-long-running") LongRunning = true; else if (Arg == "-cas-args") { - Server.CASArgs = ArrayRef(A + 1, Argv.end()); + CASArgs = ArrayRef(A + 1, Argv.end()); break; } } @@ -723,7 +727,7 @@ int cc1depscand_main(ArrayRef Argv, const char *Argv0, reportError(Twine("cannot create basedir: ") + EC.message()); if (Command == "-serve") { - Server.start(/*Exclusive*/ true); + Server.start(/*Exclusive*/ true, CASArgs); return Server.listen(); } else if (Command == "-execute") { @@ -734,7 +738,7 @@ int cc1depscand_main(ArrayRef Argv, const char *Argv0, } // Make sure to start the server before executing the command. - Server.start(/*Exclusive*/ true); + Server.start(/*Exclusive*/ true, CASArgs); std::thread ServerThread([&Server]() { Server.listen(); }); setenv("CLANG_CACHE_SCAN_DAEMON_SOCKET_PATH", Server.BasePath.c_str(), @@ -785,11 +789,59 @@ int cc1depscand_main(ArrayRef Argv, const char *Argv0, openAndReplaceFD(1, LogOutPath); openAndReplaceFD(2, LogErrPath); - Server.start(/*Exclusive*/ false); + Server.start(/*Exclusive*/ false, CASArgs); return Server.listen(); } -void ScanServer::start(bool Exclusive) { +static std::optional +findLLVMCasBinary(const char *Argv0, llvm::SmallVectorImpl &Storage) { + using namespace llvm::sys; + std::string Path = fs::getMainExecutable(Argv0, (void *)cc1depscan_main); + Storage.assign(Path.begin(), Path.end()); + path::remove_filename(Storage); + path::append(Storage, "llvm-cas"); + StringRef PathStr(Storage.data(), Storage.size()); + if (fs::exists(PathStr)) + return PathStr; + // Look for a corresponding usr/local/bin/llvm-cas + PathStr = path::parent_path(PathStr); + if (path::filename(PathStr) != "bin") + return std::nullopt; + PathStr = path::parent_path(PathStr); + Storage.truncate(PathStr.size()); + path::append(Storage, "local", "bin", "llvm-cas"); + PathStr = StringRef{Storage.data(), Storage.size()}; + if (fs::exists(PathStr)) + return PathStr; + return std::nullopt; +} + +void ScanServer::start(bool Exclusive, ArrayRef CASArgs) { + // Parse CAS options and validate if needed. + DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions()); + + const OptTable &Opts = clang::driver::getDriverOptTable(); + unsigned MissingArgIndex, MissingArgCount; + auto ParsedCASArgs = + Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount); + CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags); + CASOpts.ensurePersistentCAS(); + ProduceIncludeTree = + ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree); + + static std::once_flag ValidateOnce; + std::call_once(ValidateOnce, [&] { + if (getenv("LLVM_CAS_DISABLE_VALIDATION")) + return; + if (CASOpts.CASPath.empty() || !CASOpts.PluginPath.empty()) + return; + SmallString<64> LLVMCasStorage; + ExitOnErr(llvm::cas::validateOnDiskUnifiedCASDatabasesIfNeeded( + CASOpts.getResolvedCASPath(), /*CheckHash=*/true, + /*AllowRecovery=*/true, + /*Force=*/false, findLLVMCasBinary(Argv0, LLVMCasStorage))); + }); + // Check the pidfile. SmallString<128> PidPath; (BasePath + ".pid").toVector(PidPath); @@ -828,16 +880,6 @@ int ScanServer::listen() { llvm::DefaultThreadPool Pool; DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions()); - CASOptions CASOpts; - const OptTable &Opts = clang::driver::getDriverOptTable(); - unsigned MissingArgIndex, MissingArgCount; - auto ParsedCASArgs = - Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount); - CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags); - CASOpts.ensurePersistentCAS(); - bool ProduceIncludeTree = - ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree); - std::shared_ptr CAS; std::shared_ptr Cache; std::tie(CAS, Cache) = CASOpts.getOrCreateDatabases(Diags); From 71f019793481d6fbcb6c7dada1b36be4bd9bb69b Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Wed, 30 Apr 2025 10:05:13 -0700 Subject: [PATCH 7/8] Updates for review (cherry picked from commit cb7bc8c28c83d0887dc50fef4748dda2bf61c07b) --- clang/include/clang/CAS/CASOptions.h | 2 +- clang/lib/CAS/CASOptions.cpp | 14 +++++++------- clang/tools/driver/cc1depscan_main.cpp | 4 +++- llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h | 4 ++-- llvm/include/llvm/CAS/UnifiedOnDiskCache.h | 2 +- llvm/lib/CAS/UnifiedOnDiskCache.cpp | 2 +- llvm/tools/llvm-cas/llvm-cas.cpp | 2 +- 7 files changed, 16 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/CAS/CASOptions.h b/clang/include/clang/CAS/CASOptions.h index a40e158d4a914..63f5a1f6d36c6 100644 --- a/clang/include/clang/CAS/CASOptions.h +++ b/clang/include/clang/CAS/CASOptions.h @@ -117,7 +117,7 @@ class CASOptions : public CASConfiguration { /// default on-disk CAS, otherwise this is a noop. void ensurePersistentCAS(); - std::string getResolvedCASPath() const; + void getResolvedCASPath(llvm::SmallVectorImpl &Result) const; private: /// Initialize Cached CAS and ActionCache. diff --git a/clang/lib/CAS/CASOptions.cpp b/clang/lib/CAS/CASOptions.cpp index bee4f8be41855..e9d940d5cf2be 100644 --- a/clang/lib/CAS/CASOptions.cpp +++ b/clang/lib/CAS/CASOptions.cpp @@ -108,6 +108,7 @@ llvm::Error CASOptions::initCache() const { } SmallString<256> PathBuf; + getResolvedCASPath(PathBuf); if (CASPath == "auto") { getDefaultOnDiskCASPath(PathBuf); CASPath = PathBuf; @@ -120,11 +121,10 @@ llvm::Error CASOptions::initCache() const { return llvm::Error::success(); } -std::string CASOptions::getResolvedCASPath() const { - if (CASPath != "auto") - return CASPath; - - SmallString<256> PathBuf; - getDefaultOnDiskCASPath(PathBuf); - return std::string(PathBuf); +void CASOptions::getResolvedCASPath(SmallVectorImpl &Result) const { + if (CASPath == "auto") { + getDefaultOnDiskCASPath(Result); + } else { + Result.assign(CASPath.begin(), CASPath.end()); + } } diff --git a/clang/tools/driver/cc1depscan_main.cpp b/clang/tools/driver/cc1depscan_main.cpp index 66f4ae2d1109d..697704c54cd4f 100644 --- a/clang/tools/driver/cc1depscan_main.cpp +++ b/clang/tools/driver/cc1depscan_main.cpp @@ -836,8 +836,10 @@ void ScanServer::start(bool Exclusive, ArrayRef CASArgs) { if (CASOpts.CASPath.empty() || !CASOpts.PluginPath.empty()) return; SmallString<64> LLVMCasStorage; + SmallString<64> CASPath; + CASOpts.getResolvedCASPath(CASPath); ExitOnErr(llvm::cas::validateOnDiskUnifiedCASDatabasesIfNeeded( - CASOpts.getResolvedCASPath(), /*CheckHash=*/true, + CASPath, /*CheckHash=*/true, /*AllowRecovery=*/true, /*Force=*/false, findLLVMCasBinary(Argv0, LLVMCasStorage))); }); diff --git a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h index c374a3aaec04f..bd0a0b65f57f8 100644 --- a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h +++ b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h @@ -29,7 +29,7 @@ enum class ValidationResult { /// The data is already valid. Valid, /// The data was invalid, but was recovered. - RecoveredValid, + Recovered, /// Validation was skipped, as it was not needed. Skipped, }; @@ -46,7 +46,7 @@ enum class ValidationResult { /// using the given \c llvm-cas executable which protects against crashes /// during validation. Otherwise validation is performed in-process. /// -/// \returns \c Valid if the data is already valid, \c RecoveredValid if data +/// \returns \c Valid if the data is already valid, \c Recovered if data /// was invalid but has been cleared, \c Skipped if validation is not needed, /// or an \c Error if validation cannot be performed or if the data is left /// in an invalid state because \p AllowRecovery is false. diff --git a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h index 4fa239710cecd..90e8d36df5d52 100644 --- a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h +++ b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h @@ -102,7 +102,7 @@ class UnifiedOnDiskCache { /// using the given \c llvm-cas executable which protects against crashes /// during validation. Otherwise validation is performed in-process. /// - /// \returns \c Valid if the data is already valid, \c RecoveredValid if data + /// \returns \c Valid if the data is already valid, \c Recovered if data /// was invalid but has been cleared, \c Skipped if validation is not needed, /// or an \c Error if validation cannot be performed or if the data is left /// in an invalid state because \p AllowRecovery is false. diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp index c5e9bcccd1833..03b172d72bc3d 100644 --- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -469,7 +469,7 @@ UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, return createFileError(PathBuf, OS.error()); } - return NeedsRecovery ? ValidationResult::RecoveredValid + return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid; } diff --git a/llvm/tools/llvm-cas/llvm-cas.cpp b/llvm/tools/llvm-cas/llvm-cas.cpp index a633b9361c09e..40ccf5c1ec57a 100644 --- a/llvm/tools/llvm-cas/llvm-cas.cpp +++ b/llvm/tools/llvm-cas/llvm-cas.cpp @@ -776,7 +776,7 @@ int validateIfNeeded(StringRef Path, StringRef PluginPath, case ValidationResult::Valid: outs() << "validated successfully\n"; break; - case ValidationResult::RecoveredValid: + case ValidationResult::Recovered: outs() << "recovered from invalid data\n"; break; case ValidationResult::Skipped: From fe19246aada95dff6388ee3571678141ded0c9eb Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Wed, 30 Apr 2025 10:20:56 -0700 Subject: [PATCH 8/8] Handle EEXIST for Linux rename failure Documentation says it can either be ENOTEMPTY (like Darwin) or EEXIST. Also print the error. (cherry picked from commit bb52d9696ebbdbdf1c5a43ed9ea6c3f5130ec157) --- llvm/lib/CAS/UnifiedOnDiskCache.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp index 03b172d72bc3d..b7491228784ea 100644 --- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -442,7 +442,8 @@ UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, sys::path::append(GCPath, CorruptPrefix + std::to_string(Attempt) + "." + DBDir); EC = sys::fs::rename(PathBuf, GCPath); - if (EC != errc::directory_not_empty) + // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST. + if (EC != errc::directory_not_empty && EC != errc::file_exists) break; } if (Attempt == MaxAttempts) @@ -451,7 +452,7 @@ UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, " failed: too many CAS directories awaiting pruning"); if (EC) return createStringError(EC, "rename " + PathBuf + " to " + GCPath + - " failed"); + " failed: " + EC.message()); } Recovered = true; }