diff --git a/clang/include/clang/CAS/CASOptions.h b/clang/include/clang/CAS/CASOptions.h index 03fb00a7fbff0..63f5a1f6d36c6 100644 --- a/clang/include/clang/CAS/CASOptions.h +++ b/clang/include/clang/CAS/CASOptions.h @@ -117,6 +117,8 @@ class CASOptions : public CASConfiguration { /// default on-disk CAS, otherwise this is a noop. void ensurePersistentCAS(); + void getResolvedCASPath(llvm::SmallVectorImpl &Result) const; + private: /// Initialize Cached CAS and ActionCache. llvm::Error initCache() const; diff --git a/clang/lib/CAS/CASOptions.cpp b/clang/lib/CAS/CASOptions.cpp index a2d30095424ac..e9d940d5cf2be 100644 --- a/clang/lib/CAS/CASOptions.cpp +++ b/clang/lib/CAS/CASOptions.cpp @@ -108,6 +108,7 @@ llvm::Error CASOptions::initCache() const { } SmallString<256> PathBuf; + getResolvedCASPath(PathBuf); if (CASPath == "auto") { getDefaultOnDiskCASPath(PathBuf); CASPath = PathBuf; @@ -119,3 +120,11 @@ llvm::Error CASOptions::initCache() const { std::tie(Cache.CAS, Cache.AC) = std::move(DBs); return llvm::Error::success(); } + +void CASOptions::getResolvedCASPath(SmallVectorImpl &Result) const { + if (CASPath == "auto") { + getDefaultOnDiskCASPath(Result); + } else { + Result.assign(CASPath.begin(), CASPath.end()); + } +} diff --git a/clang/test/CAS/depscan-cas-log.c b/clang/test/CAS/depscan-cas-log.c new file mode 100644 index 0000000000000..6d85d7a2c5e18 --- /dev/null +++ b/clang/test/CAS/depscan-cas-log.c @@ -0,0 +1,18 @@ +// Ensure both the first clang process and the daemon have logging enabled. +// It's hard to check this exhaustively, but in practice if the daemon does not +// enable logging there are currently zero records in the log. 
+ +// RUN: rm -rf %t && mkdir %t +// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_LOG=1 LLVM_CAS_DISABLE_VALIDATION=1 %clang \ +// RUN: -cc1depscan -fdepscan=daemon -fdepscan-include-tree -o - \ +// RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas +// RUN: FileCheck %s --input-file %t/cas/v1.log + +// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v8.index' +// CHECK: [[PID1]] {{[0-9]*}}: create subtrie + +// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v8.index' +// Even a minimal compilation involves at least 9 records for the cache key. +// CHECK-COUNT-9: [[PID2]] {{[0-9]*}}: create record + +// CHECK: [[PID1]] {{[0-9]*}}: close mmap '{{.*}}v8.index' diff --git a/clang/test/CAS/validate-once.c b/clang/test/CAS/validate-once.c new file mode 100644 index 0000000000000..774727fc64581 --- /dev/null +++ b/clang/test/CAS/validate-once.c @@ -0,0 +1,18 @@ +// RUN: rm -rf %t + +// RUN: llvm-cas --cas %t/cas --ingest %s +// RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak + +// RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \ +// RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \ +// RUN: -Xclang -fcas-path -Xclang %t/cas \ +// RUN: -fdepscan=daemon -fdepscan-daemon=%{clang-daemon-dir}/%basename_t -fsyntax-only -x c %s + +// RUN: ls %t/cas/corrupt.0.v1.1 + +// RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED +// SKIPPED: validation skipped + +#include "test.h" + +int func(void); diff --git a/clang/tools/driver/cc1depscanProtocol.cpp b/clang/tools/driver/cc1depscanProtocol.cpp index 438bb79384ba2..83a1556b1abae 100644 --- a/clang/tools/driver/cc1depscanProtocol.cpp +++ b/clang/tools/driver/cc1depscanProtocol.cpp @@ -17,6 +17,7 @@ #include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include "llvm/Support/StringSaver.h" +#include #if LLVM_ON_UNIX #include // FIXME: Unix-only. Not portable. 
@@ -186,10 +187,20 @@ Expected ScanDaemon::launchDaemon(StringRef BasePath, return llvm::errorCodeToError(std::error_code(EC, std::generic_category())); #endif + static constexpr const char *PassThroughEnv[] = { + "LLVM_CAS_LOG", + "LLVM_CAS_DISABLE_VALIDATION", + }; + SmallVector EnvP; + for (const char *Name : PassThroughEnv) + if (const char *Value = getenv(Name)) + EnvP.push_back(Saver.save(llvm::Twine(Name) + "=" + Value).data()); + EnvP.push_back(nullptr); + ::pid_t Pid; int EC = ::posix_spawn(&Pid, Args[0], /*file_actions=*/nullptr, &Attrs, const_cast(LaunchArgs.data()), - /*envp=*/nullptr); + const_cast(EnvP.data())); if (EC) return llvm::errorCodeToError(std::error_code(EC, std::generic_category())); diff --git a/clang/tools/driver/cc1depscan_main.cpp b/clang/tools/driver/cc1depscan_main.cpp index c26aff93cee77..697704c54cd4f 100644 --- a/clang/tools/driver/cc1depscan_main.cpp +++ b/clang/tools/driver/cc1depscan_main.cpp @@ -28,9 +28,11 @@ #include "clang/Tooling/DependencyScanning/ScanAndUpdateArgs.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Bitstream/BitstreamReader.h" #include "llvm/CAS/ActionCache.h" +#include "llvm/CAS/BuiltinUnifiedCASDatabases.h" #include "llvm/CAS/CASProvidingFileSystem.h" #include "llvm/CAS/CachingOnDiskFileSystem.h" #include "llvm/CAS/HierarchicalTreeBuilder.h" @@ -41,6 +43,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Path.h" #include "llvm/Support/PrefixMapper.h" @@ -52,6 +55,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include #include #if LLVM_ON_UNIX @@ -631,8 +635,8 @@ namespace { struct ScanServer { const char *Argv0 = nullptr; SmallString<128> BasePath; - /// List of cas options. 
- ArrayRef CASArgs; + CASOptions CASOpts; + bool ProduceIncludeTree = true; int PidFD = -1; int ListenSocket = -1; /// \p std::nullopt means it runs indefinitely. @@ -641,7 +645,7 @@ struct ScanServer { ~ScanServer() { shutdown(); } - void start(bool Exclusive); + void start(bool Exclusive, ArrayRef CASArgs); int listen(); /// Tear down the socket and bind file immediately but wait till all existing @@ -706,13 +710,13 @@ int cc1depscand_main(ArrayRef Argv, const char *Argv0, // particular "build session", to shutdown, then have it stay alive until the // session is finished. bool LongRunning = false; - + ArrayRef CASArgs; for (const auto *A = Argv.begin() + 2; A != Argv.end(); ++A) { StringRef Arg(*A); if (Arg == "-long-running") LongRunning = true; else if (Arg == "-cas-args") { - Server.CASArgs = ArrayRef(A + 1, Argv.end()); + CASArgs = ArrayRef(A + 1, Argv.end()); break; } } @@ -723,7 +727,7 @@ int cc1depscand_main(ArrayRef Argv, const char *Argv0, reportError(Twine("cannot create basedir: ") + EC.message()); if (Command == "-serve") { - Server.start(/*Exclusive*/ true); + Server.start(/*Exclusive*/ true, CASArgs); return Server.listen(); } else if (Command == "-execute") { @@ -734,7 +738,7 @@ int cc1depscand_main(ArrayRef Argv, const char *Argv0, } // Make sure to start the server before executing the command. 
- Server.start(/*Exclusive*/ true); + Server.start(/*Exclusive*/ true, CASArgs); std::thread ServerThread([&Server]() { Server.listen(); }); setenv("CLANG_CACHE_SCAN_DAEMON_SOCKET_PATH", Server.BasePath.c_str(), @@ -785,11 +789,61 @@ int cc1depscand_main(ArrayRef Argv, const char *Argv0, openAndReplaceFD(1, LogOutPath); openAndReplaceFD(2, LogErrPath); - Server.start(/*Exclusive*/ false); + Server.start(/*Exclusive*/ false, CASArgs); return Server.listen(); } -void ScanServer::start(bool Exclusive) { +static std::optional +findLLVMCasBinary(const char *Argv0, llvm::SmallVectorImpl &Storage) { + using namespace llvm::sys; + std::string Path = fs::getMainExecutable(Argv0, (void *)cc1depscan_main); + Storage.assign(Path.begin(), Path.end()); + path::remove_filename(Storage); + path::append(Storage, "llvm-cas"); + StringRef PathStr(Storage.data(), Storage.size()); + if (fs::exists(PathStr)) + return PathStr; + // Look for a corresponding usr/local/bin/llvm-cas + PathStr = path::parent_path(PathStr); + if (path::filename(PathStr) != "bin") + return std::nullopt; + PathStr = path::parent_path(PathStr); + Storage.truncate(PathStr.size()); + path::append(Storage, "local", "bin", "llvm-cas"); + PathStr = StringRef{Storage.data(), Storage.size()}; + if (fs::exists(PathStr)) + return PathStr; + return std::nullopt; +} + +void ScanServer::start(bool Exclusive, ArrayRef CASArgs) { + // Parse CAS options and validate if needed. 
+ DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions()); + + const OptTable &Opts = clang::driver::getDriverOptTable(); + unsigned MissingArgIndex, MissingArgCount; + auto ParsedCASArgs = + Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount); + CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags); + CASOpts.ensurePersistentCAS(); + ProduceIncludeTree = + ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree); + + static std::once_flag ValidateOnce; + std::call_once(ValidateOnce, [&] { + if (getenv("LLVM_CAS_DISABLE_VALIDATION")) + return; + if (CASOpts.CASPath.empty() || !CASOpts.PluginPath.empty()) + return; + SmallString<64> LLVMCasStorage; + SmallString<64> CASPath; + CASOpts.getResolvedCASPath(CASPath); + ExitOnErr(llvm::cas::validateOnDiskUnifiedCASDatabasesIfNeeded( + CASPath, /*CheckHash=*/true, + /*AllowRecovery=*/true, + /*Force=*/false, findLLVMCasBinary(Argv0, LLVMCasStorage))); + }); + // Check the pidfile. SmallString<128> PidPath; (BasePath + ".pid").toVector(PidPath); @@ -828,16 +882,6 @@ int ScanServer::listen() { llvm::DefaultThreadPool Pool; DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions()); - CASOptions CASOpts; - const OptTable &Opts = clang::driver::getDriverOptTable(); - unsigned MissingArgIndex, MissingArgCount; - auto ParsedCASArgs = - Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount); - CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags); - CASOpts.ensurePersistentCAS(); - bool ProduceIncludeTree = - ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree); - std::shared_ptr CAS; std::shared_ptr Cache; std::tie(CAS, Cache) = CASOpts.getOrCreateDatabases(Diags); diff --git a/llvm/include/llvm/CAS/ActionCache.h b/llvm/include/llvm/CAS/ActionCache.h index f548fb0522480..42236c9b5b45f 100644 --- a/llvm/include/llvm/CAS/ActionCache.h +++ b/llvm/include/llvm/CAS/ActionCache.h @@ -114,6 +114,9 @@ class ActionCache { Globally, std::move(Callback), CancelObj); 
} + /// Validate the ActionCache contents. + virtual Error validate() const = 0; + virtual ~ActionCache() = default; protected: diff --git a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h index 969d097b6ceca..bd0a0b65f57f8 100644 --- a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h +++ b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h @@ -21,6 +21,39 @@ class ObjectStore; Expected, std::unique_ptr>> createOnDiskUnifiedCASDatabases(StringRef Path); +/// Represents the result of validating the contents using +/// \c validateOnDiskUnifiedCASDatabasesIfNeeded. +/// +/// Note: invalid results are handled as an \c Error. +enum class ValidationResult { + /// The data is already valid. + Valid, + /// The data was invalid, but was recovered. + Recovered, + /// Validation was skipped, as it was not needed. + Skipped, +}; + +/// Validate the data in \p Path, if needed to ensure correctness. +/// +/// \param Path directory for the on-disk database. +/// \param CheckHash Whether to validate hashes match the data. +/// \param AllowRecovery Whether to automatically recover from invalid data by +/// marking the files for garbage collection. +/// \param ForceValidation Whether to force validation to occur even if it +/// should not be necessary. +/// \param LLVMCasBinary If provided, validation is performed out-of-process +/// using the given \c llvm-cas executable which protects against crashes +/// during validation. Otherwise validation is performed in-process. +/// +/// \returns \c Valid if the data is already valid, \c Recovered if data +/// was invalid but has been cleared, \c Skipped if validation is not needed, +/// or an \c Error if validation cannot be performed or if the data is left +/// in an invalid state because \p AllowRecovery is false. 
+Expected validateOnDiskUnifiedCASDatabasesIfNeeded( + StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation, + std::optional LLVMCasBinary); + } // namespace llvm::cas #endif // LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H diff --git a/llvm/include/llvm/CAS/OnDiskCASLogger.h b/llvm/include/llvm/CAS/OnDiskCASLogger.h index 0ceb4d9ad7836..e0c21b771b11d 100644 --- a/llvm/include/llvm/CAS/OnDiskCASLogger.h +++ b/llvm/include/llvm/CAS/OnDiskCASLogger.h @@ -62,6 +62,11 @@ class OnDiskCASLogger { void log_MappedFileRegionBumpPtr_allocate(void *Region, TrieOffset Off, size_t Size); void log_UnifiedOnDiskCache_collectGarbage(StringRef Path); + void log_UnifiedOnDiskCache_validateIfNeeded( + StringRef Path, uint64_t BootTime, uint64_t ValidationTime, + bool CheckHash, bool AllowRecovery, bool Force, + std::optional LLVMCas, StringRef ValidationError, bool Skipped, + bool Recovered); void log_TempFile_create(StringRef Name); void log_TempFile_keep(StringRef TmpName, StringRef Name, std::error_code EC); void log_TempFile_remove(StringRef TmpName, std::error_code EC); diff --git a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h index 94ccd728519c2..35c574aacb573 100644 --- a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h +++ b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h @@ -60,6 +60,9 @@ class OnDiskKeyValueDB { StringRef ValueName, size_t ValueSize, std::shared_ptr Logger = nullptr); + using CheckValueT = function_ref)>; + Error validate(CheckValueT CheckValue) const; + private: OnDiskKeyValueDB(size_t ValueSize, OnDiskHashMappedTrie Cache) : ValueSize(ValueSize), Cache(std::move(Cache)) {} diff --git a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h index be1880d6b3023..90e8d36df5d52 100644 --- a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h +++ b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h @@ -9,6 +9,7 @@ #ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H #define LLVM_CAS_UNIFIEDONDISKCACHE_H +#include 
"llvm/CAS/BuiltinUnifiedCASDatabases.h" #include "llvm/CAS/OnDiskGraphDB.h" namespace llvm::cas::ondisk { @@ -82,6 +83,34 @@ class UnifiedOnDiskCache { OnDiskGraphDB::FaultInPolicy FaultInPolicy = OnDiskGraphDB::FaultInPolicy::FullTree); + /// Validate the data in \p Path, if needed to ensure correctness. + /// + /// Note: if invalid data is detected and \p AllowRecovery is true, then + /// recovery requires exclusive access to the CAS and it is an error to + /// attempt recovery if there is concurrent use of the CAS. + /// + /// \param Path directory for the on-disk database. + /// \param HashName Identifier name for the hashing algorithm that is going to + /// be used. + /// \param HashByteSize Size for the object digest hash bytes. + /// \param CheckHash Whether to validate hashes match the data. + /// \param AllowRecovery Whether to automatically recover from invalid data by + /// marking the files for garbage collection. + /// \param ForceValidation Whether to force validation to occur even if it + /// should not be necessary. + /// \param LLVMCasBinary If provided, validation is performed out-of-process + /// using the given \c llvm-cas executable which protects against crashes + /// during validation. Otherwise validation is performed in-process. + /// + /// \returns \c Valid if the data is already valid, \c Recovered if data + /// was invalid but has been cleared, \c Skipped if validation is not needed, + /// or an \c Error if validation cannot be performed or if the data is left + /// in an invalid state because \p AllowRecovery is false. + static Expected + validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize, + bool CheckHash, bool AllowRecovery, bool ForceValidation, + std::optional LLVMCasBinary); + /// This is called implicitly at destruction time, so it is not required for a /// client to call this. After calling \p close the only method that is valid /// to call is \p needsGarbaseCollection. 
@@ -124,6 +153,8 @@ class UnifiedOnDiskCache { ~UnifiedOnDiskCache(); + Error validateActionCache(); + private: UnifiedOnDiskCache(); diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h index cb06ac19f0bb7..cb02967655cca 100644 --- a/llvm/include/llvm/Support/Error.h +++ b/llvm/include/llvm/Support/Error.h @@ -1399,6 +1399,23 @@ inline Error createFileError(const Twine &F, size_t Line, std::error_code EC) { return createFileError(F, Line, errorCodeToError(EC)); } +/// Create a StringError with the specified error code and prepend the file path +/// to it. +inline Error createFileError(const Twine &F, std::error_code EC, + const Twine &S) { + Error E = createStringError(EC, S); + return createFileError(F, std::move(E)); +} + +/// Create a StringError with the specified error code and prepend the file path +/// to it. +template +inline Error createFileError(const Twine &F, std::error_code EC, + char const *Fmt, const Ts &...Vals) { + Error E = createStringError(EC, Fmt, Vals...); + return createFileError(F, std::move(E)); +} + Error createFileError(const Twine &F, ErrorSuccess) = delete; /// Helper for check-and-exit error handling. 
diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp index 7f66ad843ea78..83891b4215954 100644 --- a/llvm/lib/CAS/ActionCaches.cpp +++ b/llvm/lib/CAS/ActionCaches.cpp @@ -52,6 +52,10 @@ class InMemoryActionCache final : public ActionCache { Expected> getImpl(ArrayRef ActionKey, bool Globally) const final; + Error validate() const final { + return createStringError("InMemoryActionCache doesn't support validate()"); + } + private: using DataT = CacheEntry; using InMemoryCacheT = ThreadSafeHashMappedTrie; @@ -68,6 +72,8 @@ class OnDiskActionCache final : public ActionCache { static Expected> create(StringRef Path); + Error validate() const final; + private: static StringRef getHashName() { return "BLAKE3"; } @@ -86,6 +92,8 @@ class UnifiedOnDiskActionCache final : public ActionCache { UnifiedOnDiskActionCache(std::shared_ptr UniDB); + Error validate() const final; + private: std::shared_ptr UniDB; }; @@ -198,6 +206,12 @@ Error OnDiskActionCache::putImpl(ArrayRef Key, const CASID &Result, ArrayRef((const uint8_t *)Observed.data(), Observed.size())); } +Error OnDiskActionCache::validate() const { + // FIXME: without the matching CAS there is nothing we can check about the + // cached values. The hash size is already validated by the DB validator. 
+ return DB->validate(nullptr); +} + UnifiedOnDiskActionCache::UnifiedOnDiskActionCache( std::shared_ptr UniDB) : ActionCache(builtin::BuiltinCASContext::getDefaultContext()), @@ -233,6 +247,10 @@ Error UnifiedOnDiskActionCache::putImpl(ArrayRef Key, UniDB->getGraphDB().getDigest(*Observed)); } +Error UnifiedOnDiskActionCache::validate() const { + return UniDB->validateActionCache(); +} + Expected> cas::createOnDiskActionCache(StringRef Path) { #if LLVM_ENABLE_ONDISK_CAS diff --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp index 87073cf2b4f23..40d898e4b7f56 100644 --- a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp +++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp @@ -23,3 +23,16 @@ cas::createOnDiskUnifiedCASDatabases(StringRef Path) { auto AC = builtin::createActionCacheFromUnifiedOnDiskCache(std::move(UniDB)); return std::make_pair(std::move(CAS), std::move(AC)); } + +Expected cas::validateOnDiskUnifiedCASDatabasesIfNeeded( + StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation, + std::optional LLVMCasBinary) { +#if LLVM_ENABLE_ONDISK_CAS + return ondisk::UnifiedOnDiskCache::validateIfNeeded( + Path, builtin::BuiltinCASContext::getHashName(), + sizeof(builtin::HashType), CheckHash, AllowRecovery, ForceValidation, + LLVMCasBinary); +#else + return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled"); +#endif +} \ No newline at end of file diff --git a/llvm/lib/CAS/OnDiskCASLogger.cpp b/llvm/lib/CAS/OnDiskCASLogger.cpp index e9a68a548edad..a39cbbd3e0b58 100644 --- a/llvm/lib/CAS/OnDiskCASLogger.cpp +++ b/llvm/lib/CAS/OnDiskCASLogger.cpp @@ -211,6 +211,25 @@ void OnDiskCASLogger::log_UnifiedOnDiskCache_collectGarbage(StringRef Path) { Log << "collect garbage '" << Path << "'"; } +void OnDiskCASLogger::log_UnifiedOnDiskCache_validateIfNeeded( + StringRef Path, uint64_t BootTime, uint64_t ValidationTime, bool CheckHash, + bool AllowRecovery, bool Force, std::optional LLVMCas, + 
StringRef ValidationError, bool Skipped, bool Recovered) { + TextLogLine Log(OS); + Log << "validate-if-needed '" << Path << "'"; + Log << " boot=" << BootTime << " last-valid=" << ValidationTime; + Log << " check-hash=" << CheckHash << " allow-recovery=" << AllowRecovery; + Log << " force=" << Force; + if (LLVMCas) + Log << " llvm-cas=" << *LLVMCas; + if (Skipped) + Log << " skipped"; + if (Recovered) + Log << " recovered"; + if (!ValidationError.empty()) + Log << " data was invalid " << ValidationError; +} + void OnDiskCASLogger::log_TempFile_create(StringRef Name) { TextLogLine Log(OS); Log << "standalone file create '" << Name << "'"; diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp index ec410fdc4de1b..f3a0e863d1dab 100644 --- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp +++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp @@ -81,3 +81,25 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, return std::unique_ptr( new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache))); } + +Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { + return Cache.validate( + [&](FileOffset Offset, + OnDiskHashMappedTrie::ConstValueProxy Record) -> Error { + auto formatError = [&](Twine Msg) { + return createStringError( + llvm::errc::illegal_byte_sequence, + "bad cache value at 0x" + + utohexstr((uint64_t)Offset.get(), /*LowerCase=*/true) + ": " + + Msg.str()); + }; + + if (Record.Data.size() != ValueSize) + return formatError("wrong cache value size"); + if (!isAligned(Align(8), Record.Data.size())) + return formatError("wrong cache value alignment"); + if (CheckValue) + return CheckValue(Offset, Record.Data); + return Error::success(); + }); +} \ No newline at end of file diff --git a/llvm/lib/CAS/PluginAPI.h b/llvm/lib/CAS/PluginAPI.h index bf019748da829..ec5413a9834a8 100644 --- a/llvm/lib/CAS/PluginAPI.h +++ b/llvm/lib/CAS/PluginAPI.h @@ -108,6 +108,8 @@ struct llcas_functions_t { bool globally, void *ctx_cb, 
llcas_actioncache_put_cb, llcas_cancellable_t *); + + bool (*actioncache_validate)(llcas_cas_t, char **error); }; #endif // LLVM_LIB_CAS_PLUGINAPI_H diff --git a/llvm/lib/CAS/PluginAPI_functions.def b/llvm/lib/CAS/PluginAPI_functions.def index 3dca709689d4a..f242611bdd603 100644 --- a/llvm/lib/CAS/PluginAPI_functions.def +++ b/llvm/lib/CAS/PluginAPI_functions.def @@ -7,6 +7,7 @@ CASPLUGINAPI_FUNCTION(actioncache_get_for_digest, true) CASPLUGINAPI_FUNCTION(actioncache_get_for_digest_async, true) CASPLUGINAPI_FUNCTION(actioncache_put_for_digest, true) CASPLUGINAPI_FUNCTION(actioncache_put_for_digest_async, true) +CASPLUGINAPI_FUNCTION(actioncache_validate, false) CASPLUGINAPI_FUNCTION(cancellable_cancel, false) CASPLUGINAPI_FUNCTION(cancellable_dispose, false) CASPLUGINAPI_FUNCTION(cas_contains_object, true) diff --git a/llvm/lib/CAS/PluginCAS.cpp b/llvm/lib/CAS/PluginCAS.cpp index f27be05711495..9d9553160bdd1 100644 --- a/llvm/lib/CAS/PluginCAS.cpp +++ b/llvm/lib/CAS/PluginCAS.cpp @@ -461,6 +461,8 @@ class PluginActionCache : public ActionCache { PluginActionCache(std::shared_ptr); + Error validate() const final; + private: std::shared_ptr Ctx; }; @@ -596,6 +598,16 @@ void PluginActionCache::putImplAsync(ArrayRef ResolvedKey, PluginActionCache::PluginActionCache(std::shared_ptr CASCtx) : ActionCache(*CASCtx), Ctx(std::move(CASCtx)) {} +Error PluginActionCache::validate() const { + if (Ctx->Functions.actioncache_validate) { + char *c_err = nullptr; + if (Ctx->Functions.actioncache_validate(Ctx->c_cas, &c_err)) + return Ctx->errorAndDispose(c_err); + return Error::success(); + } + return createStringError("plugin action cache doesn't support validation"); +} + //===----------------------------------------------------------------------===// // createPluginCASDatabases API //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp index 
632456f8f7c03..b7491228784ea 100644 --- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -47,17 +47,48 @@ // without affecting any active readers/writers in the same process or other // processes. // +// The \c UnifiedOnDiskCache also provides validation and recovery on top of the +// underlying on-disk storage. The low-level storage is designed to remain +// coherent across regular process crashes, but may be invalid after power loss +// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows +// validating the contents once per boot and can recover by marking invalid +// data for garbage collection. +// +// The data recovery described above requires exclusive access to the CAS, and +// it is an error to attempt recovery if the CAS is open in any process/thread. +// In order to maximize backwards compatibility with tools that do not perform +// validation before opening the CAS, we do not attempt to get exclusive access +// until recovery is actually performed, meaning as long as the data is valid +// it will not conflict with concurrent use. 
+// //===----------------------------------------------------------------------===// #include "llvm/CAS/UnifiedOnDiskCache.h" +#include "BuiltinCAS.h" #include "OnDiskCommon.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CAS/OnDiskCASLogger.h" +#include "llvm/CAS/OnDiskGraphDB.h" #include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include + +#if __has_include() +#include +#endif using namespace llvm; using namespace llvm::cas; @@ -68,6 +99,9 @@ using namespace llvm::cas::ondisk; /// the \p UnifiedOnDiskCache::collectGarbage function. 
static constexpr StringLiteral DBDirPrefix = "v1."; +static constexpr StringLiteral ValidationFilename = "v1.validation"; +static constexpr StringLiteral CorruptPrefix = "corrupt."; + Expected UnifiedOnDiskCache::KVPut(ObjectID Key, ObjectID Value) { return KVPut(PrimaryGraphDB->getDigest(Key), Value); } @@ -123,10 +157,35 @@ UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef Key) { return KVPut(Key, *PrimaryID); } +Error UnifiedOnDiskCache::validateActionCache() { + auto ValidateRef = [&](FileOffset Offset, ArrayRef Value) -> Error { + assert(Value.size() == sizeof(uint64_t) && "should be validated already"); + auto ID = ObjectID::fromOpaqueData(support::endian::read64le(Value.data())); + auto formatError = [&](Twine Msg) { + return createStringError( + llvm::errc::illegal_byte_sequence, + "bad record at 0x" + + utohexstr((uint64_t)Offset.get(), /*LowerCase=*/true) + ": " + + Msg.str()); + }; + if (ID.getOpaqueData() == 0) + return formatError("zero is not a valid ref"); + if (!PrimaryGraphDB->containsObject(ID)) + return formatError("cas does not contain ref"); + return Error::success(); + }; + if (Error E = PrimaryKVDB->validate(ValidateRef)) + return E; + if (UpstreamKVDB) + return UpstreamKVDB->validate(ValidateRef); + return Error::success(); +} + /// \returns all the 'v.' names of sub-directories, sorted with -/// ascending order of the integer after the dot. -static Error getAllDBDirs(StringRef Path, - SmallVectorImpl &DBDirs) { +/// ascending order of the integer after the dot. Corrupt directories, if +/// included, will come first. 
+static Error getAllDBDirs(StringRef Path, SmallVectorImpl &DBDirs, + bool IncludeCorrupt = false) { struct DBDir { uint64_t Order; std::string Name; @@ -139,6 +198,10 @@ static Error getAllDBDirs(StringRef Path, if (DirI->type() != sys::fs::file_type::directory_file) continue; StringRef SubDir = sys::path::filename(DirI->path()); + if (IncludeCorrupt && SubDir.starts_with(CorruptPrefix)) { + FoundDBDirs.push_back({0, std::string(SubDir)}); + continue; + } if (!SubDir.starts_with(DBDirPrefix)) continue; uint64_t Order; @@ -158,6 +221,23 @@ static Error getAllDBDirs(StringRef Path, return Error::success(); } +static Error getAllGarbageDirs(StringRef Path, + SmallVectorImpl &DBDirs) { + if (Error E = getAllDBDirs(Path, DBDirs, /*IncludeCorrupt=*/true)) + return E; + + // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure + // out how to handle the leftover sub-directories of the previous version. + + for (unsigned Keep = 2; Keep > 0 && !DBDirs.empty(); --Keep) { + StringRef Back(DBDirs.back()); + if (Back.starts_with(CorruptPrefix)) + break; + DBDirs.pop_back(); + } + return Error::success(); +} + /// \returns Given a sub-directory named 'v.', it outputs the /// 'v.' name. 
static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) { @@ -169,6 +249,231 @@ static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) { OS << DBDirPrefix << Count + 1; } +static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath, + bool CheckHash) { + SmallVector Args{LLVMCasBinary, "-cas", RootPath, "-validate"}; + if (CheckHash) + Args.push_back("-check-hash"); + + llvm::SmallString<128> StdErrPath; + int StdErrFD = -1; + if (std::error_code EC = sys::fs::createTemporaryFile( + "llvm-cas-validate-stderr", "txt", StdErrFD, StdErrPath, + llvm::sys::fs::OF_Text)) + return createStringError(EC, "failed to create temporary file"); + FileRemover OutputRemover(StdErrPath.c_str()); + + std::optional Redirects[] = { + {""}, // stdin = /dev/null + {""}, // stdout = /dev/null + StdErrPath.str(), + }; + + std::string ErrMsg; + int Result = + sys::ExecuteAndWait(LLVMCasBinary, Args, /*Env=*/std::nullopt, Redirects, + /*SecondsToWait=*/120, /*MemoryLimit=*/0, &ErrMsg); + + if (Result == -1) + return createStringError("failed to exec " + join(Args, " ") + ": " + + ErrMsg); + if (Result != 0) { + llvm::SmallString<64> Err("cas contents invalid"); + if (!ErrMsg.empty()) { + Err += ": "; + Err += ErrMsg; + } + auto StdErrBuf = MemoryBuffer::getFile(StdErrPath.c_str()); + if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) { + Err += ": "; + Err += (*StdErrBuf)->getBuffer(); + } + return createStringError(Err); + } + return Error::success(); +} + +static Error validateInProcess(StringRef RootPath, StringRef HashName, + unsigned HashByteSize, bool CheckHash) { + std::shared_ptr UniDB; + if (Error E = UnifiedOnDiskCache::open(RootPath, std::nullopt, HashName, + HashByteSize) + .moveInto(UniDB)) + return E; + auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB); + if (Error E = CAS->validate(CheckHash)) + return E; + if (Error E = UniDB->validateActionCache()) + return E; + return Error::success(); +} + +static Expected 
getBootTime() { +#if __has_include() && defined(KERN_BOOTTIME) + struct timeval TV; + size_t TVLen = sizeof(TV); + int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME}; + if (sysctl(KernBoot, 2, &TV, &TVLen, nullptr, 0) < 0) + return createStringError(llvm::errnoAsErrorCode(), + "failed to get boottime"); + if (TVLen != sizeof(TV)) + return createStringError("sysctl kern.boottime unexpected format"); + return TV.tv_sec; +#elif defined(__linux__) + // Use the mtime for /proc, which is recreated during system boot. + // We could also read /proc/stat and search for 'btime'. + sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status("/proc", Status)) + return createFileError("/proc", EC); + return Status.getLastModificationTime().time_since_epoch().count(); +#else + llvm::report_fatal_error("unimplemented"); +#endif +} + +Expected +UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, + unsigned HashByteSize, bool CheckHash, + bool AllowRecovery, bool ForceValidation, + std::optional LLVMCasBinary) { + if (std::error_code EC = sys::fs::create_directories(RootPath)) + return createFileError(RootPath, EC); + + SmallString<256> PathBuf(RootPath); + sys::path::append(PathBuf, ValidationFilename); + int FD = -1; + if (std::error_code EC = sys::fs::openFileForReadWrite( + PathBuf, FD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) + return createFileError(PathBuf, EC); + assert(FD != -1); + + sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD); + auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(File); }); + + if (std::error_code EC = lockFileThreadSafe(FD, /*Exclusive=*/true)) + return createFileError(PathBuf, EC); + auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(FD); }); + + std::shared_ptr Logger; + if (Error E = + ondisk::OnDiskCASLogger::openIfEnabled(RootPath).moveInto(Logger)) + return std::move(E); + + SmallString<8> Bytes; + if (Error E = sys::fs::readNativeFileToEOF(File, Bytes)) + return 
createFileError(PathBuf, std::move(E)); + + uint64_t ValidationBootTime = 0; + if (!Bytes.empty() && + StringRef(Bytes).trim().getAsInteger(10, ValidationBootTime)) + return createFileError(PathBuf, errc::illegal_byte_sequence, + "expected integer"); + + static uint64_t BootTime = 0; + if (BootTime == 0) + if (Error E = getBootTime().moveInto(BootTime)) + return std::move(E); + + bool Recovered = false; + bool Skipped = false; + std::string LogValidationError; + + auto Log = llvm::make_scope_exit([&] { + if (!Logger) + return; + Logger->log_UnifiedOnDiskCache_validateIfNeeded( + RootPath, BootTime, ValidationBootTime, CheckHash, AllowRecovery, + ForceValidation, LLVMCasBinary, LogValidationError, Skipped, Recovered); + }); + + if (ValidationBootTime == BootTime && !ForceValidation) { + Skipped = true; + return ValidationResult::Skipped; + } + + // Validate! + bool NeedsRecovery = false; + Error E = + LLVMCasBinary + ? validateOutOfProcess(*LLVMCasBinary, RootPath, CheckHash) + : validateInProcess(RootPath, HashName, HashByteSize, CheckHash); + if (E) { + if (Logger) + LogValidationError = toStringWithoutConsuming(E); + if (AllowRecovery) { + consumeError(std::move(E)); + NeedsRecovery = true; + } else { + return std::move(E); + } + } + + if (NeedsRecovery) { + sys::path::remove_filename(PathBuf); + sys::path::append(PathBuf, "lock"); + + int LockFD = -1; + if (std::error_code EC = sys::fs::openFileForReadWrite( + PathBuf, LockFD, sys::fs::CD_OpenAlways, sys::fs::OF_None)) + return createFileError(PathBuf, EC); + sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD); + auto CloseLock = make_scope_exit([&]() { sys::fs::closeFile(LockFile); }); + if (std::error_code EC = tryLockFileThreadSafe(LockFD)) { + if (EC == std::errc::no_lock_available) + return createFileError( + PathBuf, EC, + "CAS validation requires exclusive access but CAS was in use"); + return createFileError(PathBuf, EC); + } + auto UnlockFD = make_scope_exit([&]() { 
unlockFileThreadSafe(LockFD); }); + + SmallVector DBDirs; + if (Error E = getAllDBDirs(RootPath, DBDirs)) + return std::move(E); + + for (StringRef DBDir : DBDirs) { + sys::path::remove_filename(PathBuf); + sys::path::append(PathBuf, DBDir); + std::error_code EC; + int Attempt = 0, MaxAttempts = 100; + SmallString<128> GCPath; + for (; Attempt < MaxAttempts; ++Attempt) { + GCPath.assign(RootPath); + sys::path::append(GCPath, CorruptPrefix + std::to_string(Attempt) + + "." + DBDir); + EC = sys::fs::rename(PathBuf, GCPath); + // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST. + if (EC != errc::directory_not_empty && EC != errc::file_exists) + break; + } + if (Attempt == MaxAttempts) + return createStringError( + EC, "rename " + PathBuf + + " failed: too many CAS directories awaiting pruning"); + if (EC) + return createStringError(EC, "rename " + PathBuf + " to " + GCPath + + " failed: " + EC.message()); + } + Recovered = true; + } + + if (ValidationBootTime != BootTime) { + // Fix filename in case we have error to report. + sys::path::remove_filename(PathBuf); + sys::path::append(PathBuf, ValidationFilename); + if (std::error_code EC = sys::fs::resize_file(FD, 0)) + return createFileError(PathBuf, EC); + raw_fd_ostream OS(FD, /*shouldClose=*/false); + OS.seek(0); // resize does not reset position + OS << BootTime << '\n'; + if (OS.has_error()) + return createFileError(PathBuf, OS.error()); + } + + return NeedsRecovery ? 
ValidationResult::Recovered + : ValidationResult::Valid; +} + Expected> UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, StringRef HashName, unsigned HashByteSize, @@ -359,16 +664,11 @@ UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); } Error UnifiedOnDiskCache::collectGarbage(StringRef Path, ondisk::OnDiskCASLogger *Logger) { SmallVector DBDirs; - if (Error E = getAllDBDirs(Path, DBDirs)) + if (Error E = getAllGarbageDirs(Path, DBDirs)) return E; - if (DBDirs.size() <= 2) - return Error::success(); // no unused directories. - - // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure - // out how to handle the leftover sub-directories of the previous version. SmallString<256> PathBuf(Path); - for (StringRef UnusedSubDir : ArrayRef(DBDirs).drop_back(2)) { + for (StringRef UnusedSubDir : DBDirs) { sys::path::append(PathBuf, UnusedSubDir); if (Logger) Logger->log_UnifiedOnDiskCache_collectGarbage(PathBuf); diff --git a/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp b/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp index 6665ad728162f..607414c92430d 100644 --- a/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp +++ b/llvm/lib/RemoteCachingService/CAS/GRPCRelayCAS.cpp @@ -220,6 +220,11 @@ class GRPCActionCache : public ActionCache { Error putImpl(ArrayRef ResolvedKey, const CASID &Result, bool Globally) final; + Error validate() const final { + // Not supported yet. Always return success. 
+ return Error::success(); + } + private: std::unique_ptr KVDB; }; diff --git a/llvm/test/CAS/logging.test b/llvm/test/CAS/logging.test index 2c2d72fbd4035..98dd6cd9ff001 100644 --- a/llvm/test/CAS/logging.test +++ b/llvm/test/CAS/logging.test @@ -1,8 +1,11 @@ RUN: rm -rf %t RUN: split-file %s %t RUN: env LLVM_CAS_LOG=2 llvm-cas --cas %t/cas --ingest %t/input +RUN: env LLVM_CAS_LOG=2 llvm-cas --cas %t/cas --validate-if-needed -check-hash +RUN: env LLVM_CAS_LOG=2 llvm-cas --cas %t/cas --validate-if-needed -force -allow-recovery RUN: FileCheck %s --input-file %t/cas/v1.log + // CHECK: resize mapped file '{{.*}}v8.index' // CHECK: mmap '{{.*}}v8.index' [[INDEX:0x[0-9a-f]+]] // CHECK: resize mapped file '{{.*}}v8.data' @@ -22,6 +25,9 @@ RUN: FileCheck %s --input-file %t/cas/v1.log // CHECK: resize mapped file '{{.*}}v8.index' // CHECK: close mmap '{{.*}}v8.index' +// CHECK: validate-if-needed '{{.*}}cas' boot=[[BOOT:[0-9]+]] last-valid=0 check-hash=1 allow-recovery=0 force=0 llvm-cas={{.*}}llvm-cas +// CHECK: validate-if-needed '{{.*}}cas' boot=[[BOOT]] last-valid=[[BOOT]] check-hash=0 allow-recovery=1 force=1 llvm-cas={{.*}}llvm-cas + //--- input/a Input 1 diff --git a/llvm/test/CAS/validate-if-needed.test b/llvm/test/CAS/validate-if-needed.test new file mode 100644 index 0000000000000..c2d359c153849 --- /dev/null +++ b/llvm/test/CAS/validate-if-needed.test @@ -0,0 +1,43 @@ +RUN: rm -rf %t && mkdir %t +RUN: llvm-cas --cas %t/cas --ingest %S/Inputs > %t/cas.id +RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak + +# INVALID: bad record +# VALID: validated successfully +# SKIPPED: validation skipped +# RECOVERED: recovered from invalid data + +# Validation failures are not saved. +RUN: not llvm-cas --cas %t/cas --validate-if-needed 2>&1 | FileCheck %s -check-prefix=INVALID +RUN: not llvm-cas --cas %t/cas --validate-if-needed 2>&1 | FileCheck %s -check-prefix=INVALID + +# Validation happens once per boot. 
+RUN: mv %t/cas/v1.1/v8.data.bak %t/cas/v1.1/v8.data +RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=VALID +RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED +# Wrong timestamp triggers re-validation. +RUN: echo '123' > %t/cas/v1.validation +RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=VALID +RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED +# Skipped validation does not catch errors. +RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak +RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED + +# Unless forced. +RUN: not llvm-cas --cas %t/cas --validate-if-needed --force 2>&1 | FileCheck %s -check-prefix=INVALID + +# Recovering from invalid data. +RUN: llvm-cas --cas %t/cas --validate-if-needed --allow-recovery --force | FileCheck %s -check-prefix=RECOVERED +RUN: ls %t/cas/corrupt.0.v1.1 +RUN: llvm-cas --cas %t/cas --validate-if-needed --allow-recovery | FileCheck %s -check-prefix=SKIPPED +RUN: llvm-cas --cas %t/cas --validate-if-needed --force | FileCheck %s -check-prefix=VALID +RUN: rm -rf %t/cas/v1.1 +RUN: cp -r %t/cas/corrupt.0.v1.1 %t/cas/v1.1 +RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak +RUN: llvm-cas --cas %t/cas --validate-if-needed --allow-recovery --force | FileCheck %s -check-prefix=RECOVERED +RUN: ls %t/cas/corrupt.1.v1.1 + +# Corrupt data is pruned. 
+RUN: llvm-cas --cas %t/cas --prune +RUN: not ls %t/cas/corrupt.0.v1.1 +RUN: not ls %t/cas/corrupt.1.v1.1 diff --git a/llvm/test/tools/llvm-cas/validation.test b/llvm/test/tools/llvm-cas/validation.test index b29d37f49422b..a60ee05c8d569 100644 --- a/llvm/test/tools/llvm-cas/validation.test +++ b/llvm/test/tools/llvm-cas/validation.test @@ -9,3 +9,17 @@ RUN: rm %t/cas/v1.1/v8.data RUN: not llvm-cas --cas %t/cas --validate RUN: not llvm-cas --cas %t/cas --validate --check-hash +RUN: mkdir %t/ac + +RUN: llvm-cas --cas %t/ac --make-blob \ +RUN: --data /dev/null > %t/empty.casid +RUN: echo "abc" | \ +RUN: llvm-cas --cas %t/ac --make-blob \ +RUN: --data - >%t/abc.casid + +RUN: llvm-cas --cas %t/ac --put-cache-key @%t/abc.casid @%t/empty.casid +RUN: llvm-cas --cas %t/ac --validate +# Note: records are 40 bytes (32 hash bytes + 8 byte value), so trim the last +# allocated record, leaving it invalid. +RUN: truncate -s -40 %t/ac/v1.1/v3.actions +RUN: not llvm-cas --cas %t/ac --validate diff --git a/llvm/tools/llvm-cas/llvm-cas.cpp b/llvm/tools/llvm-cas/llvm-cas.cpp index b804edc07a783..40ccf5c1ec57a 100644 --- a/llvm/tools/llvm-cas/llvm-cas.cpp +++ b/llvm/tools/llvm-cas/llvm-cas.cpp @@ -13,6 +13,7 @@ #include "llvm/CAS/HierarchicalTreeBuilder.h" #include "llvm/CAS/ObjectStore.h" #include "llvm/CAS/TreeSchema.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/CAS/Utils.h" #include "llvm/RemoteCachingService/RemoteCachingService.h" #include "llvm/Support/CommandLine.h" @@ -64,9 +65,14 @@ static int putCacheKey(ObjectStore &CAS, ActionCache &AC, ArrayRef Objects); static int getCacheResult(ObjectStore &CAS, ActionCache &AC, const CASID &ID); static int validateObject(ObjectStore &CAS, const CASID &ID); -static int validate(ObjectStore &CAS, bool CheckHash); +static int validate(ObjectStore &CAS, ActionCache &AC, bool CheckHash); +static int validateIfNeeded(StringRef Path, StringRef PluginPath, + ArrayRef PluginOpts, bool CheckHash, + bool Force, bool AllowRecovery, 
bool InProcess, + const char *Argv0); static int ingestCasIDFile(cas::ObjectStore &CAS, ArrayRef CASIDs); static int checkLockFiles(StringRef CASPath); +static int prune(cas::ObjectStore &CAS); int main(int Argc, char **Argv) { InitLLVM X(Argc, Argv); @@ -86,6 +92,11 @@ int main(int Argc, char **Argv) { cl::value_desc("path")); cl::opt CheckHash("check-hash", cl::desc("check all hashes during validation")); + cl::opt AllowRecovery("allow-recovery", + cl::desc("allow recovery of cas data")); + cl::opt Force("force", + cl::desc("force validation even if unnecessary")); + cl::opt InProcess("in-process", cl::desc("validate in-process")); enum CommandKind { Invalid, @@ -109,6 +120,8 @@ int main(int Argc, char **Argv) { CheckLockFiles, Validate, ValidateObject, + ValidateIfNeeded, + Prune, }; cl::opt Command( cl::desc("choose command action:"), @@ -137,7 +150,10 @@ int main(int Argc, char **Argv) { "Test file locking behaviour of on-disk CAS"), clEnumValN(Validate, "validate", "validate ObjectStore"), clEnumValN(ValidateObject, "validate-object", - "validate the object for CASID")), + "validate the object for CASID"), + clEnumValN(ValidateIfNeeded, "validate-if-needed", + "validate cas contents if needed"), + clEnumValN(Prune, "prune", "prune local cas storage")), cl::init(CommandKind::Invalid)); cl::ParseCommandLineOptions(Argc, Argv, "llvm-cas CAS tool\n"); @@ -155,6 +171,10 @@ int main(int Argc, char **Argv) { if (Command == CheckLockFiles) return checkLockFiles(CASPath); + if (Command == ValidateIfNeeded) + return validateIfNeeded(CASPath, CASPluginPath, CASPluginOpts, CheckHash, + Force, AllowRecovery, InProcess, Argv[0]); + std::shared_ptr CAS; std::shared_ptr AC; std::optional CASFilePath; @@ -184,7 +204,7 @@ int main(int Argc, char **Argv) { return dump(*CAS); if (Command == Validate) - return validate(*CAS, CheckHash); + return validate(*CAS, *AC, CheckHash); if (Command == MakeBlob) return makeBlob(*CAS, DataPath); @@ -210,6 +230,9 @@ int main(int Argc, char 
**Argv) { if (Command == MergeTrees) return mergeTrees(*CAS, Inputs); + if (Command == Prune) + return prune(*CAS); + if (Inputs.empty()) ExitOnErr(createStringError(inconvertibleErrorCode(), "missing to operate on")); @@ -722,9 +745,49 @@ int validateObject(ObjectStore &CAS, const CASID &ID) { return 0; } -int validate(ObjectStore &CAS, bool CheckHash) { +int validate(ObjectStore &CAS, ActionCache &AC, bool CheckHash) { ExitOnError ExitOnErr("llvm-cas: validate: "); ExitOnErr(CAS.validate(CheckHash)); + ExitOnErr(AC.validate()); outs() << "validated successfully\n"; return 0; } + +int validateIfNeeded(StringRef Path, StringRef PluginPath, + ArrayRef PluginOpts, bool CheckHash, + bool Force, bool AllowRecovery, bool InProcess, + const char *Argv0) { + ExitOnError ExitOnErr("llvm-cas: validate-if-needed: "); + std::string ExecStorage; + std::optional Exec; + if (!InProcess) { + ExecStorage = sys::fs::getMainExecutable(Argv0, (void *)validateIfNeeded); + Exec = ExecStorage; + } + ValidationResult Result; + if (PluginPath.empty()) { + Result = ExitOnErr(validateOnDiskUnifiedCASDatabasesIfNeeded( + Path, CheckHash, AllowRecovery, Force, Exec)); + } else { + // FIXME: add a hook for plugin validation + Result = ValidationResult::Skipped; + } + switch (Result) { + case ValidationResult::Valid: + outs() << "validated successfully\n"; + break; + case ValidationResult::Recovered: + outs() << "recovered from invalid data\n"; + break; + case ValidationResult::Skipped: + outs() << "validation skipped\n"; + break; + } + return 0; +} + +static int prune(cas::ObjectStore &CAS) { + ExitOnError ExitOnErr("llvm-cas: prune: "); + ExitOnErr(CAS.pruneStorageData()); + return 0; +} \ No newline at end of file diff --git a/llvm/unittests/Support/ErrorTest.cpp b/llvm/unittests/Support/ErrorTest.cpp index bd098a4988dc5..b5b1c70d3e532 100644 --- a/llvm/unittests/Support/ErrorTest.cpp +++ b/llvm/unittests/Support/ErrorTest.cpp @@ -976,6 +976,17 @@ TEST(Error, FileErrorTest) { 
handleAllErrors(std::move(FE6), [](std::unique_ptr F) { EXPECT_EQ(F->messageWithoutFileInfo(), "CustomError {6}"); }); + + Error FE7 = + createFileError("file.bin", make_error_code(std::errc::invalid_argument), + "invalid argument"); + EXPECT_EQ(toString(std::move(FE7)), "'file.bin': invalid argument"); + + StringRef Argument = "arg"; + Error FE8 = + createFileError("file.bin", make_error_code(std::errc::invalid_argument), + "invalid argument '%s'", Argument.str().c_str()); + EXPECT_EQ(toString(std::move(FE8)), "'file.bin': invalid argument 'arg'"); } TEST(Error, FileErrorErrorCode) {