Skip to content
Merged
2 changes: 2 additions & 0 deletions clang/include/clang/CAS/CASOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ class CASOptions : public CASConfiguration {
/// default on-disk CAS, otherwise this is a noop.
void ensurePersistentCAS();

void getResolvedCASPath(llvm::SmallVectorImpl<char> &Result) const;

private:
/// Initialize Cached CAS and ActionCache.
llvm::Error initCache() const;
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/CAS/CASOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ llvm::Error CASOptions::initCache() const {
}

SmallString<256> PathBuf;
getResolvedCASPath(PathBuf);
if (CASPath == "auto") {
getDefaultOnDiskCASPath(PathBuf);
CASPath = PathBuf;
Expand All @@ -119,3 +120,11 @@ llvm::Error CASOptions::initCache() const {
std::tie(Cache.CAS, Cache.AC) = std::move(DBs);
return llvm::Error::success();
}

/// Write the CAS path that this configuration resolves to into \p Result.
///
/// A \c CASPath of "auto" is resolved through \c getDefaultOnDiskCASPath;
/// any other value is copied into \p Result verbatim. \c CASPath itself is
/// left untouched.
void CASOptions::getResolvedCASPath(SmallVectorImpl<char> &Result) const {
  // Explicit path: hand it back unchanged.
  if (CASPath != "auto") {
    Result.assign(CASPath.begin(), CASPath.end());
    return;
  }

  // "auto" is a placeholder for the default on-disk location.
  getDefaultOnDiskCASPath(Result);
}
18 changes: 18 additions & 0 deletions clang/test/CAS/depscan-cas-log.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Ensure both the first clang process and the daemon have logging enabled.
// It's hard to check this exhaustively, but in practice if the daemon does not
// enable logging there are currently zero records in the log.

// RUN: rm -rf %t && mkdir %t
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_LOG=1 LLVM_CAS_DISABLE_VALIDATION=1 %clang \
// RUN: -cc1depscan -fdepscan=daemon -fdepscan-include-tree -o - \
// RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas
// RUN: FileCheck %s --input-file %t/cas/v1.log

// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v8.index'
// CHECK: [[PID1]] {{[0-9]*}}: create subtrie

// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v8.index'
// Even a minimal compilation involves at least 9 records for the cache key.
// CHECK-COUNT-9: [[PID2]] {{[0-9]*}}: create record

// CHECK: [[PID1]] {{[0-9]*}}: close mmap '{{.*}}v8.index'
18 changes: 18 additions & 0 deletions clang/test/CAS/validate-once.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// RUN: rm -rf %t

// RUN: llvm-cas --cas %t/cas --ingest %s
// RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak

// RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \
// RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \
// RUN: -Xclang -fcas-path -Xclang %t/cas \
// RUN: -fdepscan=daemon -fdepscan-daemon=%{clang-daemon-dir}/%basename_t -fsyntax-only -x c %s

// RUN: ls %t/cas/corrupt.0.v1.1

// RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED
// SKIPPED: validation skipped

#include "test.h"

int func(void);
13 changes: 12 additions & 1 deletion clang/tools/driver/cc1depscanProtocol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/StringSaver.h"
#include <cstdlib>

#if LLVM_ON_UNIX
#include <sys/socket.h> // FIXME: Unix-only. Not portable.
Expand Down Expand Up @@ -186,10 +187,20 @@ Expected<ScanDaemon> ScanDaemon::launchDaemon(StringRef BasePath,
return llvm::errorCodeToError(std::error_code(EC, std::generic_category()));
#endif

static constexpr const char *PassThroughEnv[] = {
"LLVM_CAS_LOG",
"LLVM_CAS_DISABLE_VALIDATION",
};
SmallVector<const char *> EnvP;
for (const char *Name : PassThroughEnv)
if (const char *Value = getenv(Name))
EnvP.push_back(Saver.save(llvm::Twine(Name) + "=" + Value).data());
EnvP.push_back(nullptr);

::pid_t Pid;
int EC = ::posix_spawn(&Pid, Args[0], /*file_actions=*/nullptr, &Attrs,
const_cast<char **>(LaunchArgs.data()),
/*envp=*/nullptr);
const_cast<char **>(EnvP.data()));
if (EC)
return llvm::errorCodeToError(std::error_code(EC, std::generic_category()));

Expand Down
82 changes: 63 additions & 19 deletions clang/tools/driver/cc1depscan_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@
#include "clang/Tooling/DependencyScanning/ScanAndUpdateArgs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/CAS/ActionCache.h"
#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
#include "llvm/CAS/CASProvidingFileSystem.h"
#include "llvm/CAS/CachingOnDiskFileSystem.h"
#include "llvm/CAS/HierarchicalTreeBuilder.h"
Expand All @@ -41,6 +43,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PrefixMapper.h"
Expand All @@ -52,6 +55,7 @@
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
#include <mutex>
#include <optional>
#include <shared_mutex>

#if LLVM_ON_UNIX
Expand Down Expand Up @@ -631,8 +635,8 @@ namespace {
struct ScanServer {
const char *Argv0 = nullptr;
SmallString<128> BasePath;
/// List of cas options.
ArrayRef<const char *> CASArgs;
CASOptions CASOpts;
bool ProduceIncludeTree = true;
int PidFD = -1;
int ListenSocket = -1;
/// \p std::nullopt means it runs indefinitely.
Expand All @@ -641,7 +645,7 @@ struct ScanServer {

~ScanServer() { shutdown(); }

void start(bool Exclusive);
void start(bool Exclusive, ArrayRef<const char *> CASArgs);
int listen();

/// Tear down the socket and bind file immediately but wait till all existing
Expand Down Expand Up @@ -706,13 +710,13 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
// particular "build session", to shutdown, then have it stay alive until the
// session is finished.
bool LongRunning = false;

ArrayRef<const char *> CASArgs;
for (const auto *A = Argv.begin() + 2; A != Argv.end(); ++A) {
StringRef Arg(*A);
if (Arg == "-long-running")
LongRunning = true;
else if (Arg == "-cas-args") {
Server.CASArgs = ArrayRef(A + 1, Argv.end());
CASArgs = ArrayRef(A + 1, Argv.end());
break;
}
}
Expand All @@ -723,7 +727,7 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
reportError(Twine("cannot create basedir: ") + EC.message());

if (Command == "-serve") {
Server.start(/*Exclusive*/ true);
Server.start(/*Exclusive*/ true, CASArgs);
return Server.listen();

} else if (Command == "-execute") {
Expand All @@ -734,7 +738,7 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
}

// Make sure to start the server before executing the command.
Server.start(/*Exclusive*/ true);
Server.start(/*Exclusive*/ true, CASArgs);
std::thread ServerThread([&Server]() { Server.listen(); });

setenv("CLANG_CACHE_SCAN_DAEMON_SOCKET_PATH", Server.BasePath.c_str(),
Expand Down Expand Up @@ -785,11 +789,61 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
openAndReplaceFD(1, LogOutPath);
openAndReplaceFD(2, LogErrPath);

Server.start(/*Exclusive*/ false);
Server.start(/*Exclusive*/ false, CASArgs);
return Server.listen();
}

void ScanServer::start(bool Exclusive) {
static std::optional<StringRef>
findLLVMCasBinary(const char *Argv0, llvm::SmallVectorImpl<char> &Storage) {
using namespace llvm::sys;
std::string Path = fs::getMainExecutable(Argv0, (void *)cc1depscan_main);
Storage.assign(Path.begin(), Path.end());
path::remove_filename(Storage);
path::append(Storage, "llvm-cas");
StringRef PathStr(Storage.data(), Storage.size());
if (fs::exists(PathStr))
return PathStr;
// Look for a corresponding usr/local/bin/llvm-cas
PathStr = path::parent_path(PathStr);
if (path::filename(PathStr) != "bin")
return std::nullopt;
PathStr = path::parent_path(PathStr);
Storage.truncate(PathStr.size());
path::append(Storage, "local", "bin", "llvm-cas");
PathStr = StringRef{Storage.data(), Storage.size()};
if (fs::exists(PathStr))
return PathStr;
return std::nullopt;
}

void ScanServer::start(bool Exclusive, ArrayRef<const char *> CASArgs) {
// Parse CAS options and validate if needed.
DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions());

const OptTable &Opts = clang::driver::getDriverOptTable();
unsigned MissingArgIndex, MissingArgCount;
auto ParsedCASArgs =
Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount);
CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags);
CASOpts.ensurePersistentCAS();
ProduceIncludeTree =
ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree);

static std::once_flag ValidateOnce;
std::call_once(ValidateOnce, [&] {
if (getenv("LLVM_CAS_DISABLE_VALIDATION"))
return;
if (CASOpts.CASPath.empty() || !CASOpts.PluginPath.empty())
return;
SmallString<64> LLVMCasStorage;
SmallString<64> CASPath;
CASOpts.getResolvedCASPath(CASPath);
ExitOnErr(llvm::cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
CASPath, /*CheckHash=*/true,
/*AllowRecovery=*/true,
/*Force=*/false, findLLVMCasBinary(Argv0, LLVMCasStorage)));
});

// Check the pidfile.
SmallString<128> PidPath;
(BasePath + ".pid").toVector(PidPath);
Expand Down Expand Up @@ -828,16 +882,6 @@ int ScanServer::listen() {
llvm::DefaultThreadPool Pool;

DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions());
CASOptions CASOpts;
const OptTable &Opts = clang::driver::getDriverOptTable();
unsigned MissingArgIndex, MissingArgCount;
auto ParsedCASArgs =
Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount);
CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags);
CASOpts.ensurePersistentCAS();
bool ProduceIncludeTree =
ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree);

std::shared_ptr<llvm::cas::ObjectStore> CAS;
std::shared_ptr<llvm::cas::ActionCache> Cache;
std::tie(CAS, Cache) = CASOpts.getOrCreateDatabases(Diags);
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/CAS/ActionCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ class ActionCache {
Globally, std::move(Callback), CancelObj);
}

/// Validate the ActionCache contents.
virtual Error validate() const = 0;

virtual ~ActionCache() = default;

protected:
Expand Down
33 changes: 33 additions & 0 deletions llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,39 @@ class ObjectStore;
Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
createOnDiskUnifiedCASDatabases(StringRef Path);

/// Represents the result of validating the contents using
/// \c validateOnDiskUnifiedCASDatabasesIfNeeded.
///
/// Only non-failing outcomes are enumerated here. Note: invalid results are
/// handled as an \c Error rather than as a value of this enum.
enum class ValidationResult {
/// The data is already valid.
Valid,
/// The data was invalid, but was recovered (the invalid data has been
/// cleared).
Recovered,
/// Validation was skipped, as it was not needed.
Skipped,
};

/// Validate the data in \p Path, if needed to ensure correctness.
///
/// \param Path directory for the on-disk database.
/// \param CheckHash Whether to validate hashes match the data.
/// \param AllowRecovery Whether to automatically recover from invalid data by
/// marking the files for garbage collection.
/// \param ForceValidation Whether to force validation to occur even if it
/// should not be necessary.
/// \param LLVMCasBinary If provided, validation is performed out-of-process
/// using the given \c llvm-cas executable which protects against crashes
/// during validation. Otherwise validation is performed in-process.
///
/// \returns \c Valid if the data is already valid, \c Recovered if data
/// was invalid but has been cleared, \c Skipped if validation is not needed,
/// or an \c Error if validation cannot be performed or if the data is left
/// in an invalid state because \p AllowRecovery is false.
Expected<ValidationResult> validateOnDiskUnifiedCASDatabasesIfNeeded(
StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
std::optional<StringRef> LLVMCasBinary);

} // namespace llvm::cas

#endif // LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H
5 changes: 5 additions & 0 deletions llvm/include/llvm/CAS/OnDiskCASLogger.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ class OnDiskCASLogger {
void log_MappedFileRegionBumpPtr_allocate(void *Region, TrieOffset Off,
size_t Size);
void log_UnifiedOnDiskCache_collectGarbage(StringRef Path);
void log_UnifiedOnDiskCache_validateIfNeeded(
StringRef Path, uint64_t BootTime, uint64_t ValidationTime,
bool CheckHash, bool AllowRecovery, bool Force,
std::optional<StringRef> LLVMCas, StringRef ValidationError, bool Skipped,
bool Recovered);
void log_TempFile_create(StringRef Name);
void log_TempFile_keep(StringRef TmpName, StringRef Name, std::error_code EC);
void log_TempFile_remove(StringRef TmpName, std::error_code EC);
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/CAS/OnDiskKeyValueDB.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ class OnDiskKeyValueDB {
StringRef ValueName, size_t ValueSize,
std::shared_ptr<OnDiskCASLogger> Logger = nullptr);

using CheckValueT = function_ref<Error(FileOffset Offset, ArrayRef<char>)>;
Error validate(CheckValueT CheckValue) const;

private:
OnDiskKeyValueDB(size_t ValueSize, OnDiskHashMappedTrie Cache)
: ValueSize(ValueSize), Cache(std::move(Cache)) {}
Expand Down
31 changes: 31 additions & 0 deletions llvm/include/llvm/CAS/UnifiedOnDiskCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H
#define LLVM_CAS_UNIFIEDONDISKCACHE_H

#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
#include "llvm/CAS/OnDiskGraphDB.h"

namespace llvm::cas::ondisk {
Expand Down Expand Up @@ -82,6 +83,34 @@ class UnifiedOnDiskCache {
OnDiskGraphDB::FaultInPolicy FaultInPolicy =
OnDiskGraphDB::FaultInPolicy::FullTree);

/// Validate the data in \p Path, if needed to ensure correctness.
///
/// Note: if invalid data is detected and \p AllowRecovery is true, then
/// recovery requires exclusive access to the CAS and it is an error to
/// attempt recovery if there is concurrent use of the CAS.
///
/// \param Path directory for the on-disk database.
/// \param HashName Identifier name for the hashing algorithm that is going to
/// be used.
/// \param HashByteSize Size for the object digest hash bytes.
/// \param CheckHash Whether to validate hashes match the data.
/// \param AllowRecovery Whether to automatically recover from invalid data by
/// marking the files for garbage collection.
/// \param ForceValidation Whether to force validation to occur even if it
/// should not be necessary.
/// \param LLVMCasBinary If provided, validation is performed out-of-process
/// using the given \c llvm-cas executable which protects against crashes
/// during validation. Otherwise validation is performed in-process.
///
/// \returns \c Valid if the data is already valid, \c Recovered if data
/// was invalid but has been cleared, \c Skipped if validation is not needed,
/// or an \c Error if validation cannot be performed or if the data is left
/// in an invalid state because \p AllowRecovery is false.
static Expected<ValidationResult>
validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize,
bool CheckHash, bool AllowRecovery, bool ForceValidation,
std::optional<StringRef> LLVMCasBinary);

/// This is called implicitly at destruction time, so it is not required for a
/// client to call this. After calling \p close the only method that is valid
/// to call is \p needsGarbageCollection.
Expand Down Expand Up @@ -124,6 +153,8 @@ class UnifiedOnDiskCache {

~UnifiedOnDiskCache();

Error validateActionCache();

private:
UnifiedOnDiskCache();

Expand Down
Loading