Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/CAS/ActionCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ class ActionCache {
CanBeDistributed);
}

/// Validate the ActionCache contents.
///
/// Pure virtual: every concrete cache supplies its own check.
///
/// \returns an \c Error describing the problem; presumably
/// \c Error::success() when the contents are valid (confirm against the
/// implementations).
virtual Error validate() const = 0;

/// Virtual so that a concrete cache can be destroyed through an
/// \c ActionCache pointer.
virtual ~ActionCache() = default;

protected:
Expand All @@ -97,6 +100,9 @@ class ActionCache {
/// Create an action cache in memory.
///
/// \returns an owning pointer to the new \c ActionCache.
std::unique_ptr<ActionCache> createInMemoryActionCache();

/// Create an action cache on disk.
///
/// \param Path location of the on-disk cache (presumably a directory; confirm
/// against the implementation).
///
/// \returns an owning pointer to the new \c ActionCache, or an \c Error if it
/// could not be created.
Expected<std::unique_ptr<ActionCache>> createOnDiskActionCache(StringRef Path);

} // end namespace llvm::cas

#endif // LLVM_CAS_ACTIONCACHE_H
59 changes: 59 additions & 0 deletions llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H
#define LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H

#include "llvm/Support/Error.h"

namespace llvm::cas {

class ActionCache;
class ObjectStore;

/// Create on-disk \c ObjectStore and \c ActionCache instances based on
/// \c ondisk::UnifiedOnDiskCache, with built-in hashing.
///
/// \param Path directory for the on-disk database.
///
/// \returns the \c ObjectStore / \c ActionCache pair (in that order), or an
/// \c Error if the databases could not be created.
Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
createOnDiskUnifiedCASDatabases(StringRef Path);

/// Represents the result of validating the contents using
/// \c validateOnDiskUnifiedCASDatabasesIfNeeded.
///
/// Note: invalid results are handled as an \c Error, so there is no
/// enumerator for "invalid".
enum class ValidationResult {
/// The data is already valid.
Valid,
/// The data was invalid, but was recovered (the invalid data has been
/// cleared).
Recovered,
/// Validation was skipped, as it was not needed.
Skipped,
};

/// Validate the data in \p Path, if needed to ensure correctness.
///
/// NOTE(review): this presumably forwards to
/// \c ondisk::UnifiedOnDiskCache::validateIfNeeded with the built-in hash; if
/// so, recovery requires exclusive access to the CAS -- confirm against the
/// implementation.
///
/// \param Path directory for the on-disk database.
/// \param CheckHash Whether to validate hashes match the data.
/// \param AllowRecovery Whether to automatically recover from invalid data by
/// marking the files for garbage collection.
/// \param ForceValidation Whether to force validation to occur even if it
/// should not be necessary.
/// \param LLVMCasBinary If provided, validation is performed out-of-process
/// using the given \c llvm-cas executable which protects against crashes
/// during validation. Otherwise validation is performed in-process.
///
/// \returns \c Valid if the data is already valid, \c Recovered if data
/// was invalid but has been cleared, \c Skipped if validation is not needed,
/// or an \c Error if validation cannot be performed or if the data is left
/// in an invalid state because \p AllowRecovery is false.
Expected<ValidationResult> validateOnDiskUnifiedCASDatabasesIfNeeded(
StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
std::optional<StringRef> LLVMCasBinary);

} // namespace llvm::cas

#endif // LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H
47 changes: 46 additions & 1 deletion llvm/include/llvm/CAS/ObjectStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the declaration of the ObjectStore class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CAS_OBJECTSTORE_H
#define LLVM_CAS_OBJECTSTORE_H
Expand Down Expand Up @@ -111,7 +116,10 @@ class ObjectStore {
virtual Expected<bool> isMaterialized(ObjectRef Ref) const = 0;

/// Validate the underlying object referred to by \p ID.
virtual Error validate(const CASID &ID) = 0;
virtual Error validateObject(const CASID &ID) = 0;

/// Validate the entire ObjectStore.
///
/// \param CheckHash presumably whether to also verify that stored hashes
/// match the data (confirm against the implementations).
virtual Error validate(bool CheckHash) const = 0;

protected:
/// Load the object referenced by \p Ref.
Expand Down Expand Up @@ -215,9 +223,39 @@ class ObjectStore {
return Data.size();
}

/// Set the size for limiting growth of on-disk storage. This has an effect
/// for when the instance is closed.
///
/// Implementations may not have this implemented; this default ignores
/// \p SizeLimit and reports success.
///
/// \param SizeLimit the requested limit, or \c std::nullopt (presumably
/// meaning "no limit"; confirm against the overriding implementations).
virtual Error setSizeLimit(std::optional<uint64_t> SizeLimit) {
return Error::success();
}

/// \returns the storage size of the on-disk CAS data.
///
/// Implementations that do not provide this should return \c std::nullopt,
/// which is what this default does.
virtual Expected<std::optional<uint64_t>> getStorageSize() const {
return std::nullopt;
}

/// Prune local storage to reduce its size according to the desired size
/// limit. Pruning can happen concurrently with other operations.
///
/// Implementations may not have this implemented; this default does nothing
/// and reports success.
virtual Error pruneStorageData() { return Error::success(); }

/// Validate the whole node tree starting at \p Ref.
///
/// \returns an \c Error if validation fails (presumably for \p Ref or any
/// node reachable from it; confirm against the implementation).
Error validateTree(ObjectRef Ref);

/// Import an object from another CAS. This will import the full tree from the
/// other CAS.
///
/// \param Upstream the CAS to import from.
/// \param Other the object to import (presumably a reference valid in
/// \p Upstream; confirm against the implementation).
///
/// \returns an \c ObjectRef for the imported object, or an \c Error if the
/// import failed.
Expected<ObjectRef> importObject(ObjectStore &Upstream, ObjectRef Other);

/// Print the ObjectStore internals for debugging purposes. This default
/// implementation prints nothing.
virtual void print(raw_ostream &) const {}
/// Debugging helper, defined out of line. NOTE(review): presumably calls
/// \c print on a debug stream -- confirm against the implementation.
void dump() const;

/// \returns the \c CASContext held by this ObjectStore.
const CASContext &getContext() const { return Context; }

Expand Down Expand Up @@ -290,8 +328,15 @@ class ObjectProxy {
ObjectHandle H;
};

/// Create an in-memory CAS.
///
/// \returns an owning pointer to the new \c ObjectStore.
std::unique_ptr<ObjectStore> createInMemoryCAS();

/// \returns true if the \c LLVM_ENABLE_ONDISK_CAS configuration was enabled.
bool isOnDiskCASEnabled();

/// Create a persistent on-disk CAS at \p Path.
///
/// \returns an owning pointer to the new \c ObjectStore, or an \c Error if it
/// could not be created.
Expected<std::unique_ptr<ObjectStore>> createOnDiskCAS(const Twine &Path);

} // namespace cas
} // namespace llvm

Expand Down
191 changes: 191 additions & 0 deletions llvm/include/llvm/CAS/UnifiedOnDiskCache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H
#define LLVM_CAS_UNIFIEDONDISKCACHE_H

#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
#include "llvm/CAS/OnDiskGraphDB.h"
#include <atomic>

namespace llvm::cas::ondisk {

class OnDiskKeyValueDB;

/// A unified CAS nodes and key-value database, using on-disk storage for both.
/// It manages storage growth and provides APIs for garbage collection.
///
/// High-level properties:
/// * While \p UnifiedOnDiskCache is open on a directory, by any process, the
///   storage size in that directory will keep growing unrestricted. For data to
///   become eligible for garbage-collection there should be no open instances
///   of \p UnifiedOnDiskCache for that directory, by any process.
/// * Garbage-collection needs to be triggered explicitly by the client. It can
///   be triggered on a directory concurrently, at any time and by any process,
///   without affecting any active readers/writers, in the same process or other
///   processes.
///
/// Usage patterns should be that an instance of \p UnifiedOnDiskCache is open
/// for a limited period of time, e.g. for the duration of a build operation.
/// For long-living processes that need periodic access to a
/// \p UnifiedOnDiskCache, the client should devise a scheme where access is
/// performed within some defined period. For example, if a service is designed
/// to continuously wait for requests that access a \p UnifiedOnDiskCache, it
/// could keep the instance alive while new requests are coming in but close it
/// after a time period in which there are no new requests.
class UnifiedOnDiskCache {
public:
  /// The \p OnDiskGraphDB instance for the open directory.
  OnDiskGraphDB &getGraphDB() { return *PrimaryGraphDB; }

  /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key.
  ///
  /// \param Key the hash bytes for the key.
  /// \param Value the \p ObjectID value.
  ///
  /// \returns the \p ObjectID associated with the \p Key. It may be different
  /// than \p Value if another value was already associated with this key.
  Expected<ObjectID> KVPut(ArrayRef<uint8_t> Key, ObjectID Value);

  /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key.
  /// An \p ObjectID as a key is equivalent to its digest bytes.
  ///
  /// \param Key the \p ObjectID for the key.
  /// \param Value the \p ObjectID value.
  ///
  /// \returns the \p ObjectID associated with the \p Key. It may be different
  /// than \p Value if another value was already associated with this key.
  Expected<ObjectID> KVPut(ObjectID Key, ObjectID Value);

  /// \returns the \p ObjectID, of the \p OnDiskGraphDB instance, associated
  /// with the \p Key, or \p std::nullopt if the key does not exist.
  Expected<std::optional<ObjectID>> KVGet(ArrayRef<uint8_t> Key);

  /// Open a \p UnifiedOnDiskCache instance for a directory.
  ///
  /// \param Path directory for the on-disk database. The directory will be
  /// created if it doesn't exist.
  /// \param SizeLimit Optional size for limiting growth. This has an effect for
  /// when the instance is closed.
  /// \param HashName Identifier name for the hashing algorithm that is going to
  /// be used.
  /// \param HashByteSize Size for the object digest hash bytes.
  /// \param FaultInPolicy Controls how nodes are copied to primary store. This
  /// is recorded at creation time and subsequent opens need to pass the same
  /// policy otherwise the \p open will fail.
  static Expected<std::unique_ptr<UnifiedOnDiskCache>>
  open(StringRef Path, std::optional<uint64_t> SizeLimit, StringRef HashName,
       unsigned HashByteSize,
       OnDiskGraphDB::FaultInPolicy FaultInPolicy =
           OnDiskGraphDB::FaultInPolicy::FullTree);

  /// Validate the data in \p Path, if needed to ensure correctness.
  ///
  /// Note: if invalid data is detected and \p AllowRecovery is true, then
  /// recovery requires exclusive access to the CAS and it is an error to
  /// attempt recovery if there is concurrent use of the CAS.
  ///
  /// \param Path directory for the on-disk database.
  /// \param HashName Identifier name for the hashing algorithm that is going to
  /// be used.
  /// \param HashByteSize Size for the object digest hash bytes.
  /// \param CheckHash Whether to validate hashes match the data.
  /// \param AllowRecovery Whether to automatically recover from invalid data by
  /// marking the files for garbage collection.
  /// \param ForceValidation Whether to force validation to occur even if it
  /// should not be necessary.
  /// \param LLVMCasBinary If provided, validation is performed out-of-process
  /// using the given \c llvm-cas executable which protects against crashes
  /// during validation. Otherwise validation is performed in-process.
  ///
  /// \returns \c Valid if the data is already valid, \c Recovered if data
  /// was invalid but has been cleared, \c Skipped if validation is not needed,
  /// or an \c Error if validation cannot be performed or if the data is left
  /// in an invalid state because \p AllowRecovery is false.
  static Expected<ValidationResult>
  validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize,
                   bool CheckHash, bool AllowRecovery, bool ForceValidation,
                   std::optional<StringRef> LLVMCasBinary);

  /// This is called implicitly at destruction time, so it is not required for a
  /// client to call this. After calling \p close the only method that is valid
  /// to call is \p needsGarbageCollection.
  ///
  /// \param CheckSizeLimit if true it will check whether the primary store has
  /// exceeded its intended size limit. If false the check is skipped even if a
  /// \p SizeLimit was passed to the \p open call.
  Error close(bool CheckSizeLimit = true);

  /// Set the size for limiting growth. This has an effect for when the instance
  /// is closed.
  void setSizeLimit(std::optional<uint64_t> SizeLimit);

  /// \returns the storage size of the cache data.
  uint64_t getStorageSize() const;

  /// \returns whether the primary store has exceeded the intended size limit.
  /// This can return false even if the overall size of the opened directory is
  /// over the \p SizeLimit passed to \p open. To know whether garbage
  /// collection needs to be triggered or not, call \p needsGarbageCollection.
  bool hasExceededSizeLimit() const;

  /// \returns whether there is unused data that can be deleted using a
  /// \p collectGarbage call.
  bool needsGarbageCollection() const { return NeedsGarbageCollection; }

  /// Remove any unused data from the directory at \p Path. If there is no such
  /// data the operation is a no-op.
  ///
  /// This can be called concurrently, regardless of whether there is an open
  /// \p UnifiedOnDiskCache instance or not; it has no effect on readers/writers
  /// in the same process or other processes.
  ///
  /// It is recommended that garbage-collection is triggered concurrently in the
  /// background, so that it has minimal effect on the workload of the process.
  static Error collectGarbage(StringRef Path);

  /// Remove unused data from the current UnifiedOnDiskCache.
  Error collectGarbage();

  /// Validate the key value databases.
  Error validateActionCache();

  /// Get the upstream OnDiskGraphDB if exists.
  ///
  /// \returns upstream database or nullptr if upstream database doesn't exist.
  OnDiskGraphDB *getUpstreamGraphDB() const { return UpstreamGraphDB; }

  ~UnifiedOnDiskCache();

private:
  /// Private: instances are handed out by \p open.
  UnifiedOnDiskCache();

  Expected<std::optional<ObjectID>>
  faultInFromUpstreamKV(ArrayRef<uint8_t> Key);

  /// \returns the storage size of the primary directory.
  uint64_t getPrimaryStorageSize() const;

  // NOTE: member order is significant (it fixes construction/destruction
  // order); do not reorder.

  std::string RootPath;

  /// Explicitly zero-initialized: before C++20 a default-constructed
  /// std::atomic holds an indeterminate value, so a read that happens before
  /// the first store would be undefined.
  /// NOTE(review): 0 presumably encodes "no limit" -- confirm in the .cpp.
  std::atomic<uint64_t> SizeLimit{0};

  /// Lock file descriptor; -1 until one is stored here.
  int LockFD = -1;

  /// Explicitly initialized for the same reason as \p SizeLimit, so that
  /// \p needsGarbageCollection is well-defined on any instance.
  std::atomic<bool> NeedsGarbageCollection{false};
  std::string PrimaryDBDir;

  /// Raw pointer, null when there is no upstream database. Ownership is not
  /// expressed by this member (presumably owned elsewhere -- confirm).
  OnDiskGraphDB *UpstreamGraphDB = nullptr;
  std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;

  std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
  std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
};

} // namespace llvm::cas::ondisk

#endif // LLVM_CAS_UNIFIEDONDISKCACHE_H
Loading