|
| 1 | +//===- UnifiedOnDiskCache.h -------------------------------------*- C++ -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +#ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H |
| 10 | +#define LLVM_CAS_UNIFIEDONDISKCACHE_H |
| 11 | + |
| 12 | +#include "llvm/CAS/OnDiskGraphDB.h" |
| 13 | + |
| 14 | +namespace llvm::cas::ondisk { |
| 15 | + |
| 16 | +class OnDiskKeyValueDB; |
| 17 | + |
| 18 | +/// A unified CAS nodes and key-value database, using on-disk storage for both. |
| 19 | +/// It manages storage growth and provides APIs for garbage collection. |
| 20 | +/// |
| 21 | +/// High-level properties: |
| 22 | +/// * While \p UnifiedOnDiskCache is open on a directory, by any process, the |
| 23 | +/// storage size in that directory will keep growing unrestricted. For data to |
| 24 | +/// become eligible for garbage-collection there should be no open instances |
| 25 | +/// of \p UnifiedOnDiskCache for that directory, by any process. |
| 26 | +/// * Garbage-collection needs to be triggered explicitly by the client. It can |
| 27 | +/// be triggered on a directory concurrently, at any time and by any process, |
| 28 | +/// without affecting any active readers/writers, in the same process or other |
| 29 | +/// processes. |
| 30 | +/// |
| 31 | +/// Usage patterns should be that an instance of \p UnifiedOnDiskCache is open |
| 32 | +/// for a limited period of time, e.g. for the duration of a build operation. |
| 33 | +/// For long-living processes that need periodic access to a |
| 34 | +/// \p UnifiedOnDiskCache, the client should devise a scheme where access is |
| 35 | +/// performed within some defined period. For example, if a service is designed |
| 36 | +/// to continuously wait for requests that access a \p UnifiedOnDiskCache, it |
| 37 | +/// could keep the instance alive while new requests are coming in but close it |
| 38 | +/// after a time period in which there are no new requests. |
| 39 | +class UnifiedOnDiskCache { |
| 40 | +public: |
| 41 | + /// The \p OnDiskGraphDB instance for the open directory. |
| 42 | + OnDiskGraphDB &getGraphDB() { return *PrimaryGraphDB; } |
| 43 | + |
| 44 | + /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key. |
| 45 | + /// |
| 46 | + /// \param Key the hash bytes for the key. |
| 47 | + /// \param Value the \p ObjectID value. |
| 48 | + /// |
| 49 | + /// \returns the \p ObjectID associated with the \p Key. It may differ from |
| 50 | + /// \p Value if another value was already associated with this key. |
| 51 | + Expected<ObjectID> KVPut(ArrayRef<uint8_t> Key, ObjectID Value); |
| 52 | + |
| 53 | + /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key. |
| 54 | + /// An \p ObjectID as a key is equivalent to its digest bytes. |
| 55 | + /// |
| 56 | + /// \param Key the \p ObjectID for the key. |
| 57 | + /// \param Value the \p ObjectID value. |
| 58 | + /// |
| 59 | + /// \returns the \p ObjectID associated with the \p Key. It may differ from |
| 60 | + /// \p Value if another value was already associated with this key. |
| 61 | + Expected<ObjectID> KVPut(ObjectID Key, ObjectID Value); |
| 62 | + |
| 63 | + /// \returns the \p ObjectID, of the \p OnDiskGraphDB instance, associated |
| 64 | + /// with the \p Key, or \p std::nullopt if the key does not exist. |
| 65 | + Expected<std::optional<ObjectID>> KVGet(ArrayRef<uint8_t> Key); |
| 66 | + |
| 67 | + /// Open a \p UnifiedOnDiskCache instance for a directory. |
| 68 | + /// |
| 69 | + /// \param Path directory for the on-disk database. The directory will be |
| 70 | + /// created if it doesn't exist. |
| 71 | + /// \param SizeLimit Optional size for limiting growth. This takes effect |
| 72 | + /// when the instance is closed. |
| 73 | + /// \param HashName Identifier name for the hashing algorithm that is going to |
| 74 | + /// be used. |
| 75 | + /// \param HashByteSize Size for the object digest hash bytes. |
| 76 | + /// \param FaultInPolicy Controls how nodes are copied to primary store. This |
| 77 | + /// is recorded at creation time, and subsequent opens need to pass the same |
| 78 | + /// policy; otherwise the \p open will fail. |
| 79 | + static Expected<std::unique_ptr<UnifiedOnDiskCache>> |
| 80 | + open(StringRef Path, std::optional<uint64_t> SizeLimit, StringRef HashName, |
| 81 | + unsigned HashByteSize, |
| 82 | + OnDiskGraphDB::FaultInPolicy FaultInPolicy = |
| 83 | + OnDiskGraphDB::FaultInPolicy::FullTree); |
| 84 | + |
| 85 | + /// This is called implicitly at destruction time, so it is not required for a |
| 86 | + /// client to call this. After calling \p close, the only method that is valid |
| 87 | + /// to call is \p needsGarbaseCollection. |
| 88 | + /// |
| 89 | + /// \param CheckSizeLimit if true, it will check whether the primary store has |
| 90 | + /// exceeded its intended size limit. If false, the check is skipped even if a |
| 91 | + /// \p SizeLimit was passed to the \p open call. |
| 92 | + Error close(bool CheckSizeLimit = true); |
| 93 | + |
| 94 | + /// \returns whether the primary store has exceeded the intended size limit. |
| 95 | + /// This can return false even if the overall size of the opened directory is |
| 96 | + /// over the \p SizeLimit passed to \p open. To know whether garbage |
| 97 | + /// collection needs to be triggered or not, call \p needsGarbaseCollection. |
| 98 | + bool hasExceededSizeLimit() const; |
| 99 | + |
| 100 | + /// \returns whether there is unused data that can be deleted using a |
| 101 | + /// \p collectGarbage call. |
| 102 | + bool needsGarbaseCollection() const { return NeedsGarbageCollection; } |
| 103 | + |
| 104 | + /// Remove any unused data from the directory at \p Path. If there is no such |
| 105 | + /// data, the operation is a no-op. |
| 106 | + /// |
| 107 | + /// This can be called concurrently, regardless of whether there is an open |
| 108 | + /// \p UnifiedOnDiskCache instance or not; it has no effect on readers/writers |
| 109 | + /// in the same process or other processes. |
| 110 | + /// |
| 111 | + /// It is recommended that garbage-collection is triggered concurrently in the |
| 112 | + /// background, so that it has minimal effect on the workload of the process. |
| 113 | + static Error collectGarbage(StringRef Path); |
| 114 | + |
| 115 | + ~UnifiedOnDiskCache(); |
| 116 | + |
| 117 | +private: |
| 118 | + UnifiedOnDiskCache(); |
| 119 | + |
| 120 | + Expected<std::optional<ObjectID>> |
| 121 | + faultInFromUpstreamKV(ArrayRef<uint8_t> Key); |
| 122 | + |
| 123 | + std::string RootPath; |
| 124 | + std::optional<uint64_t> SizeLimit; |
| 125 | + |
| 126 | + int LockFD = -1; |
| 127 | + |
| 128 | + std::atomic<bool> NeedsGarbageCollection; |
| 129 | + std::string PrimaryDBDir; |
| 130 | + |
| 131 | + OnDiskGraphDB *UpstreamGraphDB = nullptr; |
| 132 | + std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB; |
| 133 | + |
| 134 | + std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB; |
| 135 | + std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB; |
| 136 | +}; |
| 137 | + |
| 138 | +} // namespace llvm::cas::ondisk |
| 139 | + |
| 140 | +#endif // LLVM_CAS_UNIFIEDONDISKCACHE_H |
0 commit comments