Skip to content

Commit 111f6c1

Browse files
committed
[CAS/OnDiskKeyValueDB] Factor out the underlying implementation of OnDiskActionCache
Introduce `OnDiskKeyValueDB` as an on-disk key-value database that is independent of a particular hashing algorithm and data values. `OnDiskActionCache` is implemented as a wrapper of `OnDiskKeyValueDB` with `BLAKE3` keys and values.
1 parent 72bc8c2 commit 111f6c1

File tree

6 files changed

+235
-54
lines changed

6 files changed

+235
-54
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
//===- OnDiskKeyValueDB.h ---------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CAS_ONDISKKEYVALUEDB_H
10+
#define LLVM_CAS_ONDISKKEYVALUEDB_H
11+
12+
#include "llvm/CAS/OnDiskHashMappedTrie.h"
13+
14+
namespace llvm::cas::ondisk {
15+
16+
/// An on-disk key-value data store with the following properties:
17+
/// * Keys are fixed length binary hashes with expected normal distribution.
18+
/// * Values are buffers of the same size, specified at creation time.
19+
/// * The value of a key cannot be changed once it is set.
20+
/// * The value buffers returned from a key lookup have 8-byte alignment.
21+
class OnDiskKeyValueDB {
22+
public:
23+
/// Associate a value with a key.
24+
///
25+
/// \param Key the hash bytes for the key
26+
/// \param Value the value bytes, same size as \p ValueSize parameter of
27+
/// \p open call.
28+
///
29+
/// \returns the value associated with the \p Key. It may be different than
30+
/// \p Value if another value is already associated with this key.
31+
Expected<ArrayRef<char>> put(ArrayRef<uint8_t> Key, ArrayRef<char> Value);
32+
33+
/// \returns the value associated with the \p Key, or \p std::nullopt if the
34+
/// key does not exist.
35+
Expected<std::optional<ArrayRef<char>>> get(ArrayRef<uint8_t> Key);
36+
37+
/// \returns Total size of stored data.
38+
size_t getStorageSize() const {
39+
return Cache.size();
40+
}
41+
42+
/// Open the on-disk store from a directory.
43+
///
44+
/// \param Path directory for the on-disk store. The directory will be created
45+
/// if it doesn't exist.
46+
/// \param HashName Identifier name for the hashing algorithm that is going to
47+
/// be used.
48+
/// \param KeySize Size for the key hash bytes.
49+
/// \param ValueName Identifier name for the values.
50+
/// \param ValueSize Size for the value bytes.
51+
static Expected<std::unique_ptr<OnDiskKeyValueDB>>
52+
open(StringRef Path, StringRef HashName, unsigned KeySize,
53+
StringRef ValueName, size_t ValueSize);
54+
55+
private:
56+
OnDiskKeyValueDB(size_t ValueSize, OnDiskHashMappedTrie Cache)
57+
: ValueSize(ValueSize), Cache(std::move(Cache)) {}
58+
59+
const size_t ValueSize;
60+
OnDiskHashMappedTrie Cache;
61+
};
62+
63+
} // namespace llvm::cas::ondisk
64+
65+
#endif // LLVM_CAS_ONDISKKEYVALUEDB_H

llvm/lib/CAS/ActionCaches.cpp

Lines changed: 30 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "llvm/CAS/HashMappedTrie.h"
1212
#include "llvm/CAS/ObjectStore.h"
1313
#include "llvm/CAS/OnDiskHashMappedTrie.h"
14+
#include "llvm/CAS/OnDiskKeyValueDB.h"
1415
#include "llvm/Config/llvm-config.h"
1516
#include "llvm/Support/Alignment.h"
1617
#include "llvm/Support/BLAKE3.h"
@@ -61,19 +62,10 @@ class OnDiskActionCache final : public ActionCache {
6162

6263
private:
6364
static StringRef getHashName() { return "BLAKE3"; }
64-
static StringRef getActionCacheTableName() {
65-
static const std::string Name =
66-
("llvm.actioncache[" + getHashName() + "->" + getHashName() + "]")
67-
.str();
68-
return Name;
69-
}
70-
static constexpr StringLiteral ActionCacheFile = "actions";
71-
static constexpr StringLiteral FilePrefix = "v1.";
72-
73-
OnDiskActionCache(StringRef RootPath, OnDiskHashMappedTrie ActionCache);
74-
75-
std::string Path;
76-
OnDiskHashMappedTrie Cache;
65+
66+
OnDiskActionCache(std::unique_ptr<ondisk::OnDiskKeyValueDB> DB);
67+
68+
std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
7769
using DataT = CacheEntry<sizeof(HashType)>;
7870
};
7971
} // end namespace
@@ -137,62 +129,46 @@ std::unique_ptr<ActionCache> createInMemoryActionCache() {
137129
} // namespace cas
138130
} // namespace llvm
139131

140-
constexpr StringLiteral OnDiskActionCache::ActionCacheFile;
141-
constexpr StringLiteral OnDiskActionCache::FilePrefix;
142-
143-
OnDiskActionCache::OnDiskActionCache(StringRef Path, OnDiskHashMappedTrie Cache)
132+
OnDiskActionCache::OnDiskActionCache(
133+
std::unique_ptr<ondisk::OnDiskKeyValueDB> DB)
144134
: ActionCache(builtin::BuiltinCASContext::getDefaultContext()),
145-
Path(Path.str()), Cache(std::move(Cache)) {}
135+
DB(std::move(DB)) {}
146136

147137
Expected<std::unique_ptr<OnDiskActionCache>>
148138
OnDiskActionCache::create(StringRef AbsPath) {
149-
if (std::error_code EC = sys::fs::create_directories(AbsPath))
150-
return createFileError(AbsPath, EC);
151-
152-
SmallString<256> CachePath(AbsPath);
153-
sys::path::append(CachePath, FilePrefix + ActionCacheFile);
154-
constexpr uint64_t MB = 1024ull * 1024ull;
155-
constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;
156-
157-
Optional<OnDiskHashMappedTrie> ActionCache;
158-
if (Error E = OnDiskHashMappedTrie::create(
159-
CachePath, getActionCacheTableName(), sizeof(HashType) * 8,
160-
/*DataSize=*/sizeof(DataT), /*MaxFileSize=*/GB,
161-
/*MinFileSize=*/MB)
162-
.moveInto(ActionCache))
139+
std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
140+
if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(),
141+
sizeof(HashType), getHashName(),
142+
sizeof(DataT))
143+
.moveInto(DB))
163144
return std::move(E);
164-
165145
return std::unique_ptr<OnDiskActionCache>(
166-
new OnDiskActionCache(AbsPath, std::move(*ActionCache)));
146+
new OnDiskActionCache(std::move(DB)));
167147
}
168148

169149
Expected<Optional<CASID>>
170150
OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key) const {
171-
// Check the result cache.
172-
OnDiskHashMappedTrie::const_pointer ActionP = Cache.find(Key);
173-
if (!ActionP)
174-
return None;
175-
176-
const DataT *Output = reinterpret_cast<const DataT *>(ActionP->Data.data());
177-
return CASID::create(&getContext(), toStringRef(Output->getValue()));
151+
std::optional<ArrayRef<char>> Val;
152+
if (Error E = DB->get(Key).moveInto(Val))
153+
return std::move(E);
154+
if (!Val)
155+
return std::nullopt;
156+
return CASID::create(&getContext(), toStringRef(*Val));
178157
}
179158

180159
Error OnDiskActionCache::putImpl(ArrayRef<uint8_t> Key, const CASID &Result) {
181-
DataT Expected(Result.getHash());
182-
OnDiskHashMappedTrie::pointer ActionP = Cache.insertLazy(
183-
Key, [&](FileOffset TentativeOffset,
184-
OnDiskHashMappedTrie::ValueProxy TentativeValue) {
185-
assert(TentativeValue.Data.size() == sizeof(DataT));
186-
assert(isAddrAligned(Align::Of<DataT>(), TentativeValue.Data.data()));
187-
new (TentativeValue.Data.data()) DataT{Expected};
188-
});
189-
const DataT *Observed = reinterpret_cast<const DataT *>(ActionP->Data.data());
190-
191-
if (Expected.getValue() == Observed->getValue())
160+
auto ResultHash = Result.getHash();
161+
ArrayRef Expected((const char *)ResultHash.data(), ResultHash.size());
162+
ArrayRef<char> Observed;
163+
if (Error E = DB->put(Key, Expected).moveInto(Observed))
164+
return E;
165+
166+
if (Expected == Observed)
192167
return Error::success();
193168

194-
return createResultCachePoisonedError(hashToString(Key), getContext(), Result,
195-
Observed->getValue());
169+
return createResultCachePoisonedError(
170+
hashToString(Key), getContext(), Result,
171+
ArrayRef((const uint8_t *)Observed.data(), Observed.size()));
196172
}
197173

198174
#if LLVM_ENABLE_ONDISK_CAS

llvm/lib/CAS/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMCAS
2121
OnDiskCAS.cpp
2222
OnDiskGraphDB.cpp
2323
OnDiskHashMappedTrie.cpp
24+
OnDiskKeyValueDB.cpp
2425
TreeSchema.cpp
2526
Utils.cpp
2627

llvm/lib/CAS/OnDiskKeyValueDB.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
//===- OnDiskKeyValueDB.cpp -------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/CAS/OnDiskKeyValueDB.h"
10+
#include "llvm/Support/Alignment.h"
11+
#include "llvm/Support/Errc.h"
12+
#include "llvm/Support/Path.h"
13+
14+
using namespace llvm;
15+
using namespace llvm::cas;
16+
using namespace llvm::cas::ondisk;
17+
18+
// Base name of the backing file; open() appends FilePrefix + this name to the
// store directory path.
static constexpr StringLiteral ActionCacheFile = "actions";
19+
// Prefix that open() places in front of ActionCacheFile when forming the
// file name.
static constexpr StringLiteral FilePrefix = "v2.";
20+
21+
/// Associate \p Value with \p Key unless the key already has a value.
///
/// \param Key the hash bytes identifying the entry.
/// \param Value the value bytes; must be exactly \c ValueSize bytes long.
///
/// \returns the value stored for \p Key after the call (the pre-existing one
/// when the key was already present), or an \c invalid_argument error when
/// \p Value has the wrong size.
Expected<ArrayRef<char>> OnDiskKeyValueDB::put(ArrayRef<uint8_t> Key,
                                               ArrayRef<char> Value) {
  // Sizes are unsigned, so format them with utostr rather than the signed
  // itostr.
  if (LLVM_UNLIKELY(Value.size() != ValueSize))
    return createStringError(errc::invalid_argument,
                             "expected value size of " + utostr(ValueSize) +
                                 ", got: " + utostr(Value.size()));

  // The callback fills in the value bytes for a tentative new entry;
  // whichever entry ends up associated with the key is returned below.
  OnDiskHashMappedTrie::pointer ActionP = Cache.insertLazy(
      Key, [&](FileOffset TentativeOffset,
               OnDiskHashMappedTrie::ValueProxy TentativeValue) {
        assert(TentativeValue.Data.size() == ValueSize);
        llvm::copy(Value, TentativeValue.Data.data());
      });
  return ActionP->Data;
}
36+
37+
/// Look \p Key up in the backing trie.
///
/// \returns the stored value bytes, or \p std::nullopt when no entry exists
/// for \p Key.
Expected<std::optional<ArrayRef<char>>>
OnDiskKeyValueDB::get(ArrayRef<uint8_t> Key) {
  if (OnDiskHashMappedTrie::const_pointer Entry = Cache.find(Key)) {
    // Value buffers are expected to be 8-byte aligned.
    assert(isAddrAligned(Align(8), Entry->Data.data()));
    return Entry->Data;
  }
  // No entry for this key.
  return std::nullopt;
}
46+
47+
/// Create the store directory if needed and open (or create) the backing trie
/// file inside it.
///
/// \param Path directory for the on-disk store.
/// \param HashName name of the key hashing algorithm; part of the table name.
/// \param KeySize size of a key in bytes.
/// \param ValueName name describing the values; part of the table name.
/// \param ValueSize size of every value in bytes.
///
/// \returns the opened database, or the error from creating the directory or
/// the trie.
Expected<std::unique_ptr<OnDiskKeyValueDB>>
OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
                       StringRef ValueName, size_t ValueSize) {
  std::error_code DirEC = sys::fs::create_directories(Path);
  if (DirEC)
    return createFileError(Path, DirEC);

  // <Path>/<FilePrefix><ActionCacheFile>
  SmallString<256> TriePath(Path);
  sys::path::append(TriePath, FilePrefix + ActionCacheFile);

  constexpr uint64_t MB = 1024ull * 1024ull;
  constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;

  // The trie takes the key width in bits, hence KeySize * 8.
  auto TrieOrErr = OnDiskHashMappedTrie::create(
      TriePath, "llvm.actioncache[" + HashName + "->" + ValueName + "]",
      KeySize * 8,
      /*DataSize=*/ValueSize, /*MaxFileSize=*/GB,
      /*MinFileSize=*/MB);
  if (!TrieOrErr)
    return TrieOrErr.takeError();

  // The constructor is private, so std::make_unique cannot be used here.
  return std::unique_ptr<OnDiskKeyValueDB>(
      new OnDiskKeyValueDB(ValueSize, std::move(*TrieOrErr)));
}

llvm/unittests/CAS/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ add_llvm_unittest(CASTests
2828
ObjectStoreTest.cpp
2929
OnDiskGraphDBTest.cpp
3030
OnDiskHashMappedTrieTest.cpp
31+
OnDiskKeyValueDBTest.cpp
3132
ThreadSafeAllocatorTest.cpp
3233
TreeSchemaTest.cpp
3334
)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
//===- llvm/unittest/CAS/OnDiskKeyValueDBTest.cpp -------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/CAS/OnDiskKeyValueDB.h"
10+
#include "llvm/CAS/BuiltinObjectHasher.h"
11+
#include "llvm/Support/BLAKE3.h"
12+
#include "llvm/Testing/Support/Error.h"
13+
#include "llvm/Testing/Support/SupportHelpers.h"
14+
#include "gtest/gtest.h"
15+
16+
#if LLVM_ENABLE_ONDISK_CAS
17+
18+
using namespace llvm;
19+
using namespace llvm::cas;
20+
using namespace llvm::cas::ondisk;
21+
22+
using HasherT = BLAKE3;
23+
using HashType = decltype(HasherT::hash(std::declval<ArrayRef<uint8_t> &>()));
24+
using ValueType = std::array<char, 20>;
25+
26+
/// Hash the bytes of \p Data with \c HasherT and return the digest.
static HashType digest(StringRef Data) {
  ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(Data);
  return HasherT::hash(Bytes);
}
29+
30+
/// Build a fixed-size value from \p S: at most sizeof(ValueType) leading
/// characters are copied, and any remaining bytes are zero.
static ValueType valueFromString(StringRef S) {
  // Value-initialize so that bytes past the copied prefix are deterministic;
  // the whole buffer is later written to the store and compared.
  ValueType Val{};
  llvm::copy(S.substr(0, sizeof(Val)), Val.data());
  return Val;
}
35+
36+
// Exercises open/get/put: a missing key, a first insertion, a second
// insertion under the same key (which must keep the first value), and a
// final lookup.
TEST(OnDiskKeyValueDBTest, Basic) {
  unittest::TempDir Temp("ondiskkv", /*Unique=*/true);
  std::unique_ptr<OnDiskKeyValueDB> DB;
  ASSERT_THAT_ERROR(OnDiskKeyValueDB::open(Temp.path(), "blake3",
                                           sizeof(HashType), "test",
                                           sizeof(ValueType))
                        .moveInto(DB),
                    Succeeded());

  // Nothing has been stored yet, so the lookup must come back empty.
  {
    std::optional<ArrayRef<char>> Missing;
    ASSERT_THAT_ERROR(DB->get(digest("hello")).moveInto(Missing), Succeeded());
    EXPECT_FALSE(Missing.has_value());
  }

  // First insertion: the stored value equals what was passed in.
  ValueType ValW = valueFromString("world");
  ArrayRef<char> Stored;
  ASSERT_THAT_ERROR(DB->put(digest("hello"), ValW).moveInto(Stored),
                    Succeeded());
  EXPECT_EQ(Stored, ArrayRef(ValW));

  // Second insertion under the same key: the original value is kept.
  ASSERT_THAT_ERROR(
      DB->put(digest("hello"), valueFromString("other")).moveInto(Stored),
      Succeeded());
  EXPECT_EQ(Stored, ArrayRef(ValW));

  {
    std::optional<ArrayRef<char>> Found;
    ASSERT_THAT_ERROR(DB->get(digest("hello")).moveInto(Found), Succeeded());
    // Fatal assertion: the dereference below is only valid when a value is
    // present.
    ASSERT_TRUE(Found.has_value());
    EXPECT_EQ(*Found, ArrayRef(ValW));
  }
}
67+
68+
#endif // LLVM_ENABLE_ONDISK_CAS

0 commit comments

Comments
 (0)