Skip to content

Commit 72bc8c2

Browse files
committed
[OnDiskGraphDB] Provide capability to track and report the total storage size of the database
Part of this change extends `OnDiskDataAllocator` so that it can optionally be created with a "user header" buffer that the caller is free to use for its own purposes. `OnDiskGraphDB` uses that buffer to keep track of the total size of its standalone files.
1 parent dbdbb76 commit 72bc8c2

File tree

5 files changed

+116
-16
lines changed

5 files changed

+116
-16
lines changed

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,12 @@ class OnDiskGraphDB {
291291
return make_range(Refs.begin(), Refs.end());
292292
}
293293

294+
/// \returns Total size of stored objects.
295+
///
296+
/// NOTE: The returned size may omit a large object if the process crashed
297+
/// right at the point of inserting it.
298+
size_t getStorageSize() const;
299+
294300
void print(raw_ostream &OS) const;
295301

296302
/// How to fault-in nodes if an upstream database is used.
@@ -366,6 +372,11 @@ class OnDiskGraphDB {
366372

367373
InternalRefArrayRef getInternalRefs(ObjectHandle Node) const;
368374

375+
void recordStandaloneSizeIncrease(size_t SizeIncrease);
376+
377+
std::atomic<uint64_t> &getStandaloneStorageSize();
378+
uint64_t getStandaloneStorageSize() const;
379+
369380
OnDiskGraphDB(StringRef RootPath, OnDiskHashMappedTrie Index,
370381
OnDiskDataAllocator DataPool,
371382
std::unique_ptr<OnDiskGraphDB> UpstreamDB,

llvm/include/llvm/CAS/OnDiskHashMappedTrie.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,8 @@ class OnDiskHashMappedTrie {
271271
return insert(const_pointer(), Value);
272272
}
273273

274+
size_t size() const;
275+
274276
/// Gets or creates a file at \p Path with a hash-mapped trie named \p
275277
/// TrieName. The hash size is \p NumHashBits (in bits) and the records store
276278
/// data of size \p DataSize (in bytes).
@@ -354,9 +356,17 @@ class OnDiskDataAllocator {
354356
return save(ArrayRef<char>(Data.begin(), Data.size()));
355357
}
356358

359+
/// \returns the buffer that was allocated at \p create time, with size
360+
/// \p UserHeaderSize.
361+
MutableArrayRef<uint8_t> getUserHeader();
362+
363+
size_t size() const;
364+
357365
static Expected<OnDiskDataAllocator>
358366
create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize,
359-
Optional<uint64_t> NewFileInitialSize);
367+
Optional<uint64_t> NewFileInitialSize,
368+
uint32_t UserHeaderSize = 0,
369+
function_ref<void(void *)> UserHeaderInit = nullptr);
360370

361371
OnDiskDataAllocator(OnDiskDataAllocator &&RHS);
362372
OnDiskDataAllocator &operator=(OnDiskDataAllocator &&RHS);

llvm/lib/CAS/OnDiskGraphDB.cpp

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,8 +1181,10 @@ Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data) {
11811181
TrieRecord::Data Existing;
11821182
{
11831183
TrieRecord::Data Leaf{SK, FileOffset()};
1184-
if (I.Ref.compare_exchange_strong(Existing, Leaf))
1184+
if (I.Ref.compare_exchange_strong(Existing, Leaf)) {
1185+
recordStandaloneSizeIncrease(FileSize);
11851186
return Error::success();
1187+
}
11861188
}
11871189

11881190
// If there was a race, confirm that the new value has valid storage.
@@ -1223,6 +1225,7 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs,
12231225
FileOffset PoolOffset;
12241226
SmallString<256> Path;
12251227
std::optional<MappedTempFile> File;
1228+
std::optional<uint64_t> FileSize;
12261229
auto Alloc = [&](size_t Size) -> Expected<char *> {
12271230
if (Size <= TrieRecord::MaxEmbeddedSize) {
12281231
SK = TrieRecord::StorageKind::DataPool;
@@ -1240,6 +1243,8 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs,
12401243
getStandalonePath(TrieRecord::getStandaloneFileSuffix(SK), I, Path);
12411244
if (Error E = createTempFile(Path, Size).moveInto(File))
12421245
return std::move(E);
1246+
assert(File->size() == Size);
1247+
FileSize = Size;
12431248
return File->data();
12441249
};
12451250
DataRecordHandle Record;
@@ -1275,8 +1280,11 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs,
12751280
// TODO: Find a way to reuse the storage from the new-but-abandoned record
12761281
// handle.
12771282
if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1278-
if (I.Ref.compare_exchange_strong(Existing, NewObject))
1283+
if (I.Ref.compare_exchange_strong(Existing, NewObject)) {
1284+
if (FileSize)
1285+
recordStandaloneSizeIncrease(*FileSize);
12791286
return Error::success();
1287+
}
12801288
}
12811289
}
12821290

@@ -1287,6 +1295,26 @@ Error OnDiskGraphDB::store(ObjectID ID, ArrayRef<ObjectID> Refs,
12871295
return Error::success();
12881296
}
12891297

1298+
/// Adds \p SizeIncrease to the standalone-storage size counter.
///
/// The counter is the atomic returned by getStandaloneStorageSize(); the
/// addition is performed with relaxed memory ordering.
void OnDiskGraphDB::recordStandaloneSizeIncrease(size_t SizeIncrease) {
1299+
getStandaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed);
1300+
}
1301+
1302+
/// \returns a reference to the atomic 64-bit counter that lives in the user
/// header of \c DataPool.
///
/// Asserts that the user header is exactly sizeof(std::atomic<uint64_t>) bytes
/// and 8-byte aligned before reinterpreting its bytes as the atomic.
std::atomic<uint64_t> &OnDiskGraphDB::getStandaloneStorageSize() {
1303+
MutableArrayRef<uint8_t> UserHeader = DataPool.getUserHeader();
1304+
assert(UserHeader.size() == sizeof(std::atomic<uint64_t>));
1305+
assert(isAddrAligned(Align(8), UserHeader.data()));
1306+
return *reinterpret_cast<std::atomic<uint64_t> *>(UserHeader.data());
1307+
}
1308+
1309+
/// \returns the current value of the standalone-storage size counter, read
/// with relaxed memory ordering.
uint64_t OnDiskGraphDB::getStandaloneStorageSize() const {
1310+
// The const_cast only reuses the non-const accessor; the counter is loaded
// here, not modified.
return const_cast<OnDiskGraphDB *>(this)->getStandaloneStorageSize().load(
1311+
std::memory_order_relaxed);
1312+
}
1313+
1314+
/// \returns the sum of \c Index.size(), \c DataPool.size(), and the
/// standalone-storage size counter.
///
/// NOTE(review): the standalone counter is a uint64_t while the result is a
/// size_t; presumably size_t is 64-bit on supported hosts -- confirm.
size_t OnDiskGraphDB::getStorageSize() const {
1315+
return Index.size() + DataPool.size() + getStandaloneStorageSize();
1316+
}
1317+
12901318
Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
12911319
StringRef AbsPath, StringRef HashName, unsigned HashByteSize,
12921320
std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy) {
@@ -1306,15 +1334,23 @@ Expected<std::unique_ptr<OnDiskGraphDB>> OnDiskGraphDB::open(
13061334
.moveInto(Index))
13071335
return std::move(E);
13081336

1337+
uint32_t UserHeaderSize = sizeof(std::atomic<uint64_t>);
13091338
std::optional<OnDiskDataAllocator> DataPool;
13101339
StringRef PolicyName =
13111340
Policy == FaultInPolicy::SingleNode ? "single" : "full";
13121341
if (Error E = OnDiskDataAllocator::create(
13131342
AbsPath + Slash + FilePrefix + DataPoolFile,
13141343
DataPoolTableName + "[" + HashName + "]" + PolicyName,
1315-
/*MaxFileSize=*/16 * GB, /*MinFileSize=*/MB)
1344+
/*MaxFileSize=*/16 * GB, /*MinFileSize=*/MB, UserHeaderSize,
1345+
[](void *UserHeaderPtr) {
1346+
new (UserHeaderPtr) std::atomic<uint64_t>(0);
1347+
})
13161348
.moveInto(DataPool))
13171349
return std::move(E);
1350+
if (DataPool->getUserHeader().size() != UserHeaderSize)
1351+
return createStringError(llvm::errc::argument_out_of_domain,
1352+
"unexpected user header in '" + AbsPath + Slash +
1353+
FilePrefix + DataPoolFile + "'");
13181354

13191355
return std::unique_ptr<OnDiskGraphDB>(
13201356
new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool),

llvm/lib/CAS/OnDiskHashMappedTrie.cpp

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ class DatabaseFile {
143143
return DatabaseFile(std::move(LMFR));
144144
}
145145

146+
/// \returns the size reported by \c Alloc.
size_t size() const { return Alloc.size(); }
147+
146148
private:
147149
static Error validate(LazyMappedFileRegion &LMFR);
148150

@@ -1020,6 +1022,8 @@ static Error checkTable(StringRef Label, size_t Expected, size_t Observed,
10201022
", observed: " + Twine(Observed) + ")");
10211023
}
10221024

1025+
/// \returns the size reported by the underlying file (\c Impl->File.size()).
size_t OnDiskHashMappedTrie::size() const { return Impl->File.size(); }
1026+
10231027
Expected<OnDiskHashMappedTrie> OnDiskHashMappedTrie::create(
10241028
const Twine &PathTwine, const Twine &TrieNameTwine, size_t NumHashBits,
10251029
uint64_t DataSize, uint64_t MaxFileSize,
@@ -1125,6 +1129,8 @@ namespace {
11251129
/// DataAllocator table layout:
11261130
/// - [8-bytes: Generic table header]
11271131
/// - 8-bytes: AllocatorOffset (reserved for implementing free lists)
1132+
/// - 8-bytes: Size for user data header
1133+
/// - <user data buffer>
11281134
///
11291135
/// Record layout:
11301136
/// - <data>
@@ -1136,6 +1142,7 @@ class DataAllocatorHandle {
11361142
/// Header of a data allocator table. The user header bytes, if any, are
/// stored directly after this struct.
struct Header {
11371143
/// Generic table header.
TableHandle::Header GenericHeader;
11381144
/// Reserved for implementing free lists.
std::atomic<int64_t> AllocatorOffset;
1145+
/// Size in bytes of the user data header.
const uint64_t UserHeaderSize;
11391146
};
11401147

11411148
operator TableHandle() const {
@@ -1154,8 +1161,13 @@ class DataAllocatorHandle {
11541161
const Header &getHeader() const { return *H; }
11551162
LazyMappedFileRegion &getRegion() const { return *LMFR; }
11561163

1164+
/// \returns a mutable view of the user header: \c UserHeaderSize bytes that
/// start immediately after the \c Header struct.
MutableArrayRef<uint8_t> getUserHeader() {
1165+
return MutableArrayRef(reinterpret_cast<uint8_t *>(H + 1),
1166+
H->UserHeaderSize);
1167+
}
1168+
11571169
static DataAllocatorHandle create(LazyMappedFileRegionBumpPtr &Alloc,
1158-
StringRef Name);
1170+
StringRef Name, uint32_t UserHeaderSize);
11591171

11601172
DataAllocatorHandle() = default;
11611173
DataAllocatorHandle(LazyMappedFileRegion &LMFR, Header &H)
@@ -1177,27 +1189,31 @@ struct OnDiskDataAllocator::ImplType {
11771189
};
11781190

11791191
/// Creates a data allocator table inside \p Alloc.
///
/// Allocates room for the \c Header, \p UserHeaderSize bytes of user header,
/// and the NUL-terminated table \p Name, in that order, then constructs the
/// header in place and returns a handle to it. The user header bytes are
/// zero-filled here.
DataAllocatorHandle
1180-
DataAllocatorHandle::create(LazyMappedFileRegionBumpPtr &Alloc,
1181-
StringRef Name) {
1192+
DataAllocatorHandle::create(LazyMappedFileRegionBumpPtr &Alloc, StringRef Name,
1193+
uint32_t UserHeaderSize) {
11821194
// Allocate.
1183-
intptr_t Offset = Alloc.allocateOffset(sizeof(Header) + Name.size() + 1);
1195+
intptr_t Offset =
1196+
Alloc.allocateOffset(sizeof(Header) + UserHeaderSize + Name.size() + 1);
11841197

11851198
// Construct the header and the name.
11861199
assert(Name.size() <= UINT16_MAX && "Expected smaller table name");
11871200
auto *H = new (Alloc.getRegion().data() + Offset)
11881201
Header{{TableHandle::TableKind::DataAllocator, (uint16_t)Name.size(),
1189-
(uint32_t)sizeof(Header)},
1190-
/*AllocatorOffset=*/{0}};
1191-
char *NameStorage = reinterpret_cast<char *>(H + 1);
1202+
(int32_t)(sizeof(Header) + UserHeaderSize)},
1203+
/*AllocatorOffset=*/{0},
1204+
/*UserHeaderSize=*/UserHeaderSize};
1205+
// Zero the user header, which sits directly after the Header struct.
memset(H + 1, 0, UserHeaderSize);
1206+
// The table name is stored after the user header.
char *NameStorage = reinterpret_cast<char *>(H + 1) + UserHeaderSize;
11921207
llvm::copy(Name, NameStorage);
11931208
NameStorage[Name.size()] = 0;
11941209
return DataAllocatorHandle(Alloc.getRegion(), *H);
11951210
}
11961211

1197-
Expected<OnDiskDataAllocator>
1198-
OnDiskDataAllocator::create(const Twine &PathTwine, const Twine &TableNameTwine,
1199-
uint64_t MaxFileSize,
1200-
Optional<uint64_t> NewFileInitialSize) {
1212+
Expected<OnDiskDataAllocator> OnDiskDataAllocator::create(
1213+
const Twine &PathTwine, const Twine &TableNameTwine, uint64_t MaxFileSize,
1214+
Optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize,
1215+
function_ref<void(void *)> UserHeaderInit) {
1216+
assert(!UserHeaderSize || UserHeaderInit);
12011217
SmallString<128> PathStorage;
12021218
StringRef Path = PathTwine.toStringRef(PathStorage);
12031219
SmallString<128> TableNameStorage;
@@ -1210,8 +1226,10 @@ OnDiskDataAllocator::create(const Twine &PathTwine, const Twine &TableNameTwine,
12101226
return DB.takeError();
12111227

12121228
DataAllocatorHandle Store =
1213-
DataAllocatorHandle::create(DB->getAlloc(), TableName);
1229+
DataAllocatorHandle::create(DB->getAlloc(), TableName, UserHeaderSize);
12141230
DB->addTable(Store);
1231+
if (UserHeaderSize)
1232+
UserHeaderInit(Store.getUserHeader().data());
12151233
return Error::success();
12161234
};
12171235

@@ -1258,6 +1276,12 @@ const char *OnDiskDataAllocator::beginData(FileOffset Offset) const {
12581276
return Impl->File.getRegion().data() + Offset.get();
12591277
}
12601278

1279+
/// \returns the user header buffer exposed by the allocator's table handle
/// (\c Impl->Store).
MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() {
1280+
return Impl->Store.getUserHeader();
1281+
}
1282+
1283+
/// \returns the size reported by the underlying file (\c Impl->File.size()).
size_t OnDiskDataAllocator::size() const { return Impl->File.size(); }
1284+
12611285
OnDiskDataAllocator::OnDiskDataAllocator(std::unique_ptr<ImplType> Impl)
12621286
: Impl(std::move(Impl)) {}
12631287

llvm/unittests/CAS/OnDiskGraphDBTest.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,25 @@ TEST(OnDiskGraphDBTest, Basic) {
9999
ASSERT_THAT_ERROR(DB->load(ID3).moveInto(Obj2), Succeeded());
100100
ASSERT_TRUE(Obj2.has_value());
101101
EXPECT_EQ(toStringRef(DB->getObjectData(*Obj2)), "world");
102+
103+
size_t LargeDataSize = 256LL * 1024LL; // 256K.
104+
// The precise size number is not important, we mainly check that the large
105+
// object will be properly accounted for.
106+
EXPECT_TRUE(DB->getStorageSize() > 10 &&
107+
DB->getStorageSize() < LargeDataSize);
108+
109+
SmallString<16> Buffer;
110+
Buffer.resize(LargeDataSize);
111+
ASSERT_THAT_ERROR(store(Buffer, {}).moveInto(ID1), Succeeded());
112+
size_t StorageSize = DB->getStorageSize();
113+
EXPECT_TRUE(StorageSize > LargeDataSize);
114+
115+
// Close & re-open the DB and check that it reports the same storage size.
116+
DB.reset();
117+
ASSERT_THAT_ERROR(
118+
OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB),
119+
Succeeded());
120+
EXPECT_EQ(DB->getStorageSize(), StorageSize);
102121
}
103122

104123
TEST(OnDiskGraphDBTest, FaultInSingleNode) {

0 commit comments

Comments
 (0)