Skip to content

Commit 305d823

Browse files
authored
Merge pull request #10306 from benlangmuir/cas-log-stable
[🍒][llvm][cas] Introduce On-disk CAS Log
2 parents 14bd0f8 + 3441858 commit 305d823

20 files changed

+745
-85
lines changed

llvm/include/llvm/CAS/MappedFileRegionBumpPtr.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616

1717
namespace llvm::cas {
1818

19+
namespace ondisk {
20+
class OnDiskCASLogger;
21+
}
22+
1923
/// Allocator for an owned mapped file region that supports thread-safe and
2024
/// process-safe bump pointer allocation.
2125
///
@@ -48,6 +52,7 @@ class MappedFileRegionBumpPtr {
4852
/// access to the file. Must call \c initializeBumpPtr.
4953
static Expected<MappedFileRegionBumpPtr>
5054
create(const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset,
55+
std::shared_ptr<ondisk::OnDiskCASLogger> Logger,
5156
function_ref<Error(MappedFileRegionBumpPtr &)> NewFileConstructor);
5257

5358
/// Finish initializing the bump pointer. Must be called by
@@ -99,6 +104,7 @@ class MappedFileRegionBumpPtr {
99104
std::swap(Path, RHS.Path);
100105
std::swap(FD, RHS.FD);
101106
std::swap(SharedLockFD, RHS.SharedLockFD);
107+
std::swap(Logger, RHS.Logger);
102108
}
103109

104110
private:
@@ -107,6 +113,7 @@ class MappedFileRegionBumpPtr {
107113
std::string Path;
108114
std::optional<int> FD;
109115
std::optional<int> SharedLockFD;
116+
std::shared_ptr<ondisk::OnDiskCASLogger> Logger = nullptr;
110117
};
111118

112119
} // namespace llvm::cas
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
//===- OnDiskCASLogger.h ----------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CAS_ONDISKLOGGER_H
10+
#define LLVM_CAS_ONDISKLOGGER_H
11+
12+
#include "llvm/Support/Error.h"
13+
#include <memory>
14+
15+
namespace llvm {
16+
class raw_fd_ostream;
17+
class Twine;
18+
} // namespace llvm
19+
20+
namespace llvm::cas::ondisk {
21+
22+
/// Interface for logging low-level on-disk CAS operations.
23+
///
24+
/// This log is intended to mirror the low-level details of the CAS in order to
25+
/// aid with debugging corruption or other issues with the on-disk format.
26+
class OnDiskCASLogger {
27+
public:
28+
/// Create or append to a log file inside the given CAS directory \p Path.
29+
///
30+
/// \param Path The parent directory of the log file.
31+
/// \param LogAllocations Whether to log all low-level allocations. This is
32+
/// on the order of twice as expensive to log.
33+
static Expected<std::unique_ptr<OnDiskCASLogger>> open(const Twine &Path,
34+
bool LogAllocations);
35+
36+
/// Create or append to a log file inside the given CAS directory \p Path if
37+
/// logging is enabled by the environment variable \c LLVM_CAS_LOG. If
38+
/// LLVM_CAS_LOG is set >= 2 then also log allocations.
39+
static Expected<std::unique_ptr<OnDiskCASLogger>>
40+
openIfEnabled(const Twine &Path);
41+
42+
~OnDiskCASLogger();
43+
44+
/// An offset into an \c OnDiskHashMappedTrie.
45+
using TrieOffset = int64_t;
46+
47+
void log_compare_exchange_strong(void *Region, TrieOffset Trie, size_t SlotI,
48+
TrieOffset Expected, TrieOffset New,
49+
TrieOffset Previous);
50+
void log_SubtrieHandle_create(void *Region, TrieOffset Trie,
51+
uint32_t StartBit, uint32_t NumBits);
52+
void log_HashMappedTrieHandle_createRecord(void *Region,
53+
TrieOffset TrieOffset,
54+
ArrayRef<uint8_t> Hash);
55+
void log_MappedFileRegionBumpPtr_resizeFile(StringRef Path, size_t Before,
56+
size_t After);
57+
void log_MappedFileRegionBumpPtr_create(StringRef Path, int FD, void *Region,
58+
size_t Capacity, size_t Size);
59+
void log_MappedFileRegionBumpPtr_oom(StringRef Path, size_t Capacity,
60+
size_t Size, size_t AllocSize);
61+
void log_MappedFileRegionBumpPtr_close(StringRef Path);
62+
void log_MappedFileRegionBumpPtr_allocate(void *Region, TrieOffset Off,
63+
size_t Size);
64+
void log_UnifiedOnDiskCache_collectGarbage(StringRef Path);
65+
void log_TempFile_create(StringRef Name);
66+
void log_TempFile_keep(StringRef TmpName, StringRef Name, std::error_code EC);
67+
void log_TempFile_remove(StringRef TmpName, std::error_code EC);
68+
69+
private:
70+
OnDiskCASLogger(raw_fd_ostream &OS, bool LogAllocations);
71+
72+
raw_fd_ostream &OS;
73+
bool LogAllocations;
74+
};
75+
76+
} // namespace llvm::cas::ondisk
77+
78+
#endif // LLVM_CAS_ONDISKLOGGER_H

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ class OnDiskGraphDB {
338338
static Expected<std::unique_ptr<OnDiskGraphDB>>
339339
open(StringRef Path, StringRef HashName, unsigned HashByteSize,
340340
std::unique_ptr<OnDiskGraphDB> UpstreamDB = nullptr,
341+
std::shared_ptr<OnDiskCASLogger> Logger = nullptr,
341342
FaultInPolicy Policy = FaultInPolicy::FullTree);
342343

343344
~OnDiskGraphDB();
@@ -411,8 +412,8 @@ class OnDiskGraphDB {
411412

412413
OnDiskGraphDB(StringRef RootPath, OnDiskHashMappedTrie Index,
413414
OnDiskDataAllocator DataPool,
414-
std::unique_ptr<OnDiskGraphDB> UpstreamDB,
415-
FaultInPolicy Policy);
415+
std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy,
416+
std::shared_ptr<OnDiskCASLogger> Logger);
416417

417418
/// Mapping from hash to object reference.
418419
///
@@ -431,6 +432,8 @@ class OnDiskGraphDB {
431432
/// Optional on-disk store to be used for faulting-in nodes.
432433
std::unique_ptr<OnDiskGraphDB> UpstreamDB;
433434
FaultInPolicy FIPolicy;
435+
436+
std::shared_ptr<OnDiskCASLogger> Logger;
434437
};
435438

436439
} // namespace llvm::cas::ondisk

llvm/include/llvm/CAS/OnDiskHashMappedTrie.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ class raw_ostream;
2828

2929
namespace cas {
3030

31+
namespace ondisk {
32+
class OnDiskCASLogger;
33+
}
34+
3135
class FileOffset {
3236
public:
3337
int64_t get() const { return Offset; }
@@ -253,6 +257,7 @@ class OnDiskHashMappedTrie {
253257
create(const Twine &Path, const Twine &TrieName, size_t NumHashBits,
254258
uint64_t DataSize, uint64_t MaxFileSize,
255259
std::optional<uint64_t> NewFileInitialSize,
260+
std::shared_ptr<ondisk::OnDiskCASLogger> Logger = nullptr,
256261
std::optional<size_t> NewTableNumRootBits = std::nullopt,
257262
std::optional<size_t> NewTableNumSubtrieBits = std::nullopt);
258263

@@ -328,6 +333,7 @@ class OnDiskDataAllocator {
328333
create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize,
329334
std::optional<uint64_t> NewFileInitialSize,
330335
uint32_t UserHeaderSize = 0,
336+
std::shared_ptr<ondisk::OnDiskCASLogger> Logger = nullptr,
331337
function_ref<void(void *)> UserHeaderInit = nullptr);
332338

333339
OnDiskDataAllocator(OnDiskDataAllocator &&RHS);

llvm/include/llvm/CAS/OnDiskKeyValueDB.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ class OnDiskKeyValueDB {
5757
/// \param ValueSize Size for the value bytes.
5858
static Expected<std::unique_ptr<OnDiskKeyValueDB>>
5959
open(StringRef Path, StringRef HashName, unsigned KeySize,
60-
StringRef ValueName, size_t ValueSize);
60+
StringRef ValueName, size_t ValueSize,
61+
std::shared_ptr<OnDiskCASLogger> Logger = nullptr);
6162

6263
private:
6364
OnDiskKeyValueDB(size_t ValueSize, OnDiskHashMappedTrie Cache)

llvm/include/llvm/CAS/UnifiedOnDiskCache.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ class UnifiedOnDiskCache {
117117
///
118118
/// It is recommended that garbage-collection is triggered concurrently in the
119119
/// background, so that it has minimal effect on the workload of the process.
120-
static Error collectGarbage(StringRef Path);
120+
static Error collectGarbage(StringRef Path,
121+
ondisk::OnDiskCASLogger *Logger = nullptr);
121122

122123
Error collectGarbage();
123124

@@ -145,6 +146,8 @@ class UnifiedOnDiskCache {
145146

146147
std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
147148
std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
149+
150+
std::shared_ptr<ondisk::OnDiskCASLogger> Logger = nullptr;
148151
};
149152

150153
} // namespace llvm::cas::ondisk

llvm/lib/CAS/ActionCaches.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "llvm/CAS/ActionCache.h"
1111
#include "llvm/CAS/HashMappedTrie.h"
1212
#include "llvm/CAS/ObjectStore.h"
13+
#include "llvm/CAS/OnDiskCASLogger.h"
1314
#include "llvm/CAS/OnDiskGraphDB.h"
1415
#include "llvm/CAS/OnDiskHashMappedTrie.h"
1516
#include "llvm/CAS/OnDiskKeyValueDB.h"
@@ -157,10 +158,14 @@ OnDiskActionCache::OnDiskActionCache(
157158

158159
Expected<std::unique_ptr<OnDiskActionCache>>
159160
OnDiskActionCache::create(StringRef AbsPath) {
161+
std::shared_ptr<ondisk::OnDiskCASLogger> Logger;
162+
if (Error E =
163+
ondisk::OnDiskCASLogger::openIfEnabled(AbsPath).moveInto(Logger))
164+
return std::move(E);
160165
std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
161166
if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(),
162167
sizeof(HashType), getHashName(),
163-
sizeof(DataT))
168+
sizeof(DataT), std::move(Logger))
164169
.moveInto(DB))
165170
return std::move(E);
166171
return std::unique_ptr<OnDiskActionCache>(

llvm/lib/CAS/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ add_llvm_component_library(LLVMCAS
1919
MappedFileRegionBumpPtr.cpp
2020
ObjectStore.cpp
2121
OnDiskCAS.cpp
22+
OnDiskCASLogger.cpp
2223
OnDiskCommon.cpp
2324
OnDiskGraphDB.cpp
2425
OnDiskHashMappedTrie.cpp

llvm/lib/CAS/MappedFileRegionBumpPtr.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#include "llvm/CAS/MappedFileRegionBumpPtr.h"
5555
#include "OnDiskCommon.h"
5656
#include "llvm/ADT/StringMap.h"
57+
#include "llvm/CAS/OnDiskCASLogger.h"
5758

5859
using namespace llvm;
5960
using namespace llvm::cas;
@@ -89,9 +90,11 @@ struct FileLockRAII {
8990

9091
Expected<MappedFileRegionBumpPtr> MappedFileRegionBumpPtr::create(
9192
const Twine &Path, uint64_t Capacity, int64_t BumpPtrOffset,
93+
std::shared_ptr<ondisk::OnDiskCASLogger> Logger,
9294
function_ref<Error(MappedFileRegionBumpPtr &)> NewFileConstructor) {
9395
MappedFileRegionBumpPtr Result;
9496
Result.Path = Path.str();
97+
Result.Logger = std::move(Logger);
9598
// Open the main file.
9699
int FD;
97100
if (std::error_code EC = sys::fs::openFileForReadWrite(
@@ -147,6 +150,10 @@ Expected<MappedFileRegionBumpPtr> MappedFileRegionBumpPtr::create(
147150
// to make this a sparse region, if supported.
148151
if (std::error_code EC = sys::fs::resize_file(FD, Capacity))
149152
return createFileError(Result.Path, EC);
153+
154+
if (Result.Logger)
155+
Result.Logger->log_MappedFileRegionBumpPtr_resizeFile(
156+
Result.Path, Status.getSize(), Capacity);
150157
} else {
151158
// Someone else initialized it.
152159
Capacity = Status.getSize();
@@ -186,9 +193,14 @@ void MappedFileRegionBumpPtr::destroyImpl() {
186193
if (BumpPtr) {
  assert(SharedLockFD && "Must have shared lock file open");
  // Shrinking is best effort: it is attempted only when the lock on the
  // shared lock file can be taken right now, and failures are ignored.
  if (tryLockFileThreadSafe(*SharedLockFD) == std::error_code()) {
    // Snapshot both values once so that the assertion, the resize, and the
    // log entry all describe the same state.
    size_t Size = size();
    size_t Capacity = capacity();
    // A region filled exactly to its capacity (Size == Capacity) is
    // accepted; only an overrun indicates a bug.
    assert(Size <= Capacity);
    (void)sys::fs::resize_file(*FD, Size);
    (void)unlockFileThreadSafe(*SharedLockFD);

    if (Logger)
      Logger->log_MappedFileRegionBumpPtr_resizeFile(Path, Capacity, Size);
  }
}
194206

@@ -203,6 +215,9 @@ void MappedFileRegionBumpPtr::destroyImpl() {
203215
// Close the file and shared lock.
204216
Close(FD);
205217
Close(SharedLockFD);
218+
219+
if (Logger)
220+
Logger->log_MappedFileRegionBumpPtr_close(Path);
206221
}
207222

208223
void MappedFileRegionBumpPtr::initializeBumpPtr(int64_t BumpPtrOffset) {
@@ -218,6 +233,10 @@ void MappedFileRegionBumpPtr::initializeBumpPtr(int64_t BumpPtrOffset) {
218233
if (!BumpPtr->compare_exchange_strong(ExistingValue, BumpPtrEndOffset))
219234
assert(ExistingValue >= BumpPtrEndOffset &&
220235
"Expected 0, or past the end of the BumpPtr itself");
236+
237+
if (Logger)
238+
Logger->log_MappedFileRegionBumpPtr_create(Path, *FD, data(), capacity(),
239+
size());
221240
}
222241

223242
static Error createAllocatorOutOfSpaceError() {
@@ -239,7 +258,15 @@ Expected<int64_t> MappedFileRegionBumpPtr::allocateOffset(uint64_t AllocSize) {
239258
if (OldEnd <= (int64_t)capacity())
240259
(void)BumpPtr->exchange(OldEnd);
241260

261+
if (Logger)
262+
Logger->log_MappedFileRegionBumpPtr_oom(Path, capacity(), OldEnd,
263+
AllocSize);
264+
242265
return createAllocatorOutOfSpaceError();
243266
}
267+
268+
if (Logger)
269+
Logger->log_MappedFileRegionBumpPtr_allocate(data(), OldEnd, AllocSize);
270+
244271
return OldEnd;
245272
}

llvm/lib/CAS/OnDiskCAS.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
#include "BuiltinCAS.h"
1010
#include "llvm/CAS/BuiltinCASContext.h"
1111
#include "llvm/CAS/BuiltinObjectHasher.h"
12+
#include "llvm/CAS/OnDiskCASLogger.h"
1213
#include "llvm/CAS/OnDiskGraphDB.h"
1314
#include "llvm/CAS/UnifiedOnDiskCache.h"
1415
#include "llvm/Support/Compiler.h"
16+
#include "llvm/Support/Error.h"
1517
#include "llvm/Support/Path.h"
1618

1719
using namespace llvm;
@@ -168,9 +170,15 @@ Expected<std::optional<uint64_t>> OnDiskCAS::getStorageSize() const {
168170
Error OnDiskCAS::pruneStorageData() { return UniDB->collectGarbage(); }
169171

170172
Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) {
173+
std::shared_ptr<ondisk::OnDiskCASLogger> Logger;
174+
if (Error E =
175+
ondisk::OnDiskCASLogger::openIfEnabled(AbsPath).moveInto(Logger))
176+
return std::move(E);
177+
171178
Expected<std::unique_ptr<ondisk::OnDiskGraphDB>> DB =
172179
ondisk::OnDiskGraphDB::open(AbsPath, BuiltinCASContext::getHashName(),
173-
sizeof(HashType));
180+
sizeof(HashType), /*UpstreamDB=*/nullptr,
181+
std::move(Logger));
174182
if (!DB)
175183
return DB.takeError();
176184
return std::unique_ptr<OnDiskCAS>(new OnDiskCAS(std::move(*DB)));

0 commit comments

Comments
 (0)