Skip to content

Commit fecc45e

Browse files
xavierd authored and facebook-github-bot committed
inodes: recover from corrupted sqlite database
Summary: We've seen several cases where the sqlite database is corrupted, causing EdenFS to fail to start and requiring manual remediation. On Windows, FSCK is always able to rebuild the sqlite database from scratch. Thus, we can simply delete the corrupted database on disk and continue starting up. Reviewed By: chadaustin Differential Revision: D44155034 fbshipit-source-id: de05c814796ab8f76fd3cd9a3e98df438431c657
1 parent ab42a39 commit fecc45e

12 files changed

+151
-14
lines changed

eden/fs/inodes/Overlay.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,20 +43,22 @@ std::unique_ptr<InodeCatalog> makeInodeCatalog(
4343
AbsolutePathPiece localDir,
4444
Overlay::InodeCatalogType inodeCatalogType,
4545
const EdenConfig& config,
46-
IFileContentStore* fileContentStore) {
46+
IFileContentStore* fileContentStore,
47+
const std::shared_ptr<StructuredLogger>& logger) {
4748
if (inodeCatalogType == Overlay::InodeCatalogType::Tree) {
48-
return std::make_unique<SqliteInodeCatalog>(localDir);
49+
return std::make_unique<SqliteInodeCatalog>(localDir, logger);
4950
} else if (inodeCatalogType == Overlay::InodeCatalogType::TreeInMemory) {
5051
XLOG(WARN) << "In-memory overlay requested. This will cause data loss.";
5152
return std::make_unique<SqliteInodeCatalog>(
5253
std::make_unique<SqliteDatabase>(SqliteDatabase::inMemory));
5354
} else if (
5455
inodeCatalogType == Overlay::InodeCatalogType::TreeSynchronousOff) {
5556
return std::make_unique<SqliteInodeCatalog>(
56-
localDir, SqliteTreeStore::SynchronousMode::Off);
57+
localDir, logger, SqliteTreeStore::SynchronousMode::Off);
5758
} else if (inodeCatalogType == Overlay::InodeCatalogType::TreeBuffered) {
5859
XLOG(DBG4) << "Buffered tree overlay being used";
59-
return std::make_unique<BufferedSqliteInodeCatalog>(localDir, config);
60+
return std::make_unique<BufferedSqliteInodeCatalog>(
61+
localDir, logger, config);
6062
} else if (
6163
inodeCatalogType == Overlay::InodeCatalogType::TreeInMemoryBuffered) {
6264
XLOG(WARN)
@@ -69,15 +71,15 @@ std::unique_ptr<InodeCatalog> makeInodeCatalog(
6971
XLOG(DBG2)
7072
<< "Buffered tree overlay being used with synchronous-mode = off";
7173
return std::make_unique<BufferedSqliteInodeCatalog>(
72-
localDir, config, SqliteTreeStore::SynchronousMode::Off);
74+
localDir, logger, config, SqliteTreeStore::SynchronousMode::Off);
7375
}
7476
#ifdef _WIN32
7577
(void)fileContentStore;
7678
if (inodeCatalogType == Overlay::InodeCatalogType::Legacy) {
7779
throw std::runtime_error(
7880
"Legacy overlay type is not supported. Please reclone.");
7981
}
80-
return std::make_unique<SqliteInodeCatalog>(localDir);
82+
return std::make_unique<SqliteInodeCatalog>(localDir, logger);
8183
#else
8284
return std::make_unique<FsInodeCatalog>(
8385
static_cast<FileContentStore*>(fileContentStore));
@@ -143,7 +145,8 @@ Overlay::Overlay(
143145
localDir,
144146
inodeCatalogType,
145147
config,
146-
fileContentStore_ ? fileContentStore_.get() : nullptr)},
148+
fileContentStore_ ? fileContentStore_.get() : nullptr,
149+
logger)},
147150
inodeCatalogType_{inodeCatalogType},
148151
supportsSemanticOperations_{inodeCatalog_->supportsSemanticOperations()},
149152
localDir_{localDir},

eden/fs/inodes/sqlitecatalog/BufferedSqliteInodeCatalog.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@
1919

2020
namespace facebook::eden {
2121

22+
class StructuredLogger;
23+
2224
BufferedSqliteInodeCatalog::BufferedSqliteInodeCatalog(
2325
AbsolutePathPiece path,
26+
std::shared_ptr<StructuredLogger> logger,
2427
const EdenConfig& config,
2528
SqliteTreeStore::SynchronousMode mode)
26-
: SqliteInodeCatalog(path, mode),
29+
: SqliteInodeCatalog(path, std::move(logger), mode),
2730
bufferSize_{config.overlayBufferSize.getValue()} {
2831
workerThread_ = std::thread{[this] {
2932
folly::setThreadName("OverlayBuffer");

eden/fs/inodes/sqlitecatalog/BufferedSqliteInodeCatalog.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,13 @@ namespace facebook::eden {
2323

2424
struct InodeNumber;
2525
class EdenConfig;
26+
class StructuredLogger;
2627

2728
class BufferedSqliteInodeCatalog : public SqliteInodeCatalog {
2829
public:
2930
explicit BufferedSqliteInodeCatalog(
3031
AbsolutePathPiece path,
32+
std::shared_ptr<StructuredLogger> logger,
3133
const EdenConfig& config,
3234
SqliteTreeStore::SynchronousMode mode =
3335
SqliteTreeStore::SynchronousMode::Normal);

eden/fs/inodes/sqlitecatalog/SqliteInodeCatalog.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@
1515

1616
namespace facebook::eden {
1717

18+
class StructuredLogger;
19+
1820
SqliteInodeCatalog::SqliteInodeCatalog(
1921
AbsolutePathPiece path,
22+
std::shared_ptr<StructuredLogger> logger,
2023
SqliteTreeStore::SynchronousMode mode)
21-
: store_{path, mode} {}
24+
: store_{path, std::move(logger), mode} {}
2225

2326
std::optional<InodeNumber> SqliteInodeCatalog::initOverlay(
2427
bool createIfNonExisting) {

eden/fs/inodes/sqlitecatalog/SqliteInodeCatalog.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,13 @@ namespace overlay {
2828
class OverlayDir;
2929
}
3030
struct InodeNumber;
31+
class StructuredLogger;
3132

3233
class SqliteInodeCatalog : public InodeCatalog {
3334
public:
3435
explicit SqliteInodeCatalog(
3536
AbsolutePathPiece path,
37+
std::shared_ptr<StructuredLogger> logger,
3638
SqliteTreeStore::SynchronousMode mode =
3739
SqliteTreeStore::SynchronousMode::Normal);
3840

eden/fs/inodes/sqlitecatalog/SqliteTreeStore.cpp

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@
88
#include "eden/fs/inodes/sqlitecatalog/SqliteTreeStore.h"
99

1010
#include <folly/Range.h>
11+
#include <folly/stop_watch.h>
1112
#include <array>
1213
#include "eden/fs/inodes/InodeNumber.h"
1314
#include "eden/fs/inodes/overlay/gen-cpp2/overlay_types.h"
1415
#include "eden/fs/sqlite/PersistentSqliteStatement.h"
1516
#include "eden/fs/sqlite/SqliteStatement.h"
17+
#include "eden/fs/telemetry/StructuredLogger.h"
1618
#include "eden/fs/utils/DirType.h"
1719

1820
namespace facebook::eden {
@@ -119,12 +121,73 @@ struct SqliteTreeStore::StatementCache {
119121
std::array<PersistentSqliteStatement, kBatchInsertSize> batchInsert;
120122
};
121123

124+
namespace {
125+
/**
 * Delete the sqlite database file at `path` and construct a new
 * SqliteDatabase for the same path.
 *
 * A file that does not exist (ENOENT) is not treated as an error. Any other
 * unlink() failure throws std::runtime_error whose message includes the path
 * and the errno value.
 */
std::unique_ptr<SqliteDatabase> removeAndRecreateDb(AbsolutePathPiece path) {
  // Keep the owned copy of the path in a named local so that no temporary is
  // destroyed between the unlink() call and the errno read below.
  const auto dbPath = path.copy();
  if (::unlink(dbPath.c_str()) != 0) {
    // Snapshot errno right away: building the exception message runs library
    // code that is permitted to overwrite it.
    const int unlinkErrno = errno;
    if (unlinkErrno != ENOENT) {
      throw_<std::runtime_error>(
          "unable to remove sqlite database ",
          path,
          ", errno: ",
          unlinkErrno);
    }
  }
  return std::make_unique<SqliteDatabase>(path);
}
133+
134+
/**
 * Open the sqlite database at `path` and run `PRAGMA integrity_check` on it.
 *
 * Every row returned by the pragma is collected. The database is considered
 * healthy when no rows are returned or when the only row is "ok". In both the
 * healthy and the corrupted case a SqliteIntegrityCheck event is sent to
 * `logger` with the time spent in the check (in seconds) and the number of
 * reported errors (0 when healthy).
 *
 * On Windows (folly::kIsWindows), a corrupted database, or any std::exception
 * raised inside the try block (opening, checking, logging, or recreating), is
 * handled by logging a warning and returning removeAndRecreateDb(path). On
 * other platforms the exception propagates to the caller; corruption is
 * reported as std::runtime_error.
 *
 * @param path   location of the sqlite database file.
 * @param logger destination for the SqliteIntegrityCheck event; it is
 *               dereferenced unconditionally, so it must not be null.
 * @return the opened (or freshly recreated) database.
 */
std::unique_ptr<SqliteDatabase> openAndVerifyDb(
    AbsolutePathPiece path,
    std::shared_ptr<StructuredLogger> logger) {
  try {
    auto db = std::make_unique<SqliteDatabase>(path);

    std::vector<std::string> errors;

    folly::stop_watch<> integrityCheckRuntime;
    {
      // Scope the lock and the statement so both are released before the
      // database is handed back to the caller or reset below.
      auto dbLock = db->lock();
      auto stmt = SqliteStatement(dbLock, "PRAGMA integrity_check");
      while (stmt.step()) {
        errors.push_back(stmt.columnBlob(0).str());
      }
    }
    auto runtimeInSeconds =
        std::chrono::duration<double>{integrityCheckRuntime.elapsed()}.count();

    if (errors.empty() || (errors.size() == 1 && errors.front() == "ok")) {
      logger->logEvent(SqliteIntegrityCheck{runtimeInSeconds, 0});
      return db;
    } else {
      logger->logEvent(SqliteIntegrityCheck{
          runtimeInSeconds, folly::to_signed(errors.size())});
      if (folly::kIsWindows) {
        XLOG(WARN) << "SqliteDatabase is corrupted";
        for (const auto& error : errors) {
          XLOG(WARN) << "Sqlite error: " << error;
        }
        // Release the database object before its file is unlinked.
        db.reset();
        return removeAndRecreateDb(path);
      } else {
        throw_<std::runtime_error>("SqliteDatabase is corrupted");
      }
    }

  } catch (const std::exception& ex) {
    // `db` was local to the try block, so it has already been destroyed by
    // the time this handler runs.
    if (folly::kIsWindows) {
      XLOG(WARN) << "SqliteDatabase (" << path
                 << ") failed to open: " << ex.what();
      return removeAndRecreateDb(path);
    }
    throw;
  }
}
181+
} // namespace
182+
122183
SqliteTreeStore::SqliteTreeStore(
123184
AbsolutePathPiece path,
185+
std::shared_ptr<StructuredLogger> logger,
124186
SqliteTreeStore::SynchronousMode synchronous_mode) {
125187
ensureDirectoryExists(path);
126188

127-
db_ = std::make_unique<SqliteDatabase>(path + kTreeStorePath);
189+
AbsolutePath sqliteDbPath = path + kTreeStorePath;
190+
db_ = openAndVerifyDb(sqliteDbPath, std::move(logger));
128191

129192
// Enable WAL for faster writes to the database. See also:
130193
// https://www.sqlite.org/wal.html

eden/fs/inodes/sqlitecatalog/SqliteTreeStore.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class OverlayEntry;
2424
} // namespace overlay
2525
class SqliteStatement;
2626
struct InodeNumber;
27+
class StructuredLogger;
2728

2829
class SqliteTreeStoreNonEmptyError : public std::exception {
2930
public:
@@ -52,6 +53,7 @@ class SqliteTreeStore {
5253

5354
explicit SqliteTreeStore(
5455
AbsolutePathPiece dir,
56+
std::shared_ptr<StructuredLogger> logger,
5557
SqliteTreeStore::SynchronousMode mode =
5658
SqliteTreeStore::SynchronousMode::Normal);
5759

eden/fs/inodes/sqlitecatalog/WindowsOverlayScanner.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "eden/fs/config/EdenConfig.h"
1616
#include "eden/fs/inodes/fscatalog/OverlayChecker.h"
1717
#include "eden/fs/inodes/sqlitecatalog/SqliteInodeCatalog.h"
18+
#include "eden/fs/telemetry/NullStructuredLogger.h"
1819
#include "eden/fs/utils/PathFuncs.h"
1920
#include "eden/fs/utils/WinStackTrace.h"
2021

@@ -48,7 +49,8 @@ int main(int argc, char** argv) {
4849
auto overlayPath = canonicalPath(argv[1]);
4950
auto mountPath = canonicalPath(FLAGS_mount_path);
5051

51-
SqliteInodeCatalog inodeCatalog(overlayPath);
52+
SqliteInodeCatalog inodeCatalog(
53+
overlayPath, std::make_shared<NullStructuredLogger>());
5254
inodeCatalog.initOverlay(true);
5355
XLOG(INFO) << "start scanning";
5456
OverlayChecker::LookupCallback lookup = [](auto, auto) {

eden/fs/telemetry/LogEvent.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,4 +344,16 @@ struct WorkingCopyGc {
344344
}
345345
};
346346

347+
struct SqliteIntegrityCheck {
348+
static constexpr const char* type = "sqlite_integrity_check";
349+
350+
double duration = 0.0;
351+
int64_t numErrors = 0;
352+
353+
void populate(DynamicEvent& event) const {
354+
event.addDouble("duration", duration);
355+
event.addInt("num_errors", numErrors);
356+
}
357+
};
358+
347359
} // namespace facebook::eden

eden/integration/corrupt_overlay_test.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
import eden.integration.lib.overlay as overlay_mod
1414
from eden.integration.lib import testcase
1515

16+
from facebook.eden.ttypes import MountState
17+
1618

1719
@testcase.eden_nfs_repo_test
1820
class CorruptOverlayTest(testcase.HgRepoTestMixin, testcase.EdenRepoTest):
@@ -100,3 +102,43 @@ def test_eden_list_does_not_return_corrupt_mounts(self) -> None:
100102

101103
self.eden.start()
102104
self.assertEqual({str(self.mount): "NOT_RUNNING"}, self.eden.list_cmd_simple())
105+
106+
107+
@testcase.eden_repo_test
class CorruptSqliteOverlayTest(testcase.EdenRepoTest):
    """Restart EdenFS after truncating the overlay's treestore.db and check
    that the mount still reaches the RUNNING state.
    """

    def populate_repo(self) -> None:
        self.repo.write_file("hello", "hola\n")
        self.repo.write_file("dir/a", "a\n")
        self.repo.write_file("dir/b", "b\n")

        self.repo.commit("Initial commit.")

    def test_integrity_corrupt_sqlite(self) -> None:
        """Slightly corrupt the sqlite database so opening it doesn't catch the
        corruption.
        """
        self.read_dir("dir")
        self.eden.shutdown()

        treestoredb = self.eden.overlay_dir_for_mount(self.mount_path) / "treestore.db"
        size = os.stat(treestoredb).st_size
        # Drop only the last 5 bytes of the file; reuse the path that was just
        # stat'd so the measured and truncated file cannot diverge.
        os.truncate(treestoredb, size - 5)

        self.eden.start()
        self.assertEqual(self.eden.get_mount_state(self.mount_path), MountState.RUNNING)

    def test_full_corrupt_sqlite(self) -> None:
        """Fully corrupt the database, opening it is sufficient to detect the corruption."""
        self.read_dir("dir")
        self.eden.shutdown()

        treestoredb = self.eden.overlay_dir_for_mount(self.mount_path) / "treestore.db"
        size = os.stat(treestoredb).st_size
        # Cut the file down to half of its original size.
        os.truncate(treestoredb, size // 2)

        self.eden.start()
        self.assertEqual(self.eden.get_mount_state(self.mount_path), MountState.RUNNING)

0 commit comments

Comments
 (0)