Skip to content

Commit 33dfafa

Browse files
author
Kent Overstreet
committed
bcachefs: Fix safe errors by default
i.e. the start of automatic self-healing: if errors=continue or errors=fix_safe, we now automatically fix simple errors without user intervention. New error action option: fix_safe. This replaces the existing errors=ro option, which gets a new slot, i.e. existing errors=ro users now get errors=fix_safe. This is currently only enabled for a limited set of errors - initially just disk accounting; errors we would never not want to fix, and we don't want to require user intervention (i.e. to make sure a bug report gets filed). Errors will still be counted in the superblock, so we (developers) will still know they've been occurring if a bug report gets filed (as bug reports typically include the errors superblock section). Eventually we'll be enabling this for a much wider set of errors, after we've done thorough error injection testing. Signed-off-by: Kent Overstreet <[email protected]>
1 parent a56da69 commit 33dfafa

File tree

5 files changed

+308
-289
lines changed

5 files changed

+308
-289
lines changed

fs/bcachefs/bcachefs_format.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -990,8 +990,9 @@ enum bch_version_upgrade_opts {
990990

991991
#define BCH_ERROR_ACTIONS() \
992992
x(continue, 0) \
993-
x(ro, 1) \
994-
x(panic, 2)
993+
x(fix_safe, 1) \
994+
x(panic, 2) \
995+
x(ro, 3)
995996

996997
enum bch_error_actions {
997998
#define x(t, n) BCH_ON_ERROR_##t = n,

fs/bcachefs/error.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ bool bch2_inconsistent_error(struct bch_fs *c)
1515
switch (c->opts.errors) {
1616
case BCH_ON_ERROR_continue:
1717
return false;
18+
case BCH_ON_ERROR_fix_safe:
1819
case BCH_ON_ERROR_ro:
1920
if (bch2_fs_emergency_read_only(c))
2021
bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
@@ -191,6 +192,12 @@ static void prt_actioning(struct printbuf *out, const char *action)
191192
prt_str(out, "ing");
192193
}
193194

195+
static const u8 fsck_flags_extra[] = {
196+
#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags,
197+
BCH_SB_ERRS()
198+
#undef x
199+
};
200+
194201
int bch2_fsck_err(struct bch_fs *c,
195202
enum bch_fsck_flags flags,
196203
enum bch_sb_error_id err,
@@ -203,6 +210,9 @@ int bch2_fsck_err(struct bch_fs *c,
203210
int ret = -BCH_ERR_fsck_ignore;
204211
const char *action_orig = "fix?", *action = action_orig;
205212

213+
if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
214+
flags |= fsck_flags_extra[err];
215+
206216
if ((flags & FSCK_CAN_FIX) &&
207217
test_bit(err, c->sb.errors_silent))
208218
return -BCH_ERR_fsck_fix;
@@ -265,7 +275,14 @@ int bch2_fsck_err(struct bch_fs *c,
265275
prt_printf(out, bch2_log_msg(c, ""));
266276
#endif
267277

268-
if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
278+
if ((flags & FSCK_CAN_FIX) &&
279+
(flags & FSCK_AUTOFIX) &&
280+
(c->opts.errors == BCH_ON_ERROR_continue ||
281+
c->opts.errors == BCH_ON_ERROR_fix_safe)) {
282+
prt_str(out, ", ");
283+
prt_actioning(out, action);
284+
ret = -BCH_ERR_fsck_fix;
285+
} else if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
269286
if (c->opts.errors != BCH_ON_ERROR_continue ||
270287
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
271288
prt_str(out, ", shutting down");

fs/bcachefs/error.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,13 +108,6 @@ struct fsck_err_state {
108108
char *last_msg;
109109
};
110110

111-
enum bch_fsck_flags {
112-
FSCK_CAN_FIX = 1 << 0,
113-
FSCK_CAN_IGNORE = 1 << 1,
114-
FSCK_NEED_FSCK = 1 << 2,
115-
FSCK_NO_RATELIMIT = 1 << 3,
116-
};
117-
118111
#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)
119112

120113
__printf(4, 5) __cold

fs/bcachefs/opts.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ enum fsck_err_opts {
137137
x(errors, u8, \
138138
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
139139
OPT_STR(bch2_error_actions), \
140-
BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \
140+
BCH_SB_ERROR_ACTION, BCH_ON_ERROR_fix_safe, \
141141
NULL, "Action to take on filesystem error") \
142142
x(metadata_replicas, u8, \
143143
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \

0 commit comments

Comments
 (0)