Skip to content

Commit 2c5d8a8

Browse files
author
Kent Overstreet
committed
bcachefs: "Journal stuck" timeout now takes into account device latency
If a block device (e.g. a typical consumer SSD) takes multiple seconds to complete IOs (typically flushes), we don't want to emit the "journal stuck" message prematurely, so the timeout now takes the maximum observed device write latency into account. Also, make sure to drop the btree_trans srcu lock if we're blocking for more than a second. Signed-off-by: Kent Overstreet <[email protected]>
1 parent f917016 commit 2c5d8a8

File tree

3 files changed

+33
-8
lines changed

3 files changed

+33
-8
lines changed

fs/bcachefs/btree_trans_commit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
348348
unsigned flags)
349349
{
350350
return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
351-
trans->journal_u64s, flags);
351+
trans->journal_u64s, flags, trans);
352352
}
353353

354354
#define JSET_ENTRY_LOG_U64s 4

fs/bcachefs/journal.c

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,16 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
601601
: -BCH_ERR_journal_res_get_blocked;
602602
}
603603

604+
static unsigned max_dev_latency(struct bch_fs *c)
605+
{
606+
u64 nsecs = 0;
607+
608+
for_each_rw_member(c, ca)
609+
nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
610+
611+
return nsecs_to_jiffies(nsecs);
612+
}
613+
604614
/*
605615
* Essentially the entry function to the journaling code. When bcachefs is doing
606616
* a btree insert, it calls this function to get the current journal write.
@@ -612,17 +622,31 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
612622
* btree node write locks.
613623
*/
614624
int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
615-
unsigned flags)
625+
unsigned flags,
626+
struct btree_trans *trans)
616627
{
617628
int ret;
618629

619630
if (closure_wait_event_timeout(&j->async_wait,
620631
(ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
621632
(flags & JOURNAL_RES_GET_NONBLOCK),
622-
HZ * 10))
633+
HZ))
623634
return ret;
624635

636+
if (trans)
637+
bch2_trans_unlock_long(trans);
638+
625639
struct bch_fs *c = container_of(j, struct bch_fs, journal);
640+
int remaining_wait = max(max_dev_latency(c) * 2, HZ * 10);
641+
642+
remaining_wait = max(0, remaining_wait - HZ);
643+
644+
if (closure_wait_event_timeout(&j->async_wait,
645+
(ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
646+
(flags & JOURNAL_RES_GET_NONBLOCK),
647+
remaining_wait))
648+
return ret;
649+
626650
struct printbuf buf = PRINTBUF;
627651
bch2_journal_debug_to_text(&buf, j);
628652
bch_err(c, "Journal stuck? Waited for 10 seconds...\n%s",
@@ -727,7 +751,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
727751
* livelock:
728752
*/
729753
sched_annotate_sleep();
730-
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
754+
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
731755
if (ret)
732756
return ret;
733757

@@ -848,7 +872,7 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end)
848872
static int __bch2_journal_meta(struct journal *j)
849873
{
850874
struct journal_res res = {};
851-
int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
875+
int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
852876
if (ret)
853877
return ret;
854878

fs/bcachefs/journal.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ static inline void bch2_journal_res_put(struct journal *j,
312312
}
313313

314314
int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
315-
unsigned);
315+
unsigned, struct btree_trans *);
316316

317317
/* First bits for BCH_WATERMARK: */
318318
enum journal_res_flags {
@@ -368,7 +368,8 @@ static inline int journal_res_get_fast(struct journal *j,
368368
}
369369

370370
static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
371-
unsigned u64s, unsigned flags)
371+
unsigned u64s, unsigned flags,
372+
struct btree_trans *trans)
372373
{
373374
int ret;
374375

@@ -380,7 +381,7 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
380381
if (journal_res_get_fast(j, res, flags))
381382
goto out;
382383

383-
ret = bch2_journal_res_get_slowpath(j, res, flags);
384+
ret = bch2_journal_res_get_slowpath(j, res, flags, trans);
384385
if (ret)
385386
return ret;
386387
out:

0 commit comments

Comments
 (0)