
Commit a77ffbe

Author: Kent Overstreet
bcachefs: btree node scan no longer uses btree cache
Previously, btree node scan used the btree node cache to check whether btree nodes were readable. But that is subject to interference from threads scanning different devices that try to read the same node - and, more critically, from nodes we had already attempted and failed to read before kicking off the scan.

Instead, allocate a 'struct btree' that does not live in the btree node cache, and call bch2_btree_node_read_done() directly.

Cc: Nikita Ofitserov <[email protected]>
Reviewed-by: Nikita Ofitserov <[email protected]>
Reported-and-tested-by: Edoardo Codeglia <[email protected]>
Signed-off-by: Kent Overstreet <[email protected]>
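In sketch form, the new flow looks roughly like this (a condensed illustration assembled from the diff below, with error handling, the header checks, and the endianness check omitted; all identifiers are the ones used in the patch, and 'n' is the struct found_btree_node being populated, so treat this as an outline rather than compilable code):

        /* The scan worker owns a private node that never enters the btree
         * node cache, so no other thread can interfere with the read: */
        struct btree *b = __bch2_btree_node_mem_alloc(c);
        struct bio *bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size),
                                    0, GFP_KERNEL);

        /* For each candidate offset: read the node into b->data, build its
         * key, then validate it directly instead of via a cache lookup: */
        found_btree_node_to_key(&b->key, &n);

        CLASS(printbuf, buf)();
        if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) {
                /* Readable: read_done has sorted the bsets, so take the
                 * node's max journal_seq and written sectors from the result */
                n.journal_seq     = le64_to_cpu(b->data->keys.journal_seq);
                n.sectors_written = b->written;
        }

        /* Teardown: the node was never in the cache, so free it directly */
        __btree_node_data_free(b);
        kfree(b);
        bio_put(bio);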
1 parent: c2b2c7d

1 file changed: +41 -43 lines


fs/bcachefs/btree_node_scan.c

Lines changed: 41 additions & 43 deletions
@@ -75,39 +75,6 @@ static inline u64 bkey_journal_seq(struct bkey_s_c k)
         }
 }
 
-static bool found_btree_node_is_readable(struct btree_trans *trans,
-                                         struct found_btree_node *f)
-{
-        struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
-
-        found_btree_node_to_key(&tmp.k, f);
-
-        struct btree *b = bch2_btree_node_get_noiter(trans, &tmp.k, f->btree_id, f->level, false);
-        bool ret = !IS_ERR_OR_NULL(b);
-        if (!ret)
-                return ret;
-
-        f->sectors_written = b->written;
-        f->journal_seq = le64_to_cpu(b->data->keys.journal_seq);
-
-        struct bkey_s_c k;
-        struct bkey unpacked;
-        struct btree_node_iter iter;
-        for_each_btree_node_key_unpack(b, k, &iter, &unpacked)
-                f->journal_seq = max(f->journal_seq, bkey_journal_seq(k));
-
-        six_unlock_read(&b->c.lock);
-
-        /*
-         * We might update this node's range; if that happens, we need the node
-         * to be re-read so the read path can trim keys that are no longer in
-         * this node
-         */
-        if (b != btree_node_root(trans->c, b))
-                bch2_btree_node_evict(trans, &tmp.k);
-        return ret;
-}
-
 static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
 {
         const struct found_btree_node *l = _l;
@@ -159,17 +126,17 @@ static const struct min_heap_callbacks found_btree_node_heap_cbs = {
 };
 
 static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
-                                struct bio *bio, struct btree_node *bn, u64 offset)
+                                struct btree *b, struct bio *bio, u64 offset)
 {
         struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+        struct btree_node *bn = b->data;
 
         bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
         bio->bi_iter.bi_sector = offset;
-        bch2_bio_map(bio, bn, PAGE_SIZE);
+        bch2_bio_map(bio, b->data, c->opts.block_size);
 
         u64 submit_time = local_clock();
         submit_bio_wait(bio);
-
         bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);
 
         if (bio->bi_status) {
@@ -201,6 +168,14 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
         if (BTREE_NODE_ID(bn) >= BTREE_ID_NR_MAX)
                 return;
 
+        bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
+        bio->bi_iter.bi_sector = offset;
+        bch2_bio_map(bio, b->data, c->opts.btree_node_size);
+
+        submit_time = local_clock();
+        submit_bio_wait(bio);
+        bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);
+
         rcu_read_lock();
         struct found_btree_node n = {
                 .btree_id = BTREE_NODE_ID(bn),
@@ -217,7 +192,20 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
         };
         rcu_read_unlock();
 
-        if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
+        found_btree_node_to_key(&b->key, &n);
+
+        CLASS(printbuf, buf)();
+        if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) {
+                /* read_done will swap out b->data for another buffer */
+                bn = b->data;
+                /*
+                 * Grab journal_seq here because we want the max journal_seq of
+                 * any bset; read_done sorts down to a single set and picks the
+                 * max journal_seq
+                 */
+                n.journal_seq = le64_to_cpu(bn->keys.journal_seq),
+                n.sectors_written = b->written;
+
                 mutex_lock(&f->lock);
                 if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
                         bch_err(c, "try_read_btree_node() can't handle endian conversion");
@@ -237,12 +225,20 @@ static int read_btree_nodes_worker(void *p)
         struct find_btree_nodes_worker *w = p;
         struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
         struct bch_dev *ca = w->ca;
-        void *buf = (void *) __get_free_page(GFP_KERNEL);
-        struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
         unsigned long last_print = jiffies;
+        struct btree *b = NULL;
+        struct bio *bio = NULL;
+
+        b = __bch2_btree_node_mem_alloc(c);
+        if (!b) {
+                bch_err(c, "read_btree_nodes_worker: error allocating buf");
+                w->f->ret = -ENOMEM;
+                goto err;
+        }
 
-        if (!buf || !bio) {
-                bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
+        bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL);
+        if (!bio) {
+                bch_err(c, "read_btree_nodes_worker: error allocating bio");
                 w->f->ret = -ENOMEM;
                 goto err;
         }
@@ -266,11 +262,13 @@ static int read_btree_nodes_worker(void *p)
                     !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
                         continue;
 
-                try_read_btree_node(w->f, ca, bio, buf, sector);
+                try_read_btree_node(w->f, ca, b, bio, sector);
         }
 err:
+        if (b)
+                __btree_node_data_free(b);
+        kfree(b);
         bio_put(bio);
-        free_page((unsigned long) buf);
         enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
         closure_put(w->cl);
         kfree(w);
