Skip to content

Commit 85d7ab2

Browse files
committed
Merge tag 'for-6.4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:

"Mostly core changes and cleanups, some notable fixes and two performance improvements in directory logging. The IO path cleanups are removing or refactoring old code, scrub main loop has been completely rewritten also refactoring old code.

There are some changes to non-btrfs code, mostly trivial, the cgroup punt bio logic is only moved from generic code.

Performance improvements:

- improve logging changes in a directory during one transaction, avoid iterating over items and reduce lock contention (fsync time 4x lower)
- when logging directory entries during one transaction, reduce locking of subvolume trees by checking tree-log instead (improvement in throughput and latency for concurrent access to a subvolume)

Notable fixes:

- dev-replace:
  - properly honor read mode when requested to avoid reading from source device
  - target device won't be used for eventual read repair, this is unreliable for NODATASUM files
  - when there are unpaired (and unrepairable) metadata during replace, exit early with error and don't try to finish whole operation
- scrub ioctl properly rejects unknown flags
- fix global block reserve calculations
- fix partial direct io write when there's a page fault in the middle, iomap will try to continue with partial request but the btrfs part did not match that, this can lead to zeros written instead of data

Core changes:

- io path:
  - continued cleanups and refactoring around bio handling
  - extent io submit path simplifications and cleanups
  - flush write path simplifications and cleanups
  - rework logic of passing sync mode of bio, with further cleanups
- rewrite scrub code flow, restructure how the stripes are enumerated and verified in a more unified way
- allow to set lower threshold for block group reclaim in debug mode to aid zoned mode testing
- remove obsolete time-based delayed ref throttling logic when truncating items
- DREW locks are not using percpu variables anymore
- more warning fixes (-Wmaybe-uninitialized)
- u64 division simplifications
- error handling improvements

Non-btrfs code changes:

- push cgroup punt bio logic to btrfs code (there was no other user of that), the functionality can be now selected separately by BLK_CGROUP_PUNT_BIO
- crc32c_impl removed after removing last uses in btrfs code
- add btrfs_assertfail() to objtool table"

* tag 'for-6.4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (147 commits)
  btrfs: mark btrfs_assertfail() __noreturn
  btrfs: fix uninitialized variable warnings
  btrfs: use log root when iterating over index keys when logging directory
  btrfs: avoid iterating over all indexes when logging directory
  btrfs: dev-replace: error out if we have unrepaired metadata error during
  btrfs: remove pointless loop at btrfs_get_next_valid_item()
  btrfs: scrub: reject unsupported scrub flags
  btrfs: reinterpret async discard iops_limit=0 as no delay
  btrfs: set default discard iops_limit to 1000
  btrfs: remove unused raid56 functions which were dedicated for scrub
  btrfs: scrub: remove scrub_bio structure
  btrfs: scrub: remove scrub_block and scrub_sector structures
  btrfs: scrub: remove the old scrub recheck code
  btrfs: scrub: remove the old writeback infrastructure
  btrfs: scrub: remove scrub_parity structure
  btrfs: scrub: use scrub_stripe to implement RAID56 P/Q scrub
  btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure
  btrfs: scrub: introduce helper to queue a stripe for scrub
  btrfs: scrub: introduce error reporting functionality for scrub_stripe
  btrfs: scrub: introduce a writeback helper for scrub_stripe
  ...
2 parents 94fc079 + f372463 commit 85d7ab2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+2867
-4844
lines changed

block/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,9 @@ config BLK_RQ_ALLOC_TIME
4141
config BLK_CGROUP_RWSTAT
4242
bool
4343

44+
config BLK_CGROUP_PUNT_BIO
45+
bool
46+
4447
config BLK_DEV_BSG_COMMON
4548
tristate
4649

block/blk-cgroup.c

Lines changed: 44 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,6 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
5656
static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
5757

5858
bool blkcg_debug_stats = false;
59-
static struct workqueue_struct *blkcg_punt_bio_wq;
6059

6160
#define BLKG_DESTROY_BATCH_SIZE 64
6261

@@ -166,7 +165,9 @@ static void __blkg_release(struct rcu_head *rcu)
166165
{
167166
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
168167

168+
#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
169169
WARN_ON(!bio_list_empty(&blkg->async_bios));
170+
#endif
170171

171172
/* release the blkcg and parent blkg refs this blkg has been holding */
172173
css_put(&blkg->blkcg->css);
@@ -188,6 +189,9 @@ static void blkg_release(struct percpu_ref *ref)
188189
call_rcu(&blkg->rcu_head, __blkg_release);
189190
}
190191

192+
#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
193+
static struct workqueue_struct *blkcg_punt_bio_wq;
194+
191195
static void blkg_async_bio_workfn(struct work_struct *work)
192196
{
193197
struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
@@ -198,10 +202,10 @@ static void blkg_async_bio_workfn(struct work_struct *work)
198202
bool need_plug = false;
199203

200204
/* as long as there are pending bios, @blkg can't go away */
201-
spin_lock_bh(&blkg->async_bio_lock);
205+
spin_lock(&blkg->async_bio_lock);
202206
bio_list_merge(&bios, &blkg->async_bios);
203207
bio_list_init(&blkg->async_bios);
204-
spin_unlock_bh(&blkg->async_bio_lock);
208+
spin_unlock(&blkg->async_bio_lock);
205209

206210
/* start plug only when bio_list contains at least 2 bios */
207211
if (bios.head && bios.head->bi_next) {
@@ -214,6 +218,40 @@ static void blkg_async_bio_workfn(struct work_struct *work)
214218
blk_finish_plug(&plug);
215219
}
216220

221+
/*
222+
* When a shared kthread issues a bio for a cgroup, doing so synchronously can
223+
* lead to priority inversions as the kthread can be trapped waiting for that
224+
* cgroup. Use this helper instead of submit_bio to punt the actual issuing to
225+
* a dedicated per-blkcg work item to avoid such priority inversions.
226+
*/
227+
void blkcg_punt_bio_submit(struct bio *bio)
228+
{
229+
struct blkcg_gq *blkg = bio->bi_blkg;
230+
231+
if (blkg->parent) {
232+
spin_lock(&blkg->async_bio_lock);
233+
bio_list_add(&blkg->async_bios, bio);
234+
spin_unlock(&blkg->async_bio_lock);
235+
queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
236+
} else {
237+
/* never bounce for the root cgroup */
238+
submit_bio(bio);
239+
}
240+
}
241+
EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
242+
243+
static int __init blkcg_punt_bio_init(void)
244+
{
245+
blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
246+
WQ_MEM_RECLAIM | WQ_FREEZABLE |
247+
WQ_UNBOUND | WQ_SYSFS, 0);
248+
if (!blkcg_punt_bio_wq)
249+
return -ENOMEM;
250+
return 0;
251+
}
252+
subsys_initcall(blkcg_punt_bio_init);
253+
#endif /* CONFIG_BLK_CGROUP_PUNT_BIO */
254+
217255
/**
218256
* bio_blkcg_css - return the blkcg CSS associated with a bio
219257
* @bio: target bio
@@ -269,10 +307,12 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
269307

270308
blkg->q = disk->queue;
271309
INIT_LIST_HEAD(&blkg->q_node);
310+
blkg->blkcg = blkcg;
311+
#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
272312
spin_lock_init(&blkg->async_bio_lock);
273313
bio_list_init(&blkg->async_bios);
274314
INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
275-
blkg->blkcg = blkcg;
315+
#endif
276316

277317
u64_stats_init(&blkg->iostat.sync);
278318
for_each_possible_cpu(cpu) {
@@ -1688,25 +1728,6 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
16881728
}
16891729
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
16901730

1691-
bool __blkcg_punt_bio_submit(struct bio *bio)
1692-
{
1693-
struct blkcg_gq *blkg = bio->bi_blkg;
1694-
1695-
/* consume the flag first */
1696-
bio->bi_opf &= ~REQ_CGROUP_PUNT;
1697-
1698-
/* never bounce for the root cgroup */
1699-
if (!blkg->parent)
1700-
return false;
1701-
1702-
spin_lock_bh(&blkg->async_bio_lock);
1703-
bio_list_add(&blkg->async_bios, bio);
1704-
spin_unlock_bh(&blkg->async_bio_lock);
1705-
1706-
queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
1707-
return true;
1708-
}
1709-
17101731
/*
17111732
* Scale the accumulated delay based on how long it has been since we updated
17121733
* the delay. We only call this when we are adding delay, in case it's been a
@@ -2085,16 +2106,5 @@ bool blk_cgroup_congested(void)
20852106
return ret;
20862107
}
20872108

2088-
static int __init blkcg_init(void)
2089-
{
2090-
blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
2091-
WQ_MEM_RECLAIM | WQ_FREEZABLE |
2092-
WQ_UNBOUND | WQ_SYSFS, 0);
2093-
if (!blkcg_punt_bio_wq)
2094-
return -ENOMEM;
2095-
return 0;
2096-
}
2097-
subsys_initcall(blkcg_init);
2098-
20992109
module_param(blkcg_debug_stats, bool, 0644);
21002110
MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");

block/blk-cgroup.h

Lines changed: 2 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -72,9 +72,10 @@ struct blkcg_gq {
7272
struct blkg_iostat_set iostat;
7373

7474
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
75-
75+
#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
7676
spinlock_t async_bio_lock;
7777
struct bio_list async_bios;
78+
#endif
7879
union {
7980
struct work_struct async_bio_work;
8081
struct work_struct free_work;
@@ -375,16 +376,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
375376
if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \
376377
(p_blkg)->q)))
377378

378-
bool __blkcg_punt_bio_submit(struct bio *bio);
379-
380-
static inline bool blkcg_punt_bio_submit(struct bio *bio)
381-
{
382-
if (bio->bi_opf & REQ_CGROUP_PUNT)
383-
return __blkcg_punt_bio_submit(bio);
384-
else
385-
return false;
386-
}
387-
388379
static inline void blkcg_bio_issue_init(struct bio *bio)
389380
{
390381
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
@@ -506,8 +497,6 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return
506497
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
507498
static inline void blkg_get(struct blkcg_gq *blkg) { }
508499
static inline void blkg_put(struct blkcg_gq *blkg) { }
509-
510-
static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
511500
static inline void blkcg_bio_issue_init(struct bio *bio) { }
512501
static inline void blk_cgroup_bio_start(struct bio *bio) { }
513502
static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }

block/blk-core.c

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -830,9 +830,6 @@ EXPORT_SYMBOL(submit_bio_noacct);
830830
*/
831831
void submit_bio(struct bio *bio)
832832
{
833-
if (blkcg_punt_bio_submit(bio))
834-
return;
835-
836833
if (bio_op(bio) == REQ_OP_READ) {
837834
task_io_account_read(bio->bi_iter.bi_size);
838835
count_vm_events(PGPGIN, bio_sectors(bio));

fs/btrfs/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
config BTRFS_FS
44
tristate "Btrfs filesystem support"
5+
select BLK_CGROUP_PUNT_BIO
56
select CRYPTO
67
select CRYPTO_CRC32C
78
select LIBCRC32C

0 commit comments

Comments
 (0)