
Commit bd610c0

adam900710 authored and kdave committed
btrfs: only unlock the to-be-submitted ranges inside a folio
[SUBPAGE COMPRESSION LIMITS]

Currently inside writepage_delalloc(), if a delalloc range is going to be
submitted asynchronously (inline or compression, where the page
dirty/writeback/unlock are all handled at a later time, not at submission
time), we return 1 and extent_writepage() skips the submission.

This is fine if every sector matches the page size, but if a sector is
smaller than the page size (aka, the subpage case), it can be very
problematic, for example for the following 64K page:

    0     16K     32K    48K     64K
    |/|   |///////|      |/|     |
      4K                   52K

Where |/| is the dirty range we need to submit.

In the above case, we need different handling for the 3 ranges:

- [0, 4K) needs to be submitted as a regular write.
  A single sector cannot be compressed.

- [16K, 32K) needs to be submitted as a compressed write.

- [48K, 52K) needs to be submitted as a regular write.

With the current code, if we try to submit [16K, 32K) as a compressed
write, we return 1 immediately, without submitting the remaining
[48K, 52K) range.

Furthermore, since extent_writepage() exits without unlocking any sectors,
the already submitted range [0, 4K) will not have its sectors unlocked.

That is why, for now, subpage compression is only allowed for full-page
ranges.

[ENHANCEMENT]

- Introduce a submission bitmap at btrfs_bio_ctrl::submit_bitmap.
  This records which sectors will be submitted by extent_writepage_io(),
  and lets us track which sectors need to be submitted and thus can later
  be properly unlocked.
  For asynchronously submitted ranges (inline/compression), the
  corresponding bits are cleared from that bitmap.

- Only return 1 from writepage_delalloc() if no sector needs to be
  submitted.

- Only submit sectors marked in the submission bitmap inside
  extent_writepage_io(), so we won't touch the asynchronously submitted
  parts.

- Introduce the btrfs_folio_end_writer_lock_bitmap() helper.
  This unlocks only the sectors specified by the @bitmap parameter,
  to avoid touching ranges that were submitted asynchronously.

Please note that, since subpage compression is still limited to
page-aligned ranges, this change is only a preparation for future
sector-perfect compression support for subpage.

Signed-off-by: Qu Wenruo <[email protected]>
Reviewed-by: David Sterba <[email protected]>
Signed-off-by: David Sterba <[email protected]>
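To make the bitmap bookkeeping above concrete, here is a minimal userspace sketch (not kernel code; PAGE_SZ, SECTORSIZE_BITS and the helper names are made up for the example). It models the 64K page from the commit message: the dirty sectors set their bits, the compressed range [16K, 32K) has its bits cleared with the same start_bit/end_bit arithmetic the patch adds to writepage_delalloc(), and only the remaining bits are left to be submitted and unlocked.

/*
 * Userspace model of the submission bitmap: 64K page, 4K sectors,
 * one bit per sector, page_start assumed to be 0.
 */
#include <stdio.h>

#define PAGE_SZ		(64 * 1024UL)
#define SECTORSIZE_BITS	12	/* 4K sectors */

static void set_range(unsigned long *bitmap, unsigned long start, unsigned long len)
{
	for (unsigned long off = start; off < start + len; off += 1UL << SECTORSIZE_BITS)
		*bitmap |= 1UL << (off >> SECTORSIZE_BITS);
}

/* Mirrors the start_bit/end_bit computation added to writepage_delalloc(). */
static void clear_async_range(unsigned long *bitmap, unsigned long found_start,
			      unsigned long found_len)
{
	unsigned long page_end = PAGE_SZ - 1;
	unsigned long clamped_end = found_start + found_len;
	unsigned int start_bit = found_start >> SECTORSIZE_BITS;

	if (clamped_end > page_end + 1)
		clamped_end = page_end + 1;
	/* end_bit is exclusive, like the count given to bitmap_clear(). */
	unsigned int end_bit = clamped_end >> SECTORSIZE_BITS;

	for (unsigned int bit = start_bit; bit < end_bit; bit++)
		*bitmap &= ~(1UL << bit);
}

int main(void)
{
	unsigned long submit_bitmap = 0;

	/* Dirty ranges from the example: [0, 4K), [16K, 32K), [48K, 52K). */
	set_range(&submit_bitmap, 0, 4 * 1024);
	set_range(&submit_bitmap, 16 * 1024, 16 * 1024);
	set_range(&submit_bitmap, 48 * 1024, 4 * 1024);
	printf("dirty bitmap:  0x%04lx\n", submit_bitmap);

	/* [16K, 32K) goes through async compression: drop its bits. */
	clear_async_range(&submit_bitmap, 16 * 1024, 16 * 1024);
	printf("submit bitmap: 0x%04lx\n", submit_bitmap);

	/* Only the remaining bits are submitted and later unlocked. */
	return 0;
}

Running it prints 0x10f1 for the dirty bitmap and 0x1001 after the compressed range is dropped, i.e. only [0, 4K) and [48K, 52K) remain for regular submission.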
1 parent 49a9907 commit bd610c0

File tree

3 files changed: 86 additions & 38 deletions


fs/btrfs/extent_io.c

Lines changed: 51 additions & 38 deletions
@@ -101,6 +101,13 @@ struct btrfs_bio_ctrl {
 	blk_opf_t opf;
 	btrfs_bio_end_io_t end_io_func;
 	struct writeback_control *wbc;
+
+	/*
+	 * The sectors of the page which are going to be submitted by
+	 * extent_writepage_io().
+	 * This is to avoid touching ranges covered by compression/inline.
+	 */
+	unsigned long submit_bitmap;
 };
 
 static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
@@ -1106,9 +1113,10 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
  */
 static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 						 struct folio *folio,
-						 struct writeback_control *wbc)
+						 struct btrfs_bio_ctrl *bio_ctrl)
 {
 	struct btrfs_fs_info *fs_info = inode_to_fs_info(&inode->vfs_inode);
+	struct writeback_control *wbc = bio_ctrl->wbc;
 	const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping);
 	const u64 page_start = folio_pos(folio);
 	const u64 page_end = page_start + folio_size(folio) - 1;
@@ -1123,6 +1131,14 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 	u64 delalloc_to_write = 0;
 	int ret = 0;
 
+	/* Save the dirty bitmap as our submission bitmap will be a subset of it. */
+	if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
+		ASSERT(fs_info->sectors_per_page > 1);
+		btrfs_get_subpage_dirty_bitmap(fs_info, folio, &bio_ctrl->submit_bitmap);
+	} else {
+		bio_ctrl->submit_bitmap = 1;
+	}
+
 	/* Lock all (subpage) delalloc ranges inside the folio first. */
 	while (delalloc_start < page_end) {
 		delalloc_end = page_end;
@@ -1190,22 +1206,18 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 	}
 
 	/*
-	 * We can hit btrfs_run_delalloc_range() with >0 return value.
-	 *
-	 * This happens when either the IO is already done and folio
-	 * unlocked (inline) or the IO submission and folio unlock would
-	 * be handled as async (compression).
-	 *
-	 * Inline is only possible for regular sectorsize for now.
-	 *
-	 * Compression is possible for both subpage and regular cases,
-	 * but even for subpage compression only happens for page aligned
-	 * range, thus the found delalloc range must go beyond current
-	 * folio.
+	 * We have some ranges that's going to be submitted asynchronously
+	 * (compression or inline). These range have their own control
+	 * on when to unlock the pages. We should not touch them
+	 * anymore, so clear the range from the submission bitmap.
 	 */
-	if (ret > 0)
-		ASSERT(!is_subpage || found_start + found_len >= page_end);
-
+	if (ret > 0) {
+		unsigned int start_bit = (found_start - page_start) >>
+					 fs_info->sectorsize_bits;
+		unsigned int end_bit = (min(page_end + 1, found_start + found_len) -
+					page_start) >> fs_info->sectorsize_bits;
+		bitmap_clear(&bio_ctrl->submit_bitmap, start_bit, end_bit - start_bit);
+	}
 	/*
 	 * Above btrfs_run_delalloc_range() may have unlocked the folio,
 	 * thus for the last range, we cannot touch the folio anymore.
@@ -1230,10 +1242,10 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 				  DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE);
 
 	/*
-	 * If btrfs_run_dealloc_range() already started I/O and unlocked
-	 * the folios, we just need to account for them here.
+	 * If all ranges are submitted asynchronously, we just need to account
+	 * for them here.
 	 */
-	if (ret == 1) {
+	if (bitmap_empty(&bio_ctrl->submit_bitmap, fs_info->sectors_per_page)) {
 		wbc->nr_to_write -= delalloc_to_write;
 		return 1;
 	}
@@ -1331,15 +1343,6 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	unsigned long range_bitmap = 0;
-	/*
-	 * This is the default value for sectorsize == PAGE_SIZE case.
-	 * We known we need to write the dirty sector (aka the page),
-	 * even if the page is not dirty (we cleared it before entering).
-	 *
-	 * For subpage cases we will get the correct bitmap later.
-	 */
-	unsigned long dirty_bitmap = 1;
-	unsigned int bitmap_size = 1;
 	bool submitted_io = false;
 	const u64 folio_start = folio_pos(folio);
 	u64 cur;
@@ -1357,18 +1360,14 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
 		return 1;
 	}
 
-	if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
-		ASSERT(fs_info->sectors_per_page > 1);
-		btrfs_get_subpage_dirty_bitmap(fs_info, folio, &dirty_bitmap);
-		bitmap_size = fs_info->sectors_per_page;
-	}
 	for (cur = start; cur < start + len; cur += fs_info->sectorsize)
 		set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap);
-	bitmap_and(&dirty_bitmap, &dirty_bitmap, &range_bitmap, bitmap_size);
+	bitmap_and(&bio_ctrl->submit_bitmap, &bio_ctrl->submit_bitmap, &range_bitmap,
+		   fs_info->sectors_per_page);
 
 	bio_ctrl->end_io_func = end_bbio_data_write;
 
-	for_each_set_bit(bit, &dirty_bitmap, bitmap_size) {
+	for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) {
 		cur = folio_pos(folio) + (bit << fs_info->sectorsize_bits);
 
 		if (cur >= i_size) {
@@ -1421,6 +1420,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
 static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl)
 {
 	struct inode *inode = folio->mapping->host;
+	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
 	const u64 page_start = folio_pos(folio);
 	int ret;
 	size_t pg_offset;
@@ -1442,11 +1442,16 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
 	if (folio->index == end_index)
 		folio_zero_range(folio, pg_offset, folio_size(folio) - pg_offset);
 
+	/*
+	 * Default to unlock the whole folio.
+	 * The proper bitmap can only be initialized until writepage_delalloc().
+	 */
+	bio_ctrl->submit_bitmap = (unsigned long)-1;
 	ret = set_folio_extent_mapped(folio);
 	if (ret < 0)
 		goto done;
 
-	ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl->wbc);
+	ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl);
 	if (ret == 1)
 		return 0;
 	if (ret)
@@ -1466,8 +1471,11 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
 		mapping_set_error(folio->mapping, ret);
 	}
 
-	btrfs_folio_end_writer_lock(inode_to_fs_info(inode), folio,
-				    page_start, PAGE_SIZE);
+	/*
+	 * Only unlock ranges that are submitted. As there can be some async
+	 * submitted ranges inside the folio.
+	 */
+	btrfs_folio_end_writer_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap);
 	ASSERT(ret <= 0);
 	return ret;
 }
@@ -2210,6 +2218,11 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
 		if (pages_dirty && folio != locked_folio)
 			ASSERT(folio_test_dirty(folio));
 
+		/*
+		 * Set the submission bitmap to submit all sectors.
+		 * extent_writepage_io() will do the truncation correctly.
+		 */
+		bio_ctrl.submit_bitmap = (unsigned long)-1;
 		ret = extent_writepage_io(BTRFS_I(inode), folio, cur, cur_len,
 					  &bio_ctrl, i_size);
 		if (ret == 1)
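As a companion to the earlier sketch, the following userspace-only model (helper names made up for illustration) shows how extent_writepage_io() now narrows its work: the submit bitmap is AND-ed against a bitmap built from the requested [start, start + len) range, and only the surviving bits are walked, mirroring the bitmap_and() / for_each_set_bit() pair in the hunk above.

#include <stdio.h>

#define SECTORSIZE_BITS	12	/* 4K sectors, 64K folio => 16 bits used */

/* Build a bitmap covering [start, start + len) relative to folio_start. */
static unsigned long range_to_bitmap(unsigned long folio_start,
				     unsigned long start, unsigned long len)
{
	unsigned long bitmap = 0;

	for (unsigned long cur = start; cur < start + len; cur += 1UL << SECTORSIZE_BITS)
		bitmap |= 1UL << ((cur - folio_start) >> SECTORSIZE_BITS);
	return bitmap;
}

int main(void)
{
	const unsigned long folio_start = 0;
	/* Bits 0 and 12 survived writepage_delalloc() (see the earlier example). */
	unsigned long submit_bitmap = 0x1001;
	/* The caller asks to write the whole 64K folio. */
	unsigned long range_bitmap = range_to_bitmap(folio_start, 0, 64 * 1024);

	submit_bitmap &= range_bitmap;	/* bitmap_and() equivalent */

	for (unsigned int bit = 0; bit < 16; bit++) {
		if (!(submit_bitmap & (1UL << bit)))
			continue;	/* for_each_set_bit() equivalent */
		unsigned long cur = folio_start + ((unsigned long)bit << SECTORSIZE_BITS);
		printf("submit sector at offset %luK\n", cur / 1024);
	}
	return 0;
}

Only the sectors at 0K and 48K are printed; the compressed range never re-enters the regular submission path.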

fs/btrfs/subpage.c

Lines changed: 33 additions & 0 deletions
@@ -424,6 +424,39 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
 		folio_unlock(folio);
 }
 
+void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
+					struct folio *folio, unsigned long bitmap)
+{
+	struct btrfs_subpage *subpage = folio_get_private(folio);
+	const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked;
+	unsigned long flags;
+	bool last = false;
+	int cleared = 0;
+	int bit;
+
+	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
+		folio_unlock(folio);
+		return;
+	}
+
+	if (atomic_read(&subpage->writers) == 0) {
+		/* No writers, locked by plain lock_page(). */
+		folio_unlock(folio);
+		return;
+	}
+
+	spin_lock_irqsave(&subpage->lock, flags);
+	for_each_set_bit(bit, &bitmap, fs_info->sectors_per_page) {
+		if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
+			cleared++;
+	}
+	ASSERT(atomic_read(&subpage->writers) >= cleared);
+	last = atomic_sub_and_test(cleared, &subpage->writers);
+	spin_unlock_irqrestore(&subpage->lock, flags);
+	if (last)
+		folio_unlock(folio);
+}
+
 #define subpage_test_bitmap_all_set(fs_info, subpage, name)		\
 	bitmap_test_range_all_set(subpage->bitmaps,			\
 			fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
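The accounting in the new helper can be modelled the same way: a userspace sketch (illustrative names, not the kernel implementation) that clears only the locked bits named by the caller's bitmap, counts how many actually flipped, subtracts that from the writer count, and unlocks the folio only when the count reaches zero.

#include <stdio.h>
#include <stdbool.h>

struct subpage_model {
	unsigned long locked_bitmap;	/* one bit per locked sector */
	int writers;			/* number of locked sectors */
};

/* Returns true when the last writer is gone and the folio can be unlocked. */
static bool end_writer_lock_bitmap(struct subpage_model *sp, unsigned long bitmap)
{
	int cleared = 0;

	for (unsigned int bit = 0; bit < 16; bit++) {
		unsigned long mask = 1UL << bit;

		if ((bitmap & mask) && (sp->locked_bitmap & mask)) {
			sp->locked_bitmap &= ~mask;	/* test_and_clear_bit() */
			cleared++;
		}
	}
	sp->writers -= cleared;		/* atomic_sub_and_test() */
	return sp->writers == 0;
}

int main(void)
{
	/* Sectors 0 and 12 were locked for submission, as in the commit example. */
	struct subpage_model sp = { .locked_bitmap = 0x1001, .writers = 2 };

	/* extent_writepage() hands in the submit bitmap; nothing else is touched. */
	bool last = end_writer_lock_bitmap(&sp, 0x1001);

	printf("remaining writers: %d, unlock folio: %s\n",
	       sp.writers, last ? "yes" : "no");
	return 0;
}

Bits outside the passed-in bitmap (the asynchronously submitted ranges) are never cleared here; their owners drop their own writer counts when the async submission finishes.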

fs/btrfs/subpage.h

Lines changed: 2 additions & 0 deletions
@@ -106,6 +106,8 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
 				 struct folio *folio, u64 start, u32 len);
 void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
 				 struct folio *folio, u64 start, u32 len);
+void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
+					struct folio *folio, unsigned long bitmap);
 bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
 				      struct folio *folio, u64 search_start,
 				      u64 *found_start_ret, u32 *found_len_ret);
