Skip to content

Commit 1f934b0

Browse files
ukernel authored and idryomov committed
ceph: properly get capsnap's size in get_oldest_context()
capsnap's size is set by __ceph_finish_cap_snap(). If the capsnap is still being written, its size is zero. In this case, get_oldest_context() should read i_size instead. In addition, ceph_writepages_start() should re-check the capsnap's size after the dirty pages get locked.
Signed-off-by: "Yan, Zheng" <[email protected]>
Signed-off-by: Ilya Dryomov <[email protected]>
1 parent b072d77 commit 1f934b0

File tree

1 file changed

+80
-57
lines changed

1 file changed

+80
-57
lines changed

fs/ceph/addr.c

Lines changed: 80 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -463,14 +463,20 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
463463
return rc;
464464
}
465465

466+
/*
 * Writeback parameters reported by get_oldest_context() for the snap
 * context it selects.  The fields are only written when a context with
 * dirty data is found; otherwise they are left untouched.
 */
struct ceph_writeback_ctl
467+
{
468+
loff_t i_size; /* capsnap->size for a capsnap that is not being written, else i_size_read(inode) */
469+
u64 truncate_size; /* copied from the capsnap, or from ci->i_truncate_size for the head context */
470+
u32 truncate_seq; /* copied from the capsnap, or from ci->i_truncate_seq for the head context */
471+
bool size_stable; /* true only when i_size was taken from a capsnap with ->writing clear */
472+
};
473+
466474
/*
467475
* Get ref for the oldest snapc for an inode with dirty data... that is, the
468476
* only snap context we are allowed to write back.
469477
*/
470-
static struct ceph_snap_context *get_oldest_context(struct inode *inode,
471-
loff_t *snap_size,
472-
u64 *truncate_size,
473-
u32 *truncate_seq)
478+
static struct ceph_snap_context *
479+
get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl)
474480
{
475481
struct ceph_inode_info *ci = ceph_inode(inode);
476482
struct ceph_snap_context *snapc = NULL;
@@ -482,28 +488,62 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
482488
capsnap->context, capsnap->dirty_pages);
483489
if (capsnap->dirty_pages) {
484490
snapc = ceph_get_snap_context(capsnap->context);
485-
if (snap_size)
486-
*snap_size = capsnap->size;
487-
if (truncate_size)
488-
*truncate_size = capsnap->truncate_size;
489-
if (truncate_seq)
490-
*truncate_seq = capsnap->truncate_seq;
491+
if (ctl) {
492+
if (capsnap->writing) {
493+
ctl->i_size = i_size_read(inode);
494+
ctl->size_stable = false;
495+
} else {
496+
ctl->i_size = capsnap->size;
497+
ctl->size_stable = true;
498+
}
499+
ctl->truncate_size = capsnap->truncate_size;
500+
ctl->truncate_seq = capsnap->truncate_seq;
501+
}
491502
break;
492503
}
493504
}
494505
if (!snapc && ci->i_wrbuffer_ref_head) {
495506
snapc = ceph_get_snap_context(ci->i_head_snapc);
496507
dout(" head snapc %p has %d dirty pages\n",
497508
snapc, ci->i_wrbuffer_ref_head);
498-
if (truncate_size)
499-
*truncate_size = ci->i_truncate_size;
500-
if (truncate_seq)
501-
*truncate_seq = ci->i_truncate_seq;
509+
if (ctl) {
510+
ctl->i_size = i_size_read(inode);
511+
ctl->truncate_size = ci->i_truncate_size;
512+
ctl->truncate_seq = ci->i_truncate_seq;
513+
ctl->size_stable = false;
514+
}
502515
}
503516
spin_unlock(&ci->i_ceph_lock);
504517
return snapc;
505518
}
506519

520+
/*
 * Compute the data length of a write that starts at file offset @start
 * and ends within @page (ceph_writepages_start() passes the last locked
 * page here).
 *
 * The end of data is the inode size, or the recorded capsnap size when
 * @page belongs to a capsnap that is no longer being written.  That end
 * is clamped to the end of @page.  Returns 0 if the resulting end is not
 * beyond @start.
 */
static u64 get_writepages_data_length(struct inode *inode,
521+
struct page *page, u64 start)
522+
{
523+
struct ceph_inode_info *ci = ceph_inode(inode);
524+
struct ceph_snap_context *snapc = page_snap_context(page);
525+
struct ceph_cap_snap *capsnap = NULL;
526+
u64 end = i_size_read(inode); /* default: current inode size */
527+
528+
if (snapc != ci->i_head_snapc) { /* page is not dirty under the head context: look up its capsnap */
529+
bool found = false;
530+
spin_lock(&ci->i_ceph_lock);
531+
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
532+
if (capsnap->context == snapc) {
533+
if (!capsnap->writing) /* size is only taken once the capsnap is no longer being written */
534+
end = capsnap->size;
535+
found = true;
536+
break;
537+
}
538+
}
539+
spin_unlock(&ci->i_ceph_lock);
540+
WARN_ON(!found); /* no capsnap on the inode's list matches the page's snap context */
541+
}
542+
if (end > page_offset(page) + PAGE_SIZE) /* never report data past the end of @page */
543+
end = page_offset(page) + PAGE_SIZE;
544+
return end > start ? end - start : 0; /* guard against end <= start so the u64 result cannot wrap */
545+
}
546+
507547
/*
508548
* Write a single page, but leave the page locked.
509549
*
@@ -515,30 +555,25 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
515555
struct inode *inode;
516556
struct ceph_inode_info *ci;
517557
struct ceph_fs_client *fsc;
518-
struct ceph_osd_client *osdc;
519558
struct ceph_snap_context *snapc, *oldest;
520559
loff_t page_off = page_offset(page);
521-
loff_t snap_size = -1;
522560
long writeback_stat;
523-
u64 truncate_size;
524-
u32 truncate_seq;
525561
int err, len = PAGE_SIZE;
562+
struct ceph_writeback_ctl ceph_wbc;
526563

527564
dout("writepage %p idx %lu\n", page, page->index);
528565

529566
inode = page->mapping->host;
530567
ci = ceph_inode(inode);
531568
fsc = ceph_inode_to_client(inode);
532-
osdc = &fsc->client->osdc;
533569

534570
/* verify this is a writeable snap context */
535571
snapc = page_snap_context(page);
536572
if (!snapc) {
537573
dout("writepage %p page %p not dirty?\n", inode, page);
538574
return 0;
539575
}
540-
oldest = get_oldest_context(inode, &snap_size,
541-
&truncate_size, &truncate_seq);
576+
oldest = get_oldest_context(inode, &ceph_wbc);
542577
if (snapc->seq > oldest->seq) {
543578
dout("writepage %p page %p snapc %p not writeable - noop\n",
544579
inode, page, snapc);
@@ -550,17 +585,14 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
550585
}
551586
ceph_put_snap_context(oldest);
552587

553-
if (snap_size == -1)
554-
snap_size = i_size_read(inode);
555-
556588
/* is this a partial page at end of file? */
557-
if (page_off >= snap_size) {
558-
dout("%p page eof %llu\n", page, snap_size);
589+
if (page_off >= ceph_wbc.i_size) {
590+
dout("%p page eof %llu\n", page, ceph_wbc.i_size);
559591
return 0;
560592
}
561593

562-
if (snap_size < page_off + len)
563-
len = snap_size - page_off;
594+
if (ceph_wbc.i_size < page_off + len)
595+
len = ceph_wbc.i_size - page_off;
564596

565597
dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n",
566598
inode, page, page->index, page_off, len, snapc, snapc->seq);
@@ -571,10 +603,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
571603
set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
572604

573605
set_page_writeback(page);
574-
err = ceph_osdc_writepages(osdc, ceph_vino(inode),
575-
&ci->i_layout, snapc,
576-
page_off, len,
577-
truncate_seq, truncate_size,
606+
err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode),
607+
&ci->i_layout, snapc, page_off, len,
608+
ceph_wbc.truncate_seq,
609+
ceph_wbc.truncate_size,
578610
&inode->i_mtime, &page, 1);
579611
if (err < 0) {
580612
struct writeback_control tmp_wbc;
@@ -745,9 +777,7 @@ static int ceph_writepages_start(struct address_space *mapping,
745777
int rc = 0;
746778
unsigned int wsize = i_blocksize(inode);
747779
struct ceph_osd_request *req = NULL;
748-
loff_t snap_size, i_size;
749-
u64 truncate_size;
750-
u32 truncate_seq;
780+
struct ceph_writeback_ctl ceph_wbc;
751781

752782
dout("writepages_start %p (mode=%s)\n", inode,
753783
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
@@ -786,9 +816,7 @@ static int ceph_writepages_start(struct address_space *mapping,
786816
retry:
787817
/* find oldest snap context with dirty data */
788818
ceph_put_snap_context(snapc);
789-
snap_size = -1;
790-
snapc = get_oldest_context(inode, &snap_size,
791-
&truncate_size, &truncate_seq);
819+
snapc = get_oldest_context(inode, &ceph_wbc);
792820
if (!snapc) {
793821
/* hmm, why does writepages get called when there
794822
is no dirty data? */
@@ -798,8 +826,6 @@ static int ceph_writepages_start(struct address_space *mapping,
798826
dout(" oldest snapc is %p seq %lld (%d snaps)\n",
799827
snapc, snapc->seq, snapc->num_snaps);
800828

801-
i_size = i_size_read(inode);
802-
803829
if (last_snapc && snapc != last_snapc) {
804830
/* if we switched to a newer snapc, restart our scan at the
805831
* start of the original file range. */
@@ -865,10 +891,9 @@ static int ceph_writepages_start(struct address_space *mapping,
865891
dout("waiting on writeback %p\n", page);
866892
wait_on_page_writeback(page);
867893
}
868-
if (page_offset(page) >=
869-
(snap_size == -1 ? i_size : snap_size)) {
870-
dout("%p page eof %llu\n", page,
871-
(snap_size == -1 ? i_size : snap_size));
894+
if (page_offset(page) >= ceph_wbc.i_size) {
895+
dout("%p page eof %llu\n",
896+
page, ceph_wbc.i_size);
872897
done = 1;
873898
unlock_page(page);
874899
break;
@@ -996,10 +1021,9 @@ static int ceph_writepages_start(struct address_space *mapping,
9961021
req = ceph_osdc_new_request(&fsc->client->osdc,
9971022
&ci->i_layout, vino,
9981023
offset, &len, 0, num_ops,
999-
CEPH_OSD_OP_WRITE,
1000-
CEPH_OSD_FLAG_WRITE,
1001-
snapc, truncate_seq,
1002-
truncate_size, false);
1024+
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
1025+
snapc, ceph_wbc.truncate_seq,
1026+
ceph_wbc.truncate_size, false);
10031027
if (IS_ERR(req)) {
10041028
req = ceph_osdc_new_request(&fsc->client->osdc,
10051029
&ci->i_layout, vino,
@@ -1008,8 +1032,8 @@ static int ceph_writepages_start(struct address_space *mapping,
10081032
CEPH_OSD_SLAB_OPS),
10091033
CEPH_OSD_OP_WRITE,
10101034
CEPH_OSD_FLAG_WRITE,
1011-
snapc, truncate_seq,
1012-
truncate_size, true);
1035+
snapc, ceph_wbc.truncate_seq,
1036+
ceph_wbc.truncate_size, true);
10131037
BUG_ON(IS_ERR(req));
10141038
}
10151039
BUG_ON(len < page_offset(pages[locked_pages - 1]) +
@@ -1046,14 +1070,15 @@ static int ceph_writepages_start(struct address_space *mapping,
10461070
len += PAGE_SIZE;
10471071
}
10481072

1049-
if (snap_size != -1) {
1050-
len = min(len, snap_size - offset);
1073+
if (ceph_wbc.size_stable) {
1074+
len = min(len, ceph_wbc.i_size - offset);
10511075
} else if (i == locked_pages) {
10521076
/* writepages_finish() clears writeback pages
10531077
* according to the data length, so make sure
10541078
* data length covers all locked pages */
10551079
u64 min_len = len + 1 - PAGE_SIZE;
1056-
len = min(len, (u64)i_size_read(inode) - offset);
1080+
len = get_writepages_data_length(inode, pages[i - 1],
1081+
offset);
10571082
len = max(len, min_len);
10581083
}
10591084
dout("writepages got pages at %llu~%llu\n", offset, len);
@@ -1137,8 +1162,7 @@ static int ceph_writepages_start(struct address_space *mapping,
11371162
static int context_is_writeable_or_written(struct inode *inode,
11381163
struct ceph_snap_context *snapc)
11391164
{
1140-
struct ceph_snap_context *oldest = get_oldest_context(inode, NULL,
1141-
NULL, NULL);
1165+
struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
11421166
int ret = !oldest || snapc->seq <= oldest->seq;
11431167

11441168
ceph_put_snap_context(oldest);
@@ -1183,8 +1207,7 @@ static int ceph_update_writeable_page(struct file *file,
11831207
* this page is already dirty in another (older) snap
11841208
* context! is it writeable now?
11851209
*/
1186-
oldest = get_oldest_context(inode, NULL, NULL, NULL);
1187-
1210+
oldest = get_oldest_context(inode, NULL);
11881211
if (snapc->seq > oldest->seq) {
11891212
ceph_put_snap_context(oldest);
11901213
dout(" page %p snapc %p not current or oldest\n",

0 commit comments

Comments
 (0)