Skip to content

Commit 20f8299

Browse files
author
Andreas Gruenbacher
committed
gfs2: Rework read and page fault locking
So far, gfs2 has taken the inode glocks inside the ->readpage and ->readahead address space operations. Since commit d438834 ("fs: convert mpage_readpages to mpage_readahead"), gfs2_readahead is passed the pages to read ahead locked. With that, the current holder of the inode glock may be trying to lock one of those pages while gfs2_readahead is trying to take the inode glock, resulting in a deadlock. Fix that by moving the lock taking to the higher-level ->read_iter file and ->fault vm operations. This also gets rid of an ugly lock inversion workaround in gfs2_readpage. The cache consistency model of filesystems like gfs2 is such that if data is found in the page cache, the data is up to date and can be used without taking any filesystem locks. If a page is not cached, filesystem locks must be taken before populating the page cache. To avoid taking the inode glock when the data is already cached, gfs2_file_read_iter first tries to read the data with the IOCB_NOIO flag set. If that fails, the inode glock is taken and the operation is retried with the IOCB_NOIO flag cleared. Signed-off-by: Andreas Gruenbacher <[email protected]>
1 parent 41da51b commit 20f8299

File tree

2 files changed

+51
-46
lines changed

2 files changed

+51
-46
lines changed

fs/gfs2/aops.c

Lines changed: 1 addition & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -468,21 +468,10 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
468468
}
469469

470470

471-
/**
472-
* __gfs2_readpage - readpage
473-
* @file: The file to read a page for
474-
* @page: The page to read
475-
*
476-
* This is the core of gfs2's readpage. It's used by the internal file
477-
* reading code as in that case we already hold the glock. Also it's
478-
* called by gfs2_readpage() once the required lock has been granted.
479-
*/
480-
481471
static int __gfs2_readpage(void *file, struct page *page)
482472
{
483473
struct gfs2_inode *ip = GFS2_I(page->mapping->host);
484474
struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
485-
486475
int error;
487476

488477
if (i_blocksize(page->mapping->host) == PAGE_SIZE &&
@@ -505,36 +494,11 @@ static int __gfs2_readpage(void *file, struct page *page)
505494
* gfs2_readpage - read a page of a file
506495
* @file: The file to read
507496
* @page: The page of the file
508-
*
509-
* This deals with the locking required. We have to unlock and
510-
* relock the page in order to get the locking in the right
511-
* order.
512497
*/
513498

514499
static int gfs2_readpage(struct file *file, struct page *page)
515500
{
516-
struct address_space *mapping = page->mapping;
517-
struct gfs2_inode *ip = GFS2_I(mapping->host);
518-
struct gfs2_holder gh;
519-
int error;
520-
521-
unlock_page(page);
522-
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
523-
error = gfs2_glock_nq(&gh);
524-
if (unlikely(error))
525-
goto out;
526-
error = AOP_TRUNCATED_PAGE;
527-
lock_page(page);
528-
if (page->mapping == mapping && !PageUptodate(page))
529-
error = __gfs2_readpage(file, page);
530-
else
531-
unlock_page(page);
532-
gfs2_glock_dq(&gh);
533-
out:
534-
gfs2_holder_uninit(&gh);
535-
if (error && error != AOP_TRUNCATED_PAGE)
536-
lock_page(page);
537-
return error;
501+
return __gfs2_readpage(file, page);
538502
}
539503

540504
/**
@@ -598,16 +562,9 @@ static void gfs2_readahead(struct readahead_control *rac)
598562
{
599563
struct inode *inode = rac->mapping->host;
600564
struct gfs2_inode *ip = GFS2_I(inode);
601-
struct gfs2_holder gh;
602565

603-
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
604-
if (gfs2_glock_nq(&gh))
605-
goto out_uninit;
606566
if (!gfs2_is_stuffed(ip))
607567
mpage_readahead(rac, gfs2_block_map);
608-
gfs2_glock_dq(&gh);
609-
out_uninit:
610-
gfs2_holder_uninit(&gh);
611568
}
612569

613570
/**

fs/gfs2/file.c

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -558,8 +558,29 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
558558
return block_page_mkwrite_return(ret);
559559
}
560560

561+
static vm_fault_t gfs2_fault(struct vm_fault *vmf)
562+
{
563+
struct inode *inode = file_inode(vmf->vma->vm_file);
564+
struct gfs2_inode *ip = GFS2_I(inode);
565+
struct gfs2_holder gh;
566+
vm_fault_t ret;
567+
int err;
568+
569+
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
570+
err = gfs2_glock_nq(&gh);
571+
if (err) {
572+
ret = block_page_mkwrite_return(err);
573+
goto out_uninit;
574+
}
575+
ret = filemap_fault(vmf);
576+
gfs2_glock_dq(&gh);
577+
out_uninit:
578+
gfs2_holder_uninit(&gh);
579+
return ret;
580+
}
581+
561582
static const struct vm_operations_struct gfs2_vm_ops = {
562-
.fault = filemap_fault,
583+
.fault = gfs2_fault,
563584
.map_pages = filemap_map_pages,
564585
.page_mkwrite = gfs2_page_mkwrite,
565586
};
@@ -824,6 +845,9 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
824845

825846
static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
826847
{
848+
struct gfs2_inode *ip;
849+
struct gfs2_holder gh;
850+
size_t written = 0;
827851
ssize_t ret;
828852

829853
if (iocb->ki_flags & IOCB_DIRECT) {
@@ -832,7 +856,31 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
832856
return ret;
833857
iocb->ki_flags &= ~IOCB_DIRECT;
834858
}
835-
return generic_file_read_iter(iocb, to);
859+
iocb->ki_flags |= IOCB_NOIO;
860+
ret = generic_file_read_iter(iocb, to);
861+
iocb->ki_flags &= ~IOCB_NOIO;
862+
if (ret >= 0) {
863+
if (!iov_iter_count(to))
864+
return ret;
865+
written = ret;
866+
} else {
867+
if (ret != -EAGAIN)
868+
return ret;
869+
if (iocb->ki_flags & IOCB_NOWAIT)
870+
return ret;
871+
}
872+
ip = GFS2_I(iocb->ki_filp->f_mapping->host);
873+
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
874+
ret = gfs2_glock_nq(&gh);
875+
if (ret)
876+
goto out_uninit;
877+
ret = generic_file_read_iter(iocb, to);
878+
if (ret > 0)
879+
written += ret;
880+
gfs2_glock_dq(&gh);
881+
out_uninit:
882+
gfs2_holder_uninit(&gh);
883+
return written ? written : ret;
836884
}
837885

838886
/**

0 commit comments

Comments
 (0)