Skip to content

Commit 2759e05

Browse files
committed
Merge tag 'ceph-for-5.2-rc4' of git://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:
"A change to call iput() asynchronously to avoid a possible deadlock when iput_final() needs to wait for in-flight I/O (e.g. readahead) and a fixup for a cleanup that went into -rc1"

* tag 'ceph-for-5.2-rc4' of git://github.com/ceph/ceph-client:
- ceph: fix error handling in ceph_get_caps()
- ceph: avoid iput_final() while holding mutex or in dispatch thread
- ceph: single workqueue for inode related works
2 parents 8e61f6f + 7b2f936 commit 2759e05

File tree

8 files changed

+156
-135
lines changed

8 files changed

+156
-135
lines changed

fs/ceph/caps.c

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2738,15 +2738,13 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
27382738
_got = 0;
27392739
ret = try_get_cap_refs(ci, need, want, endoff,
27402740
false, &_got);
2741-
if (ret == -EAGAIN) {
2741+
if (ret == -EAGAIN)
27422742
continue;
2743-
} else if (!ret) {
2744-
int err;
2745-
2743+
if (!ret) {
27462744
DEFINE_WAIT_FUNC(wait, woken_wake_function);
27472745
add_wait_queue(&ci->i_cap_wq, &wait);
27482746

2749-
while (!(err = try_get_cap_refs(ci, need, want, endoff,
2747+
while (!(ret = try_get_cap_refs(ci, need, want, endoff,
27502748
true, &_got))) {
27512749
if (signal_pending(current)) {
27522750
ret = -ERESTARTSYS;
@@ -2756,14 +2754,16 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
27562754
}
27572755

27582756
remove_wait_queue(&ci->i_cap_wq, &wait);
2759-
if (err == -EAGAIN)
2757+
if (ret == -EAGAIN)
27602758
continue;
27612759
}
2762-
if (ret == -ESTALE) {
2763-
/* session was killed, try renew caps */
2764-
ret = ceph_renew_caps(&ci->vfs_inode);
2765-
if (ret == 0)
2766-
continue;
2760+
if (ret < 0) {
2761+
if (ret == -ESTALE) {
2762+
/* session was killed, try renew caps */
2763+
ret = ceph_renew_caps(&ci->vfs_inode);
2764+
if (ret == 0)
2765+
continue;
2766+
}
27672767
return ret;
27682768
}
27692769

@@ -2992,8 +2992,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
29922992
}
29932993
if (complete_capsnap)
29942994
wake_up_all(&ci->i_cap_wq);
2995-
while (put-- > 0)
2996-
iput(inode);
2995+
while (put-- > 0) {
2996+
/* avoid calling iput_final() in osd dispatch threads */
2997+
ceph_async_iput(inode);
2998+
}
29972999
}
29983000

29993001
/*
@@ -3964,8 +3966,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
39643966
done:
39653967
mutex_unlock(&session->s_mutex);
39663968
done_unlocked:
3967-
iput(inode);
39683969
ceph_put_string(extra_info.pool_ns);
3970+
/* avoid calling iput_final() in mds dispatch threads */
3971+
ceph_async_iput(inode);
39693972
return;
39703973

39713974
flush_cap_releases:
@@ -4011,7 +4014,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
40114014
if (inode) {
40124015
dout("check_delayed_caps on %p\n", inode);
40134016
ceph_check_caps(ci, flags, NULL);
4014-
iput(inode);
4017+
/* avoid calling iput_final() in tick thread */
4018+
ceph_async_iput(inode);
40154019
}
40164020
}
40174021
spin_unlock(&mdsc->cap_delay_lock);

fs/ceph/file.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
791791
if (aio_work) {
792792
INIT_WORK(&aio_work->work, ceph_aio_retry_work);
793793
aio_work->req = req;
794-
queue_work(ceph_inode_to_client(inode)->wb_wq,
794+
queue_work(ceph_inode_to_client(inode)->inode_wq,
795795
&aio_work->work);
796796
return;
797797
}

fs/ceph/inode.c

Lines changed: 83 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,7 @@
3333

3434
static const struct inode_operations ceph_symlink_iops;
3535

36-
static void ceph_invalidate_work(struct work_struct *work);
37-
static void ceph_writeback_work(struct work_struct *work);
38-
static void ceph_vmtruncate_work(struct work_struct *work);
36+
static void ceph_inode_work(struct work_struct *work);
3937

4038
/*
4139
* find or create an inode, given the ceph ino number
@@ -509,10 +507,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
509507
INIT_LIST_HEAD(&ci->i_snap_realm_item);
510508
INIT_LIST_HEAD(&ci->i_snap_flush_item);
511509

512-
INIT_WORK(&ci->i_wb_work, ceph_writeback_work);
513-
INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work);
514-
515-
INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
510+
INIT_WORK(&ci->i_work, ceph_inode_work);
511+
ci->i_work_mask = 0;
516512

517513
ceph_fscache_inode_init(ci);
518514

@@ -1480,7 +1476,8 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
14801476
pr_err("fill_inode badness on %p got %d\n", in, rc);
14811477
err = rc;
14821478
}
1483-
iput(in);
1479+
/* avoid calling iput_final() in mds dispatch threads */
1480+
ceph_async_iput(in);
14841481
}
14851482

14861483
return err;
@@ -1678,8 +1675,11 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
16781675
&req->r_caps_reservation);
16791676
if (ret < 0) {
16801677
pr_err("fill_inode badness on %p\n", in);
1681-
if (d_really_is_negative(dn))
1682-
iput(in);
1678+
if (d_really_is_negative(dn)) {
1679+
/* avoid calling iput_final() in mds
1680+
* dispatch threads */
1681+
ceph_async_iput(in);
1682+
}
16831683
d_drop(dn);
16841684
err = ret;
16851685
goto next_item;
@@ -1689,7 +1689,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
16891689
if (ceph_security_xattr_deadlock(in)) {
16901690
dout(" skip splicing dn %p to inode %p"
16911691
" (security xattr deadlock)\n", dn, in);
1692-
iput(in);
1692+
ceph_async_iput(in);
16931693
skipped++;
16941694
goto next_item;
16951695
}
@@ -1740,57 +1740,87 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size)
17401740
return ret;
17411741
}
17421742

1743+
/*
1744+
* Put reference to inode, but avoid calling iput_final() in current thread.
1745+
* iput_final() may wait for readahead pages. The wait can cause deadlock in
1746+
* some contexts.
1747+
*/
1748+
void ceph_async_iput(struct inode *inode)
1749+
{
1750+
if (!inode)
1751+
return;
1752+
for (;;) {
1753+
if (atomic_add_unless(&inode->i_count, -1, 1))
1754+
break;
1755+
if (queue_work(ceph_inode_to_client(inode)->inode_wq,
1756+
&ceph_inode(inode)->i_work))
1757+
break;
1758+
/* queue work failed, i_count must be at least 2 */
1759+
}
1760+
}
1761+
17431762
/*
17441763
* Write back inode data in a worker thread. (This can't be done
17451764
* in the message handler context.)
17461765
*/
17471766
void ceph_queue_writeback(struct inode *inode)
17481767
{
1768+
struct ceph_inode_info *ci = ceph_inode(inode);
1769+
set_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask);
1770+
17491771
ihold(inode);
1750-
if (queue_work(ceph_inode_to_client(inode)->wb_wq,
1751-
&ceph_inode(inode)->i_wb_work)) {
1772+
if (queue_work(ceph_inode_to_client(inode)->inode_wq,
1773+
&ci->i_work)) {
17521774
dout("ceph_queue_writeback %p\n", inode);
17531775
} else {
1754-
dout("ceph_queue_writeback %p failed\n", inode);
1776+
dout("ceph_queue_writeback %p already queued, mask=%lx\n",
1777+
inode, ci->i_work_mask);
17551778
iput(inode);
17561779
}
17571780
}
17581781

1759-
static void ceph_writeback_work(struct work_struct *work)
1760-
{
1761-
struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
1762-
i_wb_work);
1763-
struct inode *inode = &ci->vfs_inode;
1764-
1765-
dout("writeback %p\n", inode);
1766-
filemap_fdatawrite(&inode->i_data);
1767-
iput(inode);
1768-
}
1769-
17701782
/*
17711783
* queue an async invalidation
17721784
*/
17731785
void ceph_queue_invalidate(struct inode *inode)
17741786
{
1787+
struct ceph_inode_info *ci = ceph_inode(inode);
1788+
set_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask);
1789+
17751790
ihold(inode);
1776-
if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
1777-
&ceph_inode(inode)->i_pg_inv_work)) {
1791+
if (queue_work(ceph_inode_to_client(inode)->inode_wq,
1792+
&ceph_inode(inode)->i_work)) {
17781793
dout("ceph_queue_invalidate %p\n", inode);
17791794
} else {
1780-
dout("ceph_queue_invalidate %p failed\n", inode);
1795+
dout("ceph_queue_invalidate %p already queued, mask=%lx\n",
1796+
inode, ci->i_work_mask);
17811797
iput(inode);
17821798
}
17831799
}
17841800

17851801
/*
1786-
* Invalidate inode pages in a worker thread. (This can't be done
1787-
* in the message handler context.)
1802+
* Queue an async vmtruncate. If we fail to queue work, we will handle
1803+
* the truncation the next time we call __ceph_do_pending_vmtruncate.
17881804
*/
1789-
static void ceph_invalidate_work(struct work_struct *work)
1805+
void ceph_queue_vmtruncate(struct inode *inode)
17901806
{
1791-
struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
1792-
i_pg_inv_work);
1793-
struct inode *inode = &ci->vfs_inode;
1807+
struct ceph_inode_info *ci = ceph_inode(inode);
1808+
set_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask);
1809+
1810+
ihold(inode);
1811+
if (queue_work(ceph_inode_to_client(inode)->inode_wq,
1812+
&ci->i_work)) {
1813+
dout("ceph_queue_vmtruncate %p\n", inode);
1814+
} else {
1815+
dout("ceph_queue_vmtruncate %p already queued, mask=%lx\n",
1816+
inode, ci->i_work_mask);
1817+
iput(inode);
1818+
}
1819+
}
1820+
1821+
static void ceph_do_invalidate_pages(struct inode *inode)
1822+
{
1823+
struct ceph_inode_info *ci = ceph_inode(inode);
17941824
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
17951825
u32 orig_gen;
17961826
int check = 0;
@@ -1842,44 +1872,6 @@ static void ceph_invalidate_work(struct work_struct *work)
18421872
out:
18431873
if (check)
18441874
ceph_check_caps(ci, 0, NULL);
1845-
iput(inode);
1846-
}
1847-
1848-
1849-
/*
1850-
* called by trunc_wq;
1851-
*
1852-
* We also truncate in a separate thread as well.
1853-
*/
1854-
static void ceph_vmtruncate_work(struct work_struct *work)
1855-
{
1856-
struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
1857-
i_vmtruncate_work);
1858-
struct inode *inode = &ci->vfs_inode;
1859-
1860-
dout("vmtruncate_work %p\n", inode);
1861-
__ceph_do_pending_vmtruncate(inode);
1862-
iput(inode);
1863-
}
1864-
1865-
/*
1866-
* Queue an async vmtruncate. If we fail to queue work, we will handle
1867-
* the truncation the next time we call __ceph_do_pending_vmtruncate.
1868-
*/
1869-
void ceph_queue_vmtruncate(struct inode *inode)
1870-
{
1871-
struct ceph_inode_info *ci = ceph_inode(inode);
1872-
1873-
ihold(inode);
1874-
1875-
if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
1876-
&ci->i_vmtruncate_work)) {
1877-
dout("ceph_queue_vmtruncate %p\n", inode);
1878-
} else {
1879-
dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
1880-
inode, ci->i_truncate_pending);
1881-
iput(inode);
1882-
}
18831875
}
18841876

18851877
/*
@@ -1943,6 +1935,25 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
19431935
wake_up_all(&ci->i_cap_wq);
19441936
}
19451937

1938+
static void ceph_inode_work(struct work_struct *work)
1939+
{
1940+
struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
1941+
i_work);
1942+
struct inode *inode = &ci->vfs_inode;
1943+
1944+
if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) {
1945+
dout("writeback %p\n", inode);
1946+
filemap_fdatawrite(&inode->i_data);
1947+
}
1948+
if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask))
1949+
ceph_do_invalidate_pages(inode);
1950+
1951+
if (test_and_clear_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask))
1952+
__ceph_do_pending_vmtruncate(inode);
1953+
1954+
iput(inode);
1955+
}
1956+
19461957
/*
19471958
* symlinks
19481959
*/

0 commit comments

Comments
 (0)