Skip to content

Commit 0ecca62

Browse files
committed
Merge tag 'ceph-for-5.16-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
"One notable change here is that async creates and unlinks introduced in 5.7 are now enabled by default. This should greatly speed up things like rm, tar and rsync. To opt out, wsync mount option can be used.
Other than that we have a pile of bug fixes all across the filesystem from Jeff, Xiubo and Kotresh and a metrics infrastructure rework from Luis"

* tag 'ceph-for-5.16-rc1' of git://github.com/ceph/ceph-client:
  ceph: add a new metric to keep track of remote object copies
  libceph, ceph: move ceph_osdc_copy_from() into cephfs code
  ceph: clean-up metrics data structures to reduce code duplication
  ceph: split 'metric' debugfs file into several files
  ceph: return the real size read when it hits EOF
  ceph: properly handle statfs on multifs setups
  ceph: shut down mount on bad mdsmap or fsmap decode
  ceph: fix mdsmap decode when there are MDS's beyond max_mds
  ceph: ignore the truncate when size won't change with Fx caps issued
  ceph: don't rely on error_string to validate blocklisted session.
  ceph: just use ci->i_version for fscache aux info
  ceph: shut down access to inode when async create fails
  ceph: refactor remove_session_caps_cb
  ceph: fix auth cap handling logic in remove_session_caps_cb
  ceph: drop private list from remove_session_caps_cb
  ceph: don't use -ESTALE as special return code in try_get_cap_refs
  ceph: print inode numbers instead of pointer values
  ceph: enable async dirops by default
  libceph: drop ->monmap and err initialization
  ceph: convert to noop_direct_IO
2 parents a27c085 + c02cb7b commit 0ecca62

File tree

18 files changed

+544
-479
lines changed

18 files changed

+544
-479
lines changed

fs/ceph/addr.c

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,7 @@ static int ceph_writepages_start(struct address_space *mapping,
725725
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
726726
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
727727

728-
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
728+
if (ceph_inode_is_shutdown(inode)) {
729729
if (ci->i_wrbuffer_ref > 0) {
730730
pr_warn_ratelimited(
731731
"writepage_start %p %lld forced umount\n",
@@ -1146,12 +1146,12 @@ static struct ceph_snap_context *
11461146
ceph_find_incompatible(struct page *page)
11471147
{
11481148
struct inode *inode = page->mapping->host;
1149-
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
11501149
struct ceph_inode_info *ci = ceph_inode(inode);
11511150

1152-
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
1153-
dout(" page %p forced umount\n", page);
1154-
return ERR_PTR(-EIO);
1151+
if (ceph_inode_is_shutdown(inode)) {
1152+
dout(" page %p %llx:%llx is shutdown\n", page,
1153+
ceph_vinop(inode));
1154+
return ERR_PTR(-ESTALE);
11551155
}
11561156

11571157
for (;;) {
@@ -1312,17 +1312,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
13121312
return copied;
13131313
}
13141314

1315-
/*
1316-
* we set .direct_IO to indicate direct io is supported, but since we
1317-
* intercept O_DIRECT reads and writes early, this function should
1318-
* never get called.
1319-
*/
1320-
static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter)
1321-
{
1322-
WARN_ON(1);
1323-
return -EINVAL;
1324-
}
1325-
13261315
const struct address_space_operations ceph_aops = {
13271316
.readpage = ceph_readpage,
13281317
.readahead = ceph_readahead,
@@ -1333,7 +1322,7 @@ const struct address_space_operations ceph_aops = {
13331322
.set_page_dirty = ceph_set_page_dirty,
13341323
.invalidatepage = ceph_invalidatepage,
13351324
.releasepage = ceph_releasepage,
1336-
.direct_IO = ceph_direct_io,
1325+
.direct_IO = noop_direct_IO,
13371326
};
13381327

13391328
static void ceph_block_sigs(sigset_t *oldset)
@@ -1362,6 +1351,9 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
13621351
sigset_t oldset;
13631352
vm_fault_t ret = VM_FAULT_SIGBUS;
13641353

1354+
if (ceph_inode_is_shutdown(inode))
1355+
return ret;
1356+
13651357
ceph_block_sigs(&oldset);
13661358

13671359
dout("filemap_fault %p %llx.%llx %llu trying to get caps\n",
@@ -1453,6 +1445,9 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
14531445
sigset_t oldset;
14541446
vm_fault_t ret = VM_FAULT_SIGBUS;
14551447

1448+
if (ceph_inode_is_shutdown(inode))
1449+
return ret;
1450+
14561451
prealloc_cf = ceph_alloc_cap_flush();
14571452
if (!prealloc_cf)
14581453
return VM_FAULT_OOM;

fs/ceph/cache.c

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,6 @@
1212
#include "super.h"
1313
#include "cache.h"
1414

15-
struct ceph_aux_inode {
16-
u64 version;
17-
u64 mtime_sec;
18-
u64 mtime_nsec;
19-
};
20-
2115
struct fscache_netfs ceph_cache_netfs = {
2216
.name = "ceph",
2317
.version = 0,
@@ -109,20 +103,14 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
109103
void *cookie_netfs_data, const void *data, uint16_t dlen,
110104
loff_t object_size)
111105
{
112-
struct ceph_aux_inode aux;
113106
struct ceph_inode_info* ci = cookie_netfs_data;
114107
struct inode* inode = &ci->vfs_inode;
115108

116-
if (dlen != sizeof(aux) ||
109+
if (dlen != sizeof(ci->i_version) ||
117110
i_size_read(inode) != object_size)
118111
return FSCACHE_CHECKAUX_OBSOLETE;
119112

120-
memset(&aux, 0, sizeof(aux));
121-
aux.version = ci->i_version;
122-
aux.mtime_sec = inode->i_mtime.tv_sec;
123-
aux.mtime_nsec = inode->i_mtime.tv_nsec;
124-
125-
if (memcmp(data, &aux, sizeof(aux)) != 0)
113+
if (*(u64 *)data != ci->i_version)
126114
return FSCACHE_CHECKAUX_OBSOLETE;
127115

128116
dout("ceph inode 0x%p cached okay\n", ci);
@@ -139,7 +127,6 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
139127
{
140128
struct ceph_inode_info *ci = ceph_inode(inode);
141129
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
142-
struct ceph_aux_inode aux;
143130

144131
/* No caching for filesystem */
145132
if (!fsc->fscache)
@@ -151,14 +138,10 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
151138

152139
inode_lock_nested(inode, I_MUTEX_CHILD);
153140
if (!ci->fscache) {
154-
memset(&aux, 0, sizeof(aux));
155-
aux.version = ci->i_version;
156-
aux.mtime_sec = inode->i_mtime.tv_sec;
157-
aux.mtime_nsec = inode->i_mtime.tv_nsec;
158141
ci->fscache = fscache_acquire_cookie(fsc->fscache,
159142
&ceph_fscache_inode_object_def,
160143
&ci->i_vino, sizeof(ci->i_vino),
161-
&aux, sizeof(aux),
144+
&ci->i_version, sizeof(ci->i_version),
162145
ci, i_size_read(inode), false);
163146
}
164147
inode_unlock(inode);

fs/ceph/caps.c

Lines changed: 134 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,11 +1188,11 @@ void ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
11881188

11891189
lockdep_assert_held(&ci->i_ceph_lock);
11901190

1191-
fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
1191+
fsc = ceph_inode_to_client(&ci->vfs_inode);
11921192
WARN_ON_ONCE(ci->i_auth_cap == cap &&
11931193
!list_empty(&ci->i_dirty_item) &&
11941194
!fsc->blocklisted &&
1195-
READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
1195+
!ceph_inode_is_shutdown(&ci->vfs_inode));
11961196

11971197
__ceph_remove_cap(cap, queue_release);
11981198
}
@@ -1968,8 +1968,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
19681968
}
19691969
}
19701970

1971-
dout("check_caps %p file_want %s used %s dirty %s flushing %s"
1972-
" issued %s revoking %s retain %s %s%s\n", inode,
1971+
dout("check_caps %llx.%llx file_want %s used %s dirty %s flushing %s"
1972+
" issued %s revoking %s retain %s %s%s\n", ceph_vinop(inode),
19731973
ceph_cap_string(file_wanted),
19741974
ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
19751975
ceph_cap_string(ci->i_flushing_caps),
@@ -1990,7 +1990,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
19901990
(revoking & (CEPH_CAP_FILE_CACHE|
19911991
CEPH_CAP_FILE_LAZYIO)) && /* or revoking cache */
19921992
!tried_invalidate) {
1993-
dout("check_caps trying to invalidate on %p\n", inode);
1993+
dout("check_caps trying to invalidate on %llx.%llx\n",
1994+
ceph_vinop(inode));
19941995
if (try_nonblocking_invalidate(inode) < 0) {
19951996
dout("check_caps queuing invalidate\n");
19961997
queue_invalidate = true;
@@ -2629,9 +2630,9 @@ void ceph_take_cap_refs(struct ceph_inode_info *ci, int got,
26292630
*
26302631
* Returns 0 if caps were not able to be acquired (yet), 1 if succeed,
26312632
* or a negative error code. There are 3 special error codes:
2632-
* -EAGAIN: need to sleep but non-blocking is specified
2633-
* -EFBIG: ask caller to call check_max_size() and try again.
2634-
* -ESTALE: ask caller to call ceph_renew_caps() and try again.
2633+
* -EAGAIN: need to sleep but non-blocking is specified
2634+
* -EFBIG: ask caller to call check_max_size() and try again.
2635+
* -EUCLEAN: ask caller to call ceph_renew_caps() and try again.
26352636
*/
26362637
enum {
26372638
/* first 8 bits are reserved for CEPH_FILE_MODE_FOO */
@@ -2679,7 +2680,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
26792680
dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
26802681
inode, endoff, ci->i_max_size);
26812682
if (endoff > ci->i_requested_max_size)
2682-
ret = ci->i_auth_cap ? -EFBIG : -ESTALE;
2683+
ret = ci->i_auth_cap ? -EFBIG : -EUCLEAN;
26832684
goto out_unlock;
26842685
}
26852686
/*
@@ -2749,17 +2750,17 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
27492750
goto out_unlock;
27502751
}
27512752

2752-
if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
2753-
dout("get_cap_refs %p forced umount\n", inode);
2754-
ret = -EIO;
2753+
if (ceph_inode_is_shutdown(inode)) {
2754+
dout("get_cap_refs %p inode is shutdown\n", inode);
2755+
ret = -ESTALE;
27552756
goto out_unlock;
27562757
}
27572758
mds_wanted = __ceph_caps_mds_wanted(ci, false);
27582759
if (need & ~mds_wanted) {
27592760
dout("get_cap_refs %p need %s > mds_wanted %s\n",
27602761
inode, ceph_cap_string(need),
27612762
ceph_cap_string(mds_wanted));
2762-
ret = -ESTALE;
2763+
ret = -EUCLEAN;
27632764
goto out_unlock;
27642765
}
27652766

@@ -2843,7 +2844,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
28432844

28442845
ret = try_get_cap_refs(inode, need, want, 0, flags, got);
28452846
/* three special error codes */
2846-
if (ret == -EAGAIN || ret == -EFBIG || ret == -ESTALE)
2847+
if (ret == -EAGAIN || ret == -EFBIG || ret == -EUCLEAN)
28472848
ret = 0;
28482849
return ret;
28492850
}
@@ -2926,7 +2927,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
29262927
}
29272928

29282929
if (ret < 0) {
2929-
if (ret == -EFBIG || ret == -ESTALE) {
2930+
if (ret == -EFBIG || ret == -EUCLEAN) {
29302931
int ret2 = ceph_wait_on_async_create(inode);
29312932
if (ret2 < 0)
29322933
return ret2;
@@ -2935,7 +2936,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
29352936
check_max_size(inode, endoff);
29362937
continue;
29372938
}
2938-
if (ret == -ESTALE) {
2939+
if (ret == -EUCLEAN) {
29392940
/* session was killed, try renew caps */
29402941
ret = ceph_renew_caps(inode, flags);
29412942
if (ret == 0)
@@ -4315,7 +4316,7 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s)
43154316
i_dirty_item);
43164317
inode = &ci->vfs_inode;
43174318
ihold(inode);
4318-
dout("flush_dirty_caps %p\n", inode);
4319+
dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode));
43194320
spin_unlock(&mdsc->cap_dirty_lock);
43204321
ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
43214322
iput(inode);
@@ -4560,3 +4561,119 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
45604561
spin_unlock(&dentry->d_lock);
45614562
return ret;
45624563
}
4564+
4565+
static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
4566+
{
4567+
struct ceph_inode_info *ci = ceph_inode(inode);
4568+
struct ceph_cap_snap *capsnap;
4569+
int capsnap_release = 0;
4570+
4571+
lockdep_assert_held(&ci->i_ceph_lock);
4572+
4573+
dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
4574+
4575+
while (!list_empty(&ci->i_cap_snaps)) {
4576+
capsnap = list_first_entry(&ci->i_cap_snaps,
4577+
struct ceph_cap_snap, ci_item);
4578+
__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
4579+
ceph_put_snap_context(capsnap->context);
4580+
ceph_put_cap_snap(capsnap);
4581+
capsnap_release++;
4582+
}
4583+
wake_up_all(&ci->i_cap_wq);
4584+
wake_up_all(&mdsc->cap_flushing_wq);
4585+
return capsnap_release;
4586+
}
4587+
4588+
int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate)
4589+
{
4590+
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
4591+
struct ceph_mds_client *mdsc = fsc->mdsc;
4592+
struct ceph_inode_info *ci = ceph_inode(inode);
4593+
bool is_auth;
4594+
bool dirty_dropped = false;
4595+
int iputs = 0;
4596+
4597+
lockdep_assert_held(&ci->i_ceph_lock);
4598+
4599+
dout("removing cap %p, ci is %p, inode is %p\n",
4600+
cap, ci, &ci->vfs_inode);
4601+
4602+
is_auth = (cap == ci->i_auth_cap);
4603+
__ceph_remove_cap(cap, false);
4604+
if (is_auth) {
4605+
struct ceph_cap_flush *cf;
4606+
4607+
if (ceph_inode_is_shutdown(inode)) {
4608+
if (inode->i_data.nrpages > 0)
4609+
*invalidate = true;
4610+
if (ci->i_wrbuffer_ref > 0)
4611+
mapping_set_error(&inode->i_data, -EIO);
4612+
}
4613+
4614+
spin_lock(&mdsc->cap_dirty_lock);
4615+
4616+
/* trash all of the cap flushes for this inode */
4617+
while (!list_empty(&ci->i_cap_flush_list)) {
4618+
cf = list_first_entry(&ci->i_cap_flush_list,
4619+
struct ceph_cap_flush, i_list);
4620+
list_del_init(&cf->g_list);
4621+
list_del_init(&cf->i_list);
4622+
if (!cf->is_capsnap)
4623+
ceph_free_cap_flush(cf);
4624+
}
4625+
4626+
if (!list_empty(&ci->i_dirty_item)) {
4627+
pr_warn_ratelimited(
4628+
" dropping dirty %s state for %p %lld\n",
4629+
ceph_cap_string(ci->i_dirty_caps),
4630+
inode, ceph_ino(inode));
4631+
ci->i_dirty_caps = 0;
4632+
list_del_init(&ci->i_dirty_item);
4633+
dirty_dropped = true;
4634+
}
4635+
if (!list_empty(&ci->i_flushing_item)) {
4636+
pr_warn_ratelimited(
4637+
" dropping dirty+flushing %s state for %p %lld\n",
4638+
ceph_cap_string(ci->i_flushing_caps),
4639+
inode, ceph_ino(inode));
4640+
ci->i_flushing_caps = 0;
4641+
list_del_init(&ci->i_flushing_item);
4642+
mdsc->num_cap_flushing--;
4643+
dirty_dropped = true;
4644+
}
4645+
spin_unlock(&mdsc->cap_dirty_lock);
4646+
4647+
if (dirty_dropped) {
4648+
mapping_set_error(inode->i_mapping, -EIO);
4649+
4650+
if (ci->i_wrbuffer_ref_head == 0 &&
4651+
ci->i_wr_ref == 0 &&
4652+
ci->i_dirty_caps == 0 &&
4653+
ci->i_flushing_caps == 0) {
4654+
ceph_put_snap_context(ci->i_head_snapc);
4655+
ci->i_head_snapc = NULL;
4656+
}
4657+
}
4658+
4659+
if (atomic_read(&ci->i_filelock_ref) > 0) {
4660+
/* make further file lock syscalls return -EIO */
4661+
ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
4662+
pr_warn_ratelimited(" dropping file locks for %p %lld\n",
4663+
inode, ceph_ino(inode));
4664+
}
4665+
4666+
if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
4667+
cf = ci->i_prealloc_cap_flush;
4668+
ci->i_prealloc_cap_flush = NULL;
4669+
if (!cf->is_capsnap)
4670+
ceph_free_cap_flush(cf);
4671+
}
4672+
4673+
if (!list_empty(&ci->i_cap_snaps))
4674+
iputs = remove_capsnaps(mdsc, inode);
4675+
}
4676+
if (dirty_dropped)
4677+
++iputs;
4678+
return iputs;
4679+
}

0 commit comments

Comments
 (0)