Skip to content

Commit 37d4e84

Browse files
committed
Merge tag 'ceph-for-5.5-rc2' of git://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:
"A fix to avoid a corner case when scheduling cap reclaim in batches from Xiubo, a patch to add some observability into cap waiters from Jeff and a couple of cleanups"

* tag 'ceph-for-5.5-rc2' of git://github.com/ceph/ceph-client:
  - ceph: add more debug info when decoding mdsmap
  - ceph: switch to global cap helper
  - ceph: trigger the reclaim work once there has enough pending caps
  - ceph: show tasks waiting on caps in debugfs caps file
  - ceph: convert int fields in ceph_mount_options to unsigned int
2 parents ae4b064 + da08e1e commit 37d4e84

File tree

7 files changed

+85
-42
lines changed

7 files changed

+85
-42
lines changed

fs/ceph/caps.c

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1011,18 +1011,13 @@ static int __ceph_is_single_caps(struct ceph_inode_info *ci)
10111011
return rb_first(&ci->i_caps) == rb_last(&ci->i_caps);
10121012
}
10131013

1014-
static int __ceph_is_any_caps(struct ceph_inode_info *ci)
1015-
{
1016-
return !RB_EMPTY_ROOT(&ci->i_caps);
1017-
}
1018-
10191014
int ceph_is_any_caps(struct inode *inode)
10201015
{
10211016
struct ceph_inode_info *ci = ceph_inode(inode);
10221017
int ret;
10231018

10241019
spin_lock(&ci->i_ceph_lock);
1025-
ret = __ceph_is_any_caps(ci);
1020+
ret = __ceph_is_any_real_caps(ci);
10261021
spin_unlock(&ci->i_ceph_lock);
10271022

10281023
return ret;
@@ -1099,15 +1094,16 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
10991094
if (removed)
11001095
ceph_put_cap(mdsc, cap);
11011096

1102-
/* when reconnect denied, we remove session caps forcibly,
1103-
* i_wr_ref can be non-zero. If there are ongoing write,
1104-
* keep i_snap_realm.
1105-
*/
1106-
if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
1107-
drop_inode_snap_realm(ci);
1097+
if (!__ceph_is_any_real_caps(ci)) {
1098+
/* when reconnect denied, we remove session caps forcibly,
1099+
* i_wr_ref can be non-zero. If there are ongoing write,
1100+
* keep i_snap_realm.
1101+
*/
1102+
if (ci->i_wr_ref == 0 && ci->i_snap_realm)
1103+
drop_inode_snap_realm(ci);
11081104

1109-
if (!__ceph_is_any_real_caps(ci))
11101105
__cap_delay_cancel(mdsc, ci);
1106+
}
11111107
}
11121108

11131109
struct cap_msg_args {
@@ -2764,7 +2760,19 @@ int ceph_get_caps(struct file *filp, int need, int want,
27642760
if (ret == -EAGAIN)
27652761
continue;
27662762
if (!ret) {
2763+
struct ceph_mds_client *mdsc = fsc->mdsc;
2764+
struct cap_wait cw;
27672765
DEFINE_WAIT_FUNC(wait, woken_wake_function);
2766+
2767+
cw.ino = inode->i_ino;
2768+
cw.tgid = current->tgid;
2769+
cw.need = need;
2770+
cw.want = want;
2771+
2772+
spin_lock(&mdsc->caps_list_lock);
2773+
list_add(&cw.list, &mdsc->cap_wait_list);
2774+
spin_unlock(&mdsc->caps_list_lock);
2775+
27682776
add_wait_queue(&ci->i_cap_wq, &wait);
27692777

27702778
flags |= NON_BLOCKING;
@@ -2778,6 +2786,11 @@ int ceph_get_caps(struct file *filp, int need, int want,
27782786
}
27792787

27802788
remove_wait_queue(&ci->i_cap_wq, &wait);
2789+
2790+
spin_lock(&mdsc->caps_list_lock);
2791+
list_del(&cw.list);
2792+
spin_unlock(&mdsc->caps_list_lock);
2793+
27812794
if (ret == -EAGAIN)
27822795
continue;
27832796
}
@@ -2928,7 +2941,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
29282941
ci->i_head_snapc = NULL;
29292942
}
29302943
/* see comment in __ceph_remove_cap() */
2931-
if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
2944+
if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
29322945
drop_inode_snap_realm(ci);
29332946
}
29342947
spin_unlock(&ci->i_ceph_lock);

fs/ceph/debugfs.c

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,7 @@ static int caps_show(struct seq_file *s, void *p)
139139
struct ceph_fs_client *fsc = s->private;
140140
struct ceph_mds_client *mdsc = fsc->mdsc;
141141
int total, avail, used, reserved, min, i;
142+
struct cap_wait *cw;
142143

143144
ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
144145
seq_printf(s, "total\t\t%d\n"
@@ -166,6 +167,18 @@ static int caps_show(struct seq_file *s, void *p)
166167
}
167168
mutex_unlock(&mdsc->mutex);
168169

170+
seq_printf(s, "\n\nWaiters:\n--------\n");
171+
seq_printf(s, "tgid ino need want\n");
172+
seq_printf(s, "-----------------------------------------------------\n");
173+
174+
spin_lock(&mdsc->caps_list_lock);
175+
list_for_each_entry(cw, &mdsc->cap_wait_list, list) {
176+
seq_printf(s, "%-13d0x%-17lx%-17s%-17s\n", cw->tgid, cw->ino,
177+
ceph_cap_string(cw->need),
178+
ceph_cap_string(cw->want));
179+
}
180+
spin_unlock(&mdsc->caps_list_lock);
181+
169182
return 0;
170183
}
171184

fs/ceph/mds_client.c

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2015,7 +2015,7 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
20152015
if (!nr)
20162016
return;
20172017
val = atomic_add_return(nr, &mdsc->cap_reclaim_pending);
2018-
if (!(val % CEPH_CAPS_PER_RELEASE)) {
2018+
if ((val % CEPH_CAPS_PER_RELEASE) < nr) {
20192019
atomic_set(&mdsc->cap_reclaim_pending, 0);
20202020
ceph_queue_cap_reclaim_work(mdsc);
20212021
}
@@ -2032,12 +2032,13 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
20322032
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
20332033
struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
20342034
size_t size = sizeof(struct ceph_mds_reply_dir_entry);
2035-
int order, num_entries;
2035+
unsigned int num_entries;
2036+
int order;
20362037

20372038
spin_lock(&ci->i_ceph_lock);
20382039
num_entries = ci->i_files + ci->i_subdirs;
20392040
spin_unlock(&ci->i_ceph_lock);
2040-
num_entries = max(num_entries, 1);
2041+
num_entries = max(num_entries, 1U);
20412042
num_entries = min(num_entries, opt->max_readdir);
20422043

20432044
order = get_order(size * num_entries);
@@ -4168,6 +4169,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
41684169
INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
41694170
mdsc->last_renew_caps = jiffies;
41704171
INIT_LIST_HEAD(&mdsc->cap_delay_list);
4172+
INIT_LIST_HEAD(&mdsc->cap_wait_list);
41714173
spin_lock_init(&mdsc->cap_delay_lock);
41724174
INIT_LIST_HEAD(&mdsc->snap_flush_list);
41734175
spin_lock_init(&mdsc->snap_flush_lock);

fs/ceph/mds_client.h

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -340,6 +340,14 @@ struct ceph_quotarealm_inode {
340340
struct inode *inode;
341341
};
342342

343+
struct cap_wait {
344+
struct list_head list;
345+
unsigned long ino;
346+
pid_t tgid;
347+
int need;
348+
int want;
349+
};
350+
343351
/*
344352
* mds client state
345353
*/
@@ -416,6 +424,7 @@ struct ceph_mds_client {
416424
spinlock_t caps_list_lock;
417425
struct list_head caps_list; /* unused (reserved or
418426
unreserved) */
427+
struct list_head cap_wait_list;
419428
int caps_total_count; /* total caps allocated */
420429
int caps_use_count; /* in use */
421430
int caps_use_max; /* max used caps */

fs/ceph/mdsmap.c

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
158158
void *pexport_targets = NULL;
159159
struct ceph_timespec laggy_since;
160160
struct ceph_mds_info *info;
161+
bool laggy;
161162

162163
ceph_decode_need(p, end, sizeof(u64) + 1, bad);
163164
global_id = ceph_decode_64(p);
@@ -190,6 +191,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
190191
if (err)
191192
goto corrupt;
192193
ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
194+
laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
193195
*p += sizeof(u32);
194196
ceph_decode_32_safe(p, end, namelen, bad);
195197
*p += namelen;
@@ -207,10 +209,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
207209
*p = info_end;
208210
}
209211

210-
dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
212+
dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s%s\n",
211213
i+1, n, global_id, mds, inc,
212214
ceph_pr_addr(&addr),
213-
ceph_mds_state_name(state));
215+
ceph_mds_state_name(state),
216+
laggy ? "(laggy)" : "");
214217

215218
if (mds < 0 || state <= 0)
216219
continue;
@@ -230,8 +233,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
230233
info->global_id = global_id;
231234
info->state = state;
232235
info->addr = addr;
233-
info->laggy = (laggy_since.tv_sec != 0 ||
234-
laggy_since.tv_nsec != 0);
236+
info->laggy = laggy;
235237
info->num_export_targets = num_export_targets;
236238
if (num_export_targets) {
237239
info->export_targets = kcalloc(num_export_targets,
@@ -355,6 +357,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
355357
m->m_damaged = false;
356358
}
357359
bad_ext:
360+
dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
361+
!!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
358362
*p = end;
359363
dout("mdsmap_decode success epoch %u\n", m->m_epoch);
360364
return m;

fs/ceph/super.c

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -172,10 +172,10 @@ static const struct fs_parameter_enum ceph_mount_param_enums[] = {
172172
static const struct fs_parameter_spec ceph_mount_param_specs[] = {
173173
fsparam_flag_no ("acl", Opt_acl),
174174
fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir),
175-
fsparam_u32 ("caps_max", Opt_caps_max),
175+
fsparam_s32 ("caps_max", Opt_caps_max),
176176
fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max),
177177
fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min),
178-
fsparam_s32 ("write_congestion_kb", Opt_congestion_kb),
178+
fsparam_u32 ("write_congestion_kb", Opt_congestion_kb),
179179
fsparam_flag_no ("copyfrom", Opt_copyfrom),
180180
fsparam_flag_no ("dcache", Opt_dcache),
181181
fsparam_flag_no ("dirstat", Opt_dirstat),
@@ -187,8 +187,8 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = {
187187
fsparam_flag_no ("quotadf", Opt_quotadf),
188188
fsparam_u32 ("rasize", Opt_rasize),
189189
fsparam_flag_no ("rbytes", Opt_rbytes),
190-
fsparam_s32 ("readdir_max_bytes", Opt_readdir_max_bytes),
191-
fsparam_s32 ("readdir_max_entries", Opt_readdir_max_entries),
190+
fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes),
191+
fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries),
192192
fsparam_enum ("recover_session", Opt_recover_session),
193193
fsparam_flag_no ("require_active_mds", Opt_require_active_mds),
194194
fsparam_u32 ("rsize", Opt_rsize),
@@ -328,7 +328,9 @@ static int ceph_parse_mount_param(struct fs_context *fc,
328328
fsopt->caps_wanted_delay_max = result.uint_32;
329329
break;
330330
case Opt_caps_max:
331-
fsopt->caps_max = result.uint_32;
331+
if (result.int_32 < 0)
332+
goto out_of_range;
333+
fsopt->caps_max = result.int_32;
332334
break;
333335
case Opt_readdir_max_entries:
334336
if (result.uint_32 < 1)
@@ -547,25 +549,25 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
547549
seq_show_option(m, "recover_session", "clean");
548550

549551
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
550-
seq_printf(m, ",wsize=%d", fsopt->wsize);
552+
seq_printf(m, ",wsize=%u", fsopt->wsize);
551553
if (fsopt->rsize != CEPH_MAX_READ_SIZE)
552-
seq_printf(m, ",rsize=%d", fsopt->rsize);
554+
seq_printf(m, ",rsize=%u", fsopt->rsize);
553555
if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
554-
seq_printf(m, ",rasize=%d", fsopt->rasize);
556+
seq_printf(m, ",rasize=%u", fsopt->rasize);
555557
if (fsopt->congestion_kb != default_congestion_kb())
556-
seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
558+
seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb);
557559
if (fsopt->caps_max)
558560
seq_printf(m, ",caps_max=%d", fsopt->caps_max);
559561
if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
560-
seq_printf(m, ",caps_wanted_delay_min=%d",
562+
seq_printf(m, ",caps_wanted_delay_min=%u",
561563
fsopt->caps_wanted_delay_min);
562564
if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
563-
seq_printf(m, ",caps_wanted_delay_max=%d",
565+
seq_printf(m, ",caps_wanted_delay_max=%u",
564566
fsopt->caps_wanted_delay_max);
565567
if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
566-
seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
568+
seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir);
567569
if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
568-
seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
570+
seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes);
569571
if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
570572
seq_show_option(m, "snapdirname", fsopt->snapdir_name);
571573

fs/ceph/super.h

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -73,16 +73,16 @@
7373
#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
7474

7575
struct ceph_mount_options {
76-
int flags;
76+
unsigned int flags;
7777

78-
int wsize; /* max write size */
79-
int rsize; /* max read size */
80-
int rasize; /* max readahead */
81-
int congestion_kb; /* max writeback in flight */
82-
int caps_wanted_delay_min, caps_wanted_delay_max;
78+
unsigned int wsize; /* max write size */
79+
unsigned int rsize; /* max read size */
80+
unsigned int rasize; /* max readahead */
81+
unsigned int congestion_kb; /* max writeback in flight */
82+
unsigned int caps_wanted_delay_min, caps_wanted_delay_max;
8383
int caps_max;
84-
int max_readdir; /* max readdir result (entires) */
85-
int max_readdir_bytes; /* max readdir result (bytes) */
84+
unsigned int max_readdir; /* max readdir result (entries) */
85+
unsigned int max_readdir_bytes; /* max readdir result (bytes) */
8686

8787
/*
8888
* everything above this point can be memcmp'd; everything below

0 commit comments

Comments
 (0)