Skip to content

Commit 5ba8c92

Browse files
authored
Merge pull request ceph#57911 from gardran/wip-gdran-mds-better-clease-handling
mds: some optimizations around client Capability and Lease tracking Reviewed-by: Venky Shankar <[email protected]>
2 parents 03d4a0e + 2140fbf commit 5ba8c92

File tree

11 files changed

+125
-118
lines changed

11 files changed

+125
-118
lines changed

src/include/elist.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ class elist {
4545

4646
bool empty() const { return _prev == this; }
4747
bool is_on_list() const { return !empty(); }
48+
bool is_singular() const {
49+
return is_on_list() && _prev == _next;
50+
}
4851

4952
bool remove_myself() {
5053
if (_next == this) {

src/mds/CDentry.cc

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -500,22 +500,30 @@ void CDentry::decode_lock_state(int type, const bufferlist& bl)
500500
}
501501

502502

503-
ClientLease *CDentry::add_client_lease(client_t c, Session *session)
504-
{
505-
ClientLease *l;
506-
if (client_lease_map.count(c))
507-
l = client_lease_map[c];
508-
else {
509-
dout(20) << __func__ << " client." << c << " on " << lock << dendl;
510-
if (client_lease_map.empty()) {
503+
MEMPOOL_DEFINE_OBJECT_FACTORY(ClientLease, mds_client_lease, mds_co);
504+
505+
client_t ClientLease::get_client() const
506+
{
507+
return session->get_client();
508+
}
509+
510+
ClientLease *CDentry::add_client_lease(Session *session)
511+
{
512+
client_t client = session->get_client();
513+
ClientLease* l = nullptr;
514+
auto it = client_leases.lower_bound(client);
515+
if (it == client_leases.end() || it->get_client() != client) {
516+
l = new ClientLease(this, session);
517+
dout(20) << __func__ << " client." << client << " on " << lock << dendl;
518+
if (client_leases.empty()) {
511519
get(PIN_CLIENTLEASE);
512520
lock.get_client_lease();
513521
}
514-
l = client_lease_map[c] = new ClientLease(c, this);
522+
client_leases.insert_before(it, *l);
515523
l->seq = ++session->lease_seq;
516-
524+
} else {
525+
l = &(*it);
517526
}
518-
519527
return l;
520528
}
521529

@@ -524,15 +532,14 @@ void CDentry::remove_client_lease(ClientLease *l, Locker *locker)
524532
ceph_assert(l->parent == this);
525533

526534
bool gather = false;
535+
dout(20) << __func__ << " client." << l->get_client() << " on " << lock << dendl;
527536

528-
dout(20) << __func__ << " client." << l->client << " on " << lock << dendl;
529-
530-
client_lease_map.erase(l->client);
531537
l->item_lease.remove_myself();
532538
l->item_session_lease.remove_myself();
539+
client_leases.erase(client_leases.iterator_to(*l));
533540
delete l;
534541

535-
if (client_lease_map.empty()) {
542+
if (client_leases.empty()) {
536543
gather = !lock.is_stable();
537544
lock.put_client_lease();
538545
put(PIN_CLIENTLEASE);
@@ -544,8 +551,8 @@ void CDentry::remove_client_lease(ClientLease *l, Locker *locker)
544551

545552
void CDentry::remove_client_leases(Locker *locker)
546553
{
547-
while (!client_lease_map.empty())
548-
remove_client_lease(client_lease_map.begin()->second, locker);
554+
while (!client_leases.empty())
555+
remove_client_lease(&(*client_leases.begin()), locker);
549556
}
550557

551558
void CDentry::_put()

src/mds/CDentry.h

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717

1818
#include <string>
1919
#include <string_view>
20-
#include <set>
2120

2221
#include "include/counter.h"
2322
#include "include/types.h"
2423
#include "include/buffer_fwd.h"
2524
#include "include/lru.h"
2625
#include "include/elist.h"
2726
#include "include/filepath.h"
27+
#include <boost/intrusive/set.hpp>
2828

2929
#include "BatchOp.h"
3030
#include "MDSCacheObject.h"
@@ -38,9 +38,35 @@ class CDir;
3838
class Locker;
3939
class CDentry;
4040
class LogSegment;
41-
4241
class Session;
4342

43+
struct ClientLease : public boost::intrusive::set_base_hook<>
44+
{
45+
MEMPOOL_CLASS_HELPERS();
46+
47+
ClientLease(CDentry *p, Session *s) :
48+
parent(p), session(s),
49+
item_session_lease(this),
50+
item_lease(this) { }
51+
ClientLease() = delete;
52+
client_t get_client() const;
53+
54+
CDentry *parent;
55+
Session *session;
56+
57+
ceph_seq_t seq = 0;
58+
utime_t ttl;
59+
xlist<ClientLease*>::item item_session_lease; // per-session list
60+
xlist<ClientLease*>::item item_lease; // global list
61+
};
62+
struct client_is_key
63+
{
64+
typedef client_t type;
65+
const type operator() (const ClientLease& l) const {
66+
return l.get_client();
67+
}
68+
};
69+
4470
// define an ordering
4571
bool operator<(const CDentry& l, const CDentry& r);
4672

@@ -324,27 +350,25 @@ class CDentry : public MDSCacheObject, public LRUObject, public Counter<CDentry>
324350
// replicas (on clients)
325351

326352
bool is_any_leases() const {
327-
return !client_lease_map.empty();
353+
return !client_leases.empty();
328354
}
329355
const ClientLease *get_client_lease(client_t c) const {
330-
if (client_lease_map.count(c))
331-
return client_lease_map.find(c)->second;
332-
return 0;
356+
auto it = client_leases.find(c);
357+
if (it != client_leases.end())
358+
return &(*it);
359+
return nullptr;
333360
}
334361
ClientLease *get_client_lease(client_t c) {
335-
if (client_lease_map.count(c))
336-
return client_lease_map.find(c)->second;
337-
return 0;
362+
auto it = client_leases.find(c);
363+
if (it != client_leases.end())
364+
return &(*it);
365+
return nullptr;
338366
}
339367
bool have_client_lease(client_t c) const {
340-
const ClientLease *l = get_client_lease(c);
341-
if (l)
342-
return true;
343-
else
344-
return false;
368+
return client_leases.count(c);
345369
}
346370

347-
ClientLease *add_client_lease(client_t c, Session *session);
371+
ClientLease *add_client_lease(Session *session);
348372
void remove_client_lease(ClientLease *r, Locker *locker); // returns remaining mask (if any), and kicks locker eval_gathers
349373
void remove_client_leases(Locker *locker);
350374

@@ -373,7 +397,10 @@ class CDentry : public MDSCacheObject, public LRUObject, public Counter<CDentry>
373397
SimpleLock lock; // FIXME referenced containers not in mempool
374398
LocalLockC versionlock; // FIXME referenced containers not in mempool
375399

376-
mempool::mds_co::map<client_t,ClientLease*> client_lease_map;
400+
typedef boost::intrusive::set<
401+
ClientLease, boost::intrusive::key_of_value<client_is_key>> ClientLeaseMap;
402+
ClientLeaseMap client_leases;
403+
377404
std::map<int, std::unique_ptr<BatchOp>> batch_ops;
378405

379406
ceph_tid_t reintegration_reqid = 0;

src/mds/CDir.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ void CDir::remove_dentry(CDentry *dn)
510510
dout(12) << __func__ << " " << *dn << dendl;
511511

512512
// there should be no client leases at this point!
513-
ceph_assert(dn->client_lease_map.empty());
513+
ceph_assert(dn->client_leases.empty());
514514

515515
if (state_test(CDir::STATE_DNPINNEDFRAG)) {
516516
dn->put(CDentry::PIN_FRAGMENTING);

src/mds/Capability.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,7 @@ void Capability::revoke_info::generate_test_instances(std::list<Capability::revo
151151
* Capability
152152
*/
153153
Capability::Capability(CInode *i, Session *s, uint64_t id) :
154-
item_session_caps(this), item_snaprealm_caps(this),
155-
item_revoking_caps(this), item_client_revoking_caps(this),
154+
item_session_caps(this),
156155
lock_caches(member_offset(MDLockCache, item_cap_lock_cache)),
157156
inode(i), session(s), cap_id(id)
158157
{

src/mds/Capability.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -336,9 +336,9 @@ class Capability : public Counter<Capability> {
336336
int64_t last_rsize = 0;
337337

338338
xlist<Capability*>::item item_session_caps;
339-
xlist<Capability*>::item item_snaprealm_caps;
340-
xlist<Capability*>::item item_revoking_caps;
341-
xlist<Capability*>::item item_client_revoking_caps;
339+
elist<Capability*>::item item_snaprealm_caps;
340+
elist<Capability*>::item item_revoking_caps;
341+
elist<Capability*>::item item_client_revoking_caps;
342342

343343
elist<MDLockCache*> lock_caches;
344344
int get_lock_cache_allowed() const { return lock_cache_allowed; }

src/mds/Locker.cc

Lines changed: 34 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ class LockerLogContext : public MDSLogContextBase {
7272
};
7373

7474
Locker::Locker(MDSRank *m, MDCache *c) :
75-
need_snapflush_inodes(member_offset(CInode, item_to_flush)), mds(m), mdcache(c) {}
75+
revoking_caps(member_offset(Capability, item_revoking_caps)),
76+
need_snapflush_inodes(member_offset(CInode, item_to_flush)),
77+
mds(m), mdcache(c) {}
7678

7779

7880
void Locker::dispatch(const cref_t<Message> &m)
@@ -2629,9 +2631,11 @@ int Locker::issue_caps(CInode *in, Capability *only_cap)
26292631

26302632
int op = (before & ~after) ? CEPH_CAP_OP_REVOKE : CEPH_CAP_OP_GRANT;
26312633
if (op == CEPH_CAP_OP_REVOKE) {
2632-
if (mds->logger) mds->logger->inc(l_mdss_ceph_cap_op_revoke);
2634+
if (mds->logger) mds->logger->inc(l_mdss_ceph_cap_op_revoke);
26332635
revoking_caps.push_back(&cap->item_revoking_caps);
2634-
revoking_caps_by_client[cap->get_client()].push_back(&cap->item_client_revoking_caps);
2636+
auto em = revoking_caps_by_client.emplace(cap->get_client(),
2637+
member_offset(Capability, item_client_revoking_caps));
2638+
em.first->second.push_back(&cap->item_client_revoking_caps);
26352639
cap->set_last_revoke_stamp(ceph_clock_now());
26362640
cap->reset_num_revoke_warnings();
26372641
} else {
@@ -2670,7 +2674,7 @@ void Locker::issue_truncate(CInode *in)
26702674
cap->get_mseq(),
26712675
mds->get_osd_epoch_barrier());
26722676
in->encode_cap_message(m, cap);
2673-
mds->send_message_client_counted(m, p.first);
2677+
mds->send_message_client_counted(m, cap->get_session());
26742678
}
26752679

26762680
// should we increase max_size?
@@ -3160,7 +3164,7 @@ void Locker::share_inode_max_size(CInode *in, Capability *only_cap)
31603164
cap->get_mseq(),
31613165
mds->get_osd_epoch_barrier());
31623166
in->encode_cap_message(m, cap);
3163-
mds->send_message_client_counted(m, client);
3167+
mds->send_message_client_counted(m, cap->get_session());
31643168
}
31653169
if (only_cap)
31663170
break;
@@ -4311,42 +4315,33 @@ void Locker::remove_client_cap(CInode *in, Capability *cap, bool kill)
43114315
try_eval(in, CEPH_CAP_LOCKS);
43124316
}
43134317

4314-
4315-
/**
4316-
* Return true if any currently revoking caps exceed the
4317-
* session_timeout threshold.
4318-
*/
4319-
bool Locker::any_late_revoking_caps(xlist<Capability*> const &revoking,
4320-
double timeout) const
4318+
std::set<client_t> Locker::get_late_revoking_clients(double timeout)
43214319
{
4322-
xlist<Capability*>::const_iterator p = revoking.begin();
4323-
if (p.end()) {
4320+
auto any_late_revoking = [timeout](elist<Capability*> &revoking) {
4321+
auto p = revoking.begin();
4322+
if (p.end())
43244323
// No revoking caps at the moment
43254324
return false;
4326-
} else {
4327-
utime_t now = ceph_clock_now();
4328-
utime_t age = now - (*p)->get_last_revoke_stamp();
4329-
if (age <= timeout) {
4330-
return false;
4331-
} else {
4332-
return true;
4333-
}
4334-
}
4335-
}
43364325

4337-
std::set<client_t> Locker::get_late_revoking_clients(double timeout) const
4338-
{
4339-
std::set<client_t> result;
4326+
utime_t now = ceph_clock_now();
4327+
return now - (*p)->get_last_revoke_stamp() > timeout;
4328+
};
43404329

4341-
if (any_late_revoking_caps(revoking_caps, timeout)) {
4330+
std::set<client_t> result;
4331+
if (!any_late_revoking(revoking_caps)) {
4332+
// Fast path: no misbehaving clients, execute in O(1)
4333+
} else {
43424334
// Slow path: execute in O(N_clients)
4343-
for (auto &p : revoking_caps_by_client) {
4344-
if (any_late_revoking_caps(p.second, timeout)) {
4345-
result.insert(p.first);
4335+
for (auto it = revoking_caps_by_client.begin();
4336+
it != revoking_caps_by_client.end(); ) {
4337+
if (it->second.empty()) {
4338+
revoking_caps_by_client.erase(it++);
4339+
continue;
43464340
}
4341+
if (any_late_revoking(it->second))
4342+
result.insert(it->first);
4343+
++it;
43474344
}
4348-
} else {
4349-
// Fast path: no misbehaving clients, execute in O(1)
43504345
}
43514346
return result;
43524347
}
@@ -4378,11 +4373,10 @@ void Locker::caps_tick()
43784373
}
43794374
}
43804375

4381-
dout(20) << __func__ << " " << revoking_caps.size() << " revoking caps" << dendl;
43824376

43834377
now = ceph_clock_now();
43844378
int n = 0;
4385-
for (xlist<Capability*>::iterator p = revoking_caps.begin(); !p.end(); ++p) {
4379+
for (auto p = revoking_caps.begin(); !p.end(); ++p) {
43864380
Capability *cap = *p;
43874381

43884382
utime_t age = now - cap->get_last_revoke_stamp();
@@ -4508,7 +4502,7 @@ void Locker::issue_client_lease(CDentry *dn, CInode *in, const MDRequestRef& mdr
45084502
ceph_assert(!in);
45094503
}
45104504
// issue a dentry lease
4511-
ClientLease *l = dn->add_client_lease(client, session);
4505+
ClientLease *l = dn->add_client_lease(session);
45124506
session->touch_lease(l);
45134507

45144508
int pool = 1; // fixme.. do something smart!
@@ -4537,20 +4531,17 @@ void Locker::issue_client_lease(CDentry *dn, CInode *in, const MDRequestRef& mdr
45374531
void Locker::revoke_client_leases(SimpleLock *lock)
45384532
{
45394533
CDentry *dn = static_cast<CDentry*>(lock->get_parent());
4540-
for (map<client_t, ClientLease*>::iterator p = dn->client_lease_map.begin();
4541-
p != dn->client_lease_map.end();
4542-
++p) {
4543-
ClientLease *l = p->second;
4544-
4534+
for (ClientLease& l : dn->client_leases) {
4535+
45454536
ceph_assert(lock->get_type() == CEPH_LOCK_DN);
45464537

45474538
CDentry *dn = static_cast<CDentry*>(lock->get_parent());
45484539
int mask = 1 | CEPH_LOCK_DN; // old and new bits
45494540

45504541
// i should also revoke the dir ICONTENT lease, if they have it!
45514542
CInode *diri = dn->get_dir()->get_inode();
4552-
auto lease = make_message<MClientLease>(CEPH_MDS_LEASE_REVOKE, l->seq, mask, diri->ino(), diri->first, CEPH_NOSNAP, dn->get_name());
4553-
mds->send_message_client_counted(lease, l->client);
4543+
auto lease = make_message<MClientLease>(CEPH_MDS_LEASE_REVOKE, l.seq, mask, diri->ino(), diri->first, CEPH_NOSNAP, dn->get_name());
4544+
mds->send_message_client_counted(lease, l.session);
45544545
}
45554546
}
45564547

src/mds/Locker.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ class Locker {
151151

152152
void remove_client_cap(CInode *in, Capability *cap, bool kill=false);
153153

154-
std::set<client_t> get_late_revoking_clients(double timeout) const;
154+
std::set<client_t> get_late_revoking_clients(double timeout);
155155

156156
void snapflush_nudge(CInode *in);
157157
void mark_need_snapflush_inode(CInode *in);
@@ -249,9 +249,9 @@ class Locker {
249249
xlist<ScatterLock*> updated_scatterlocks;
250250

251251
// Maintain a global list to quickly find if any caps are late revoking
252-
xlist<Capability*> revoking_caps;
252+
elist<Capability*> revoking_caps;
253253
// Maintain a per-client list to find clients responsible for late ones quickly
254-
std::map<client_t, xlist<Capability*> > revoking_caps_by_client;
254+
std::map<client_t, elist<Capability*> > revoking_caps_by_client;
255255

256256
elist<CInode*> need_snapflush_inodes;
257257

@@ -267,7 +267,6 @@ class Locker {
267267

268268
void handle_quiesce_failure(const MDRequestRef& mdr, std::string_view& marker);
269269

270-
bool any_late_revoking_caps(xlist<Capability*> const &revoking, double timeout) const;
271270
uint64_t calc_new_max_size(const CInode::inode_const_ptr& pi, uint64_t size);
272271
__u32 get_xattr_total_length(CInode::mempool_xattr_map &xattr);
273272
void decode_new_xattrs(CInode::mempool_inode *inode,

0 commit comments

Comments
 (0)