Skip to content

Commit 7c994bf

Browse files
authored
Merge pull request ceph#62416 from kamoltat/wip-ksirivad-fix-connection-score
2 parents 089cd8e + eadcb8c commit 7c994bf

File tree

3 files changed

+74
-16
lines changed

3 files changed

+74
-16
lines changed

src/mon/Elector.cc

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -453,22 +453,37 @@ void Elector::handle_nak(MonOpRequestRef op)
453453

454454
void Elector::begin_peer_ping(int peer)
455455
{
456-
dout(20) << __func__ << " against " << peer << dendl;
456+
dout(20) << __func__ << " with " << peer << dendl;
457457
if (live_pinging.count(peer)) {
458458
dout(20) << peer << " already in live_pinging ... return " << dendl;
459459
return;
460460
}
461-
462-
if (!mon->get_quorum_mon_features().contains_all(
461+
// Check if quorum feature is not set and we are in
462+
// STATE_INIT, STATE_PROBING, STATE_SYNCHRONIZING or STATE_ELECTING
463+
if (mon->get_quorum_mon_features().empty() &&
464+
(!mon->is_leader() && !mon->is_peon() && !mon->is_shutdown())) {
465+
dout(10) << "quorum mon feature is not yet set, "
466+
<< " we might need to wait until we form a quorum"
467+
<< dendl;
468+
pending_pings.insert(peer);
469+
return;
470+
} else if (!mon->get_quorum_mon_features().contains_all(
463471
ceph::features::mon::FEATURE_PINGING)) {
472+
dout(10) << "mon quorum does not support pinging .. return" << dendl;
464473
return;
465474
}
466475

476+
pending_pings.erase(peer);
467477
peer_tracker.report_live_connection(peer, 0); // init this peer as existing
468478
live_pinging.insert(peer);
469479
dead_pinging.erase(peer);
470480
peer_acked_ping[peer] = ceph_clock_now();
471-
if (!send_peer_ping(peer)) return;
481+
if (!send_peer_ping(peer)) {
482+
dout(20) << "send_peer_ping failed ..."
483+
<< " no need to schedule ping_check" << dendl;
484+
return;
485+
}
486+
dout(30) << "schedule ping_check against peer: " << peer << dendl;
472487
mon->timer.add_event_after(ping_timeout / PING_DIVISOR,
473488
new C_MonContext{mon, [this, peer](int) {
474489
ping_check(peer);
@@ -497,32 +512,55 @@ bool Elector::send_peer_ping(int peer, const utime_t *n)
497512
MMonPing *ping = new MMonPing(MMonPing::PING, now, peer_tracker.get_encoded_bl());
498513
mon->messenger->send_to_mon(ping, mon->monmap->get_addrs(peer));
499514
peer_sent_ping[peer] = now;
515+
dout(20) << " sent ping successfully to peer: " << peer << dendl;
500516
return true;
501517
}
502518

519+
void Elector::process_pending_pings()
520+
{
521+
dout(10) << __func__ << " processing "
522+
<< pending_pings.size() << " pending pings" << dendl;
523+
524+
// Make a copy since begin_peer_ping will modify the set
525+
std::set<int> peers_to_ping = pending_pings;
526+
for (int peer : peers_to_ping) {
527+
begin_peer_ping(peer);
528+
}
529+
}
530+
503531
void Elector::ping_check(int peer)
504532
{
505-
dout(20) << __func__ << " to peer " << peer << dendl;
533+
dout(20) << __func__ << "ing peer " << peer << dendl;
506534

507535
if (!live_pinging.count(peer) &&
508536
!dead_pinging.count(peer)) {
509-
dout(20) << __func__ << peer << " is no longer marked for pinging" << dendl;
537+
dout(20) << peer << " is no longer marked for pinging ... return" << dendl;
510538
return;
511539
}
512540
utime_t now = ceph_clock_now();
513541
utime_t& acked_ping = peer_acked_ping[peer];
514542
utime_t& newest_ping = peer_sent_ping[peer];
515543
if (!acked_ping.is_zero() && acked_ping < now - ping_timeout) {
544+
dout(20) << "peer " << peer << " has not acked a ping in "
545+
<< now - acked_ping << " seconds" << dendl;
516546
peer_tracker.report_dead_connection(peer, now - acked_ping);
517547
acked_ping = now;
518548
begin_dead_ping(peer);
519549
return;
520550
}
521551

522552
if (acked_ping == newest_ping) {
523-
if (!send_peer_ping(peer, &now)) return;
553+
dout(20) << "peer " << peer
554+
<< " has not acked the newest ping"
555+
<< " .. sending another ping" << dendl;
556+
if (!send_peer_ping(peer, &now)) {
557+
dout(20) << "send_peer_ping failed ..."
558+
<< " no need to schedule " << __func__ << dendl;
559+
return;
560+
}
524561
}
525562

563+
dout(30) << "schedule " << __func__ << " against peer: "<< peer << dendl;
526564
mon->timer.add_event_after(ping_timeout / PING_DIVISOR,
527565
new C_MonContext{mon, [this, peer](int) {
528566
ping_check(peer);
@@ -533,11 +571,13 @@ void Elector::begin_dead_ping(int peer)
533571
{
534572
dout(20) << __func__ << " to peer " << peer << dendl;
535573
if (dead_pinging.count(peer)) {
574+
dout(20) << peer << " already in dead_pinging ... return" << dendl;
536575
return;
537576
}
538577

539578
live_pinging.erase(peer);
540579
dead_pinging.insert(peer);
580+
dout(30) << "schedule dead_ping against peer: " << peer << dendl;
541581
mon->timer.add_event_after(ping_timeout,
542582
new C_MonContext{mon, [this, peer](int) {
543583
dead_ping(peer);
@@ -558,6 +598,7 @@ void Elector::dead_ping(int peer)
558598

559599
peer_tracker.report_dead_connection(peer, now - acked_ping);
560600
acked_ping = now;
601+
dout(30) << "schedule " << __func__ << " against peer: " << peer << dendl;
561602
mon->timer.add_event_after(ping_timeout,
562603
new C_MonContext{mon, [this, peer](int) {
563604
dead_ping(peer);
@@ -574,13 +615,15 @@ void Elector::handle_ping(MonOpRequestRef op)
574615
switch(m->op) {
575616
case MMonPing::PING:
576617
{
618+
dout(30) << "recieved PING from "
619+
<< prank << ", sending PING_REPLY back!" << dendl;
577620
MMonPing *reply = new MMonPing(MMonPing::PING_REPLY, m->stamp, peer_tracker.get_encoded_bl());
578621
m->get_connection()->send_message(reply);
579622
}
580623
break;
581624

582625
case MMonPing::PING_REPLY:
583-
626+
dout(30) << "recieved PING_REPLY from " << prank << dendl;
584627
const utime_t& previous_acked = peer_acked_ping[prank];
585628
const utime_t& newest = peer_sent_ping[prank];
586629

@@ -591,17 +634,23 @@ void Elector::handle_ping(MonOpRequestRef op)
591634
}
592635

593636
if (m->stamp > previous_acked) {
594-
dout(20) << "m->stamp > previous_acked" << dendl;
637+
dout(30) << "recieved good PING_REPLY!" << dendl;
595638
peer_tracker.report_live_connection(prank, m->stamp - previous_acked);
596639
peer_acked_ping[prank] = m->stamp;
597-
} else{
598-
dout(20) << "m->stamp <= previous_acked .. we don't report_live_connection" << dendl;
640+
} else {
641+
dout(30) << "recieved bad PING_REPLY! it's the same or older "
642+
<< "than the most recent ack we got." << dendl;
599643
}
600644
utime_t now = ceph_clock_now();
601-
dout(30) << "now: " << now << " m->stamp: " << m->stamp << " ping_timeout: "
602-
<< ping_timeout << " PING_DIVISOR: " << PING_DIVISOR << dendl;
603645
if (now - m->stamp > ping_timeout / PING_DIVISOR) {
604-
if (!send_peer_ping(prank, &now)) return;
646+
dout(30) << "peer " << prank << " has not acked a ping in "
647+
<< now - m->stamp << " seconds, which is more than the "
648+
<< ping_timeout / PING_DIVISOR << " seconds limit."
649+
<< " Sending another ping ..." << dendl;
650+
if (!send_peer_ping(prank, &now)) {
651+
dout(10) << "send_peer_ping failed ..." << dendl;
652+
return;
653+
}
605654
}
606655
break;
607656
}

src/mon/Elector.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ class Elector : public ElectionOwner, RankProvider {
3939
* @defgroup Elector_h_class Elector
4040
* @{
4141
*/
42+
43+
private:
44+
std::set<int> pending_pings; // Monitors waiting for quorum features to be established
45+
4246
ElectionLogic logic;
4347
// connectivity validation and scoring
4448
ConnectionTracker peer_tracker;
@@ -406,6 +410,11 @@ class Elector : public ElectionOwner, RankProvider {
406410
disallowed_leaders = dl;
407411
return true;
408412
}
413+
/**
414+
* process all pending pings when quorum is established
415+
*
416+
*/
417+
void process_pending_pings();
409418
void dump_connection_scores(Formatter *f) {
410419
f->open_object_section("connection scores");
411420
peer_tracker.dump(f);

src/mon/Monitor.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2296,7 +2296,7 @@ void Monitor::win_election(epoch_t epoch, const set<int>& active, uint64_t featu
22962296
encode(m, bl);
22972297
t->put(MONITOR_STORE_PREFIX, "last_metadata", bl);
22982298
}
2299-
2299+
elector.process_pending_pings();
23002300
finish_election();
23012301
if (monmap->size() > 1 &&
23022302
monmap->get_epoch() > 0) {
@@ -2349,7 +2349,7 @@ void Monitor::lose_election(epoch_t epoch, set<int> &q, int l,
23492349
_finish_svc_election();
23502350

23512351
logger->inc(l_mon_election_lose);
2352-
2352+
elector.process_pending_pings();
23532353
finish_election();
23542354
}
23552355

0 commit comments

Comments
 (0)