Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion src/yb/tools/yb-admin-multi-master-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ namespace {

static const char* const kAdminToolName = "yb-admin";

// Normalize heartbeat lag values to a fixed placeholder for deterministic comparison.
// Replaces values like "123ms" or "0ms" at end of lines with "<lag>", and preserves "N/A".
// Follows the NormalizeLogLine() pattern from conflict_resolve_keys_verification-itest.cc.
std::string NormalizeLagValues(const std::string& output) {
std::regex lag_pattern(R"(\d+ms)");
return std::regex_replace(output, lag_pattern, "<lag>");
}

} // namespace

class YBAdminMultiMasterTest : public ExternalMiniClusterITestBase {
Expand Down Expand Up @@ -79,7 +87,8 @@ TEST_F(YBAdminMultiMasterTest, InitialMasterAddresses) {
admin_path, "--master_addresses", cluster_->GetMasterAddresses(),
"list_all_masters"), &output2));
LOG(INFO) << "full master_addresses: list_all_masters: " << output2;
ASSERT_EQ(output1, output2);
// Normalize heartbeat lag values since they are non-deterministic.
ASSERT_EQ(NormalizeLagValues(output1), NormalizeLagValues(output2));

output1.clear();
output2.clear();
Expand Down Expand Up @@ -336,5 +345,40 @@ TEST_F(YBAdminMultiMasterTest, TestMasterLeaderStepdown) {
}, 5s, "Master leader stepdown"));
}

TEST_F(YBAdminMultiMasterTest, ListAllMastersShowsHeartbeatLag) {
const int kNumMasters = 3;
const auto admin_path = GetToolPath(kAdminToolName);
ASSERT_NO_FATALS(StartCluster({}, {}, 1 /* num tservers */, kNumMasters));

std::string output;
ASSERT_OK(Subprocess::Call(ToStringVector(
admin_path, "--master_addresses", cluster_->GetMasterAddresses(),
"list_all_masters"), &output));
LOG(INFO) << "list_all_masters:\n" << output;

auto lines = StringSplit(output, '\n');
// Header + 3 masters.
ASSERT_GE(lines.size(), kNumMasters + 1);

// Verify header contains "Heartbeat Lag".
ASSERT_NE(lines[0].find("Heartbeat Lag"), std::string::npos)
<< "Header missing 'Heartbeat Lag': " << lines[0];

// Verify each master row has a lag value: leader should show 0ms, followers should show Nms.
bool found_leader = false;
for (size_t i = 1; i <= static_cast<size_t>(kNumMasters); ++i) {
const auto& line = lines[i];
if (line.find("LEADER") != std::string::npos) {
found_leader = true;
ASSERT_NE(line.find("0ms"), std::string::npos)
<< "Leader should show 0ms lag: " << line;
} else if (line.find("FOLLOWER") != std::string::npos) {
ASSERT_NE(line.find("ms"), std::string::npos)
<< "Follower should show lag in ms: " << line;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test assumes followers always have lag data

Medium Severity

The test requires all follower lines to contain ms, but the implementation outputs N/A for followers when has_lag_data is true but their UUID isn't found in the heartbeat map (line 1509 of yb-admin_client.cc). A follower missing from the heartbeat response would fail this assertion despite being valid output from the implementation.

Fix in Cursor Fix in Web

}
}
ASSERT_TRUE(found_leader) << "Expected to find a LEADER in output";
}

} // namespace tools
} // namespace yb
54 changes: 49 additions & 5 deletions src/yb/tools/yb-admin_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1444,15 +1444,45 @@ Result<std::unordered_set<std::string>> ClusterAdminClient::ListAllKnownTabletSe
Status ClusterAdminClient::ListAllMasters() {
const auto lresp = VERIFY_RESULT(GetAllMasters());

// Fetch heartbeat delay info (best-effort, only available on leader).
std::unordered_map<std::string, int64_t> lag_by_uuid;
bool has_lag_data = false;
{
rpc::RpcController rpc;
rpc.set_timeout(timeout_);
master::GetMasterHeartbeatDelaysRequestPB hb_req;
master::GetMasterHeartbeatDelaysResponsePB hb_resp;
auto s = master_admin_proxy_->GetMasterHeartbeatDelays(hb_req, &hb_resp, &rpc);
if (s.ok() && !hb_resp.has_error()) {
has_lag_data = true;
for (const auto& delay : hb_resp.heartbeat_delay()) {
lag_by_uuid[delay.master_uuid()] = delay.last_heartbeat_delta_ms();
}
}
}

// Identify leader UUID.
std::string leader_uuid;
for (const auto& master : lresp.masters()) {
if (master.has_role() && master.role() == PeerRole::LEADER &&
master.has_instance_id()) {
leader_uuid = master.instance_id().permanent_uuid();
break;
}
}

cout << RightPadToUuidWidth("Master UUID") << kColumnSep
<< RightPadToWidth(kRpcHostPortHeading, kHostPortColWidth) << kColumnSep
<< RightPadToWidth("State", kSmallColWidth) << kColumnSep
<< "Role" << kColumnSep << RightPadToWidth(kBroadcastHeading, kHostPortColWidth) << endl;
<< RightPadToWidth("Role", kSmallColWidth) << kColumnSep
<< RightPadToWidth(kBroadcastHeading, kHostPortColWidth) << kColumnSep
<< "Heartbeat Lag" << endl;

for (const auto& master : lresp.masters()) {
const auto master_reg = master.has_registration() ? &master.registration() : nullptr;
cout << (master.has_instance_id() ? master.instance_id().permanent_uuid()
: RightPadToUuidWidth("UNKNOWN_UUID")) << kColumnSep;
const auto uuid = master.has_instance_id()
? master.instance_id().permanent_uuid() : "";
cout << (uuid.empty() ? RightPadToUuidWidth("UNKNOWN_UUID") : uuid) << kColumnSep;
cout << RightPadToWidth(
master_reg ? FormatFirstHostPort(master_reg->private_rpc_addresses())
: "UNKNOWN", kHostPortColWidth)
Expand All @@ -1461,10 +1491,24 @@ Status ClusterAdminClient::ListAllMasters() {
PBEnumToString(master.error().code()) : "ALIVE"),
kSmallColWidth)
<< kColumnSep;
cout << (master.has_role() ? PBEnumToString(master.role()) : "UNKNOWN") << kColumnSep;
cout << RightPadToWidth(
master.has_role() ? PBEnumToString(master.role()) : "UNKNOWN",
kSmallColWidth)
<< kColumnSep;
cout << RightPadToWidth(
master_reg ? FormatFirstHostPort(master_reg->broadcast_addresses()) : "UNKNOWN",
kHostPortColWidth) << endl;
kHostPortColWidth) << kColumnSep;
// Heartbeat lag column.
if (!has_lag_data) {
cout << "N/A";
} else if (!uuid.empty() && uuid == leader_uuid) {
cout << "0ms";
} else if (auto it = lag_by_uuid.find(uuid); it != lag_by_uuid.end()) {
cout << it->second << "ms";
} else {
cout << "N/A";
}
cout << endl;
}

return Status::OK();
Expand Down