From 93b65a17e59f28ffa12efa51c391090278bc550d Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Fri, 19 Jan 2024 09:58:47 -0800 Subject: [PATCH 1/6] Add monitoring instrumentation around `dhcp-disable` state. PROBLEM ======= We're tracking reported issues in our environment where Kea servers are acting like they are being left disabled by unkonwn causes. Discovering the root cause of that problem has been hampered by poor visibility; * It is normal for servers to go without receiving packets. When the `pkt?-received` counter stops incrementing, we cannot determine reliably if this is due to e.g. DHCP relay agent misconfiguration or a Kea service issue. It frustrates debugging. * Kea does not increment any counter to indicate it is disabled, providing us with no clear monitoring predicate to detect the situation early. * It is consequently hours or days before this is noticed, by which time debug logs have rotated. This results in the situation of there being no outward indication of disabled state. SOLUTION ======== In this change, we resolve our visibility problems; * A new `pkt?-raw-received` counter is incremented at the earliest point of execution we know a packet has been received (in `runOne()`). * A new `pkt?-dhcp-disabled` counter counts packets dropped due to the service being in a disabled state at the time of reception. * A WARN level log is emitted for packets dropped while disabled. To alleviate spam concerns, a 1-in-100 sample is used. * To facilitate this change, the `StatsMgr` now provides a `getInteger()` method. * `NetworkState` internal boolean constructs are exposed as monitoring metrics. This allows us to observe the normal process of disabling service during e.g. HA reconnects and recovery, and separately detect extended unexpected disabled states prior to sufficient packet level impacts (consider e.g. DHCP anycast servers which may not receive packets to count as disabled for extended periods - we want to know it had been disabled for several days before the anycast route changes, not after). This "belt and suspenders" approach should close our monitoring gaps and provide attention to faulty services as they present. DISCUSSION ========== The `pkt?-raw-received` counters are a glaring admission of guilt, and we should use `pkt?-received` in these places. The exsisting `pkt?-received` counters should be renamed to `pkt?-processed`, as they indicate execution of the respective `processPacket()` function - still a useful monitoring indicator that e.g. the thread handoff succeeded. Probably there should also be a counter for the case where the callback addition failed instead of the present debug log. **HOWEVER**, existing Kea users may rely on the present behavior and names of metrics. A monitoring predicate using the current `pkt?-received` metrics may trigger automatic remediation processes for Kea servers unintentionally left disabled for too long; these predicate configurations would have to change or else the remediation would be defeated. This may not be an ideal user experience. Consequently, I've offered the unfortunate solution you see here; `pkt?-raw-received`, but I'm extremely unhappy with it; I'd be happy to modify the situation to whatever ISC prefers to offer their users in this case. --- ChangeLog | 9 ++ src/bin/dhcp4/dhcp4_srv.cc | 16 +++ src/bin/dhcp4/dhcp4_srv.h | 2 + .../dhcp4/tests/ctrl_dhcp4_srv_unittest.cc | 12 ++ src/bin/dhcp4/tests/dhcp4_test_utils.cc | 3 + src/bin/dhcp4/tests/dora_unittest.cc | 11 ++ src/bin/dhcp4/tests/inform_unittest.cc | 4 + src/bin/dhcp6/dhcp6_srv.cc | 16 +++ src/bin/dhcp6/dhcp6_srv.h | 2 + .../dhcp6/tests/ctrl_dhcp6_srv_unittest.cc | 12 ++ src/lib/dhcpsrv/network_state.cc | 24 ++++ src/lib/dhcpsrv/network_state.h | 3 + .../dhcpsrv/tests/network_state_unittest.cc | 114 ++++++++++++++++++ src/lib/stats/stats_mgr.cc | 19 +++ src/lib/stats/stats_mgr.h | 14 +++ src/lib/stats/tests/stats_mgr_unittest.cc | 1 + 16 files changed, 262 insertions(+) diff --git a/ChangeLog b/ChangeLog index 8da4e5a2284..020af9f455b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2201. [func] dhankins + New metrics pkt4-dhcp-disabled, pkt4-raw-received, disabled-by-db, + disabled-by-ha-local, disabled-by-ha-remote, disabled-by-user, and + disabled-globally provide monitoring level visibility into the network_state + manager, and closes a monitoring gap where packets dropped while disabled + are not counted by any metric. + While disabled, a WARN log is emitted every DISABLE_WARN_PACKET{4|6}(100) + dropped packets, in addition to the existing DEBUG log. + 2200. [func] piotrek Kea now supports new DHCPv4 option code 121, Classless Static Route option defined in RFC 3442. diff --git a/src/bin/dhcp4/dhcp4_srv.cc b/src/bin/dhcp4/dhcp4_srv.cc index 612e8aa58b7..8e49c82dae1 100644 --- a/src/bin/dhcp4/dhcp4_srv.cc +++ b/src/bin/dhcp4/dhcp4_srv.cc @@ -122,6 +122,8 @@ struct Dhcp4Hooks { /// List of statistics which is initialized to 0 during the DHCPv4 /// server startup. std::set dhcp4_statistics = { + "pkt4-dhcp-disabled", + "pkt4-raw-received", "pkt4-received", "pkt4-discover-received", "pkt4-offer-received", @@ -1096,10 +1098,24 @@ Dhcpv4Srv::runOne() { return; } + isc::stats::StatsMgr& stats_mgr = isc::stats::StatsMgr::instance(); + + // Count raw packets received as soon as we know a packet has been read. + stats_mgr.addValue("pkt4-raw-received", static_cast(1)); + // If the DHCP service has been globally disabled, drop the packet. if (!network_state_->isServiceEnabled()) { LOG_DEBUG(bad_packet4_logger, DBGLVL_PKT_HANDLING, DHCP4_PACKET_DROP_0008) .arg(query->getLabel()); + + // Log a warning every DISABLE_WARN_PACKET4. + if (stats_mgr.getInteger("pkt4-dhcp-disabled") % DISABLE_WARN_PACKET4 == 0) { + LOG_WARN(dhcp4_logger, DHCP4_PACKET_DROP_0008) + .arg(query->getLabel()); + } + + // Count packets dropped due to (hopefully temporary) service disabled. + stats_mgr.addValue("pkt4-dhcp-disabled", static_cast(1)); return; } else { if (MultiThreadingMgr::instance().getMode()) { diff --git a/src/bin/dhcp4/dhcp4_srv.h b/src/bin/dhcp4/dhcp4_srv.h index 0ed55adfe51..f0770b1ffff 100644 --- a/src/bin/dhcp4/dhcp4_srv.h +++ b/src/bin/dhcp4/dhcp4_srv.h @@ -40,6 +40,8 @@ namespace isc { namespace dhcp { +static const int64_t DISABLE_WARN_PACKET4 = 100; + /// @brief DHCPv4 message exchange. /// /// This class represents the DHCPv4 message exchange. The message exchange diff --git a/src/bin/dhcp4/tests/ctrl_dhcp4_srv_unittest.cc b/src/bin/dhcp4/tests/ctrl_dhcp4_srv_unittest.cc index 14ffe8d4ff6..158e7b953e4 100644 --- a/src/bin/dhcp4/tests/ctrl_dhcp4_srv_unittest.cc +++ b/src/bin/dhcp4/tests/ctrl_dhcp4_srv_unittest.cc @@ -634,11 +634,23 @@ TEST_F(CtrlChannelDhcpv4SrvTest, controlChannelStats) { " \"name\":\"bogus\" }}", response); EXPECT_EQ("{ \"arguments\": { }, \"result\": 0 }", response); + // Set max sample count to 1 to match test expectation (disabled state + // gauges normally have 2 samples). + sendUnixCommand("{ \"command\" : \"statistic-sample-count-set-all\", " + " \"arguments\": { \"max-samples\": 1 }}", response); + EXPECT_EQ("{ \"result\": 0, \"text\": \"All statistics count limit are set.\" }", response); // Check statistic-get-all sendUnixCommand("{ \"command\" : \"statistic-get-all\", " " \"arguments\": {}}", response); std::set initial_stats = { + "disabled-by-db", + "disabled-by-ha-local", + "disabled-by-ha-remote", + "disabled-by-user", + "disabled-globally", + "pkt4-dhcp-disabled", + "pkt4-raw-received", "pkt4-received", "pkt4-discover-received", "pkt4-offer-received", diff --git a/src/bin/dhcp4/tests/dhcp4_test_utils.cc b/src/bin/dhcp4/tests/dhcp4_test_utils.cc index c3aa7c44c89..f9238ea67ae 100644 --- a/src/bin/dhcp4/tests/dhcp4_test_utils.cc +++ b/src/bin/dhcp4/tests/dhcp4_test_utils.cc @@ -994,14 +994,17 @@ Dhcpv4SrvTest::pretendReceivingPkt(NakedDhcpv4Srv& srv, const std::string& confi using namespace isc::stats; StatsMgr& mgr = StatsMgr::instance(); + ObservationPtr pkt4_raw_received = mgr.getObservation("pkt4-raw-received"); ObservationPtr pkt4_rcvd = mgr.getObservation("pkt4-received"); ObservationPtr tested_stat = mgr.getObservation(stat_name); // All expected statistics must be present. + ASSERT_TRUE(pkt4_raw_received); ASSERT_TRUE(pkt4_rcvd); ASSERT_TRUE(tested_stat); // They also must have expected values. + EXPECT_EQ(1, pkt4_raw_received->getInteger().first); EXPECT_EQ(1, pkt4_rcvd->getInteger().first); EXPECT_EQ(1, tested_stat->getInteger().first); } diff --git a/src/bin/dhcp4/tests/dora_unittest.cc b/src/bin/dhcp4/tests/dora_unittest.cc index 9520f14d2b2..ec9e6476642 100644 --- a/src/bin/dhcp4/tests/dora_unittest.cc +++ b/src/bin/dhcp4/tests/dora_unittest.cc @@ -2504,6 +2504,8 @@ DORATest::statisticsDORA() { // Ok, let's check the statistics. using namespace isc::stats; StatsMgr& mgr = StatsMgr::instance(); + ObservationPtr pkt4_dhcp_disabled = mgr.getObservatoin("pkt4-dhcp-disabled"); + ObservationPtr pkt4_raw_received = mgr.getObservation("pkt4-raw-received"); ObservationPtr pkt4_received = mgr.getObservation("pkt4-received"); ObservationPtr pkt4_discover_received = mgr.getObservation("pkt4-discover-received"); ObservationPtr pkt4_offer_sent = mgr.getObservation("pkt4-offer-sent"); @@ -2512,6 +2514,8 @@ DORATest::statisticsDORA() { ObservationPtr pkt4_sent = mgr.getObservation("pkt4-sent"); // All expected statistics must be present. + ASSERT_TRUE(pkt4_dhcp_disabled); + ASSERT_TRUE(pkt4_raw_received); ASSERT_TRUE(pkt4_received); ASSERT_TRUE(pkt4_discover_received); ASSERT_TRUE(pkt4_offer_sent); @@ -2520,6 +2524,8 @@ DORATest::statisticsDORA() { ASSERT_TRUE(pkt4_sent); // They also must have expected values. + EXPECT_EQ(0, pkt4_dhcp_disabled->getInteger().first); + EXPECT_EQ(2, pkt4_raw_received->getInteger().first); EXPECT_EQ(2, pkt4_received->getInteger().first); EXPECT_EQ(1, pkt4_discover_received->getInteger().first); EXPECT_EQ(1, pkt4_offer_sent->getInteger().first); @@ -2535,6 +2541,8 @@ DORATest::statisticsDORA() { ASSERT_NO_THROW(client.doRequest()); // Let's see if the stats are properly updated. + EXPECT_EQ(0, pkt4_dhcp_disabled->getInteger().first); + EXPECT_EQ(5, pkt4_raw_received->getInteger().first); EXPECT_EQ(5, pkt4_received->getInteger().first); EXPECT_EQ(1, pkt4_discover_received->getInteger().first); EXPECT_EQ(1, pkt4_offer_sent->getInteger().first); @@ -2576,6 +2584,7 @@ DORATest::statisticsNAK() { using namespace isc::stats; StatsMgr& mgr = StatsMgr::instance(); + ObservationPtr pkt4_raw_received = mgr.getObservation("pkt4-raw-received"); ObservationPtr pkt4_received = mgr.getObservation("pkt4-received"); ObservationPtr pkt4_request_received = mgr.getObservation("pkt4-request-received"); ObservationPtr pkt4_ack_sent = mgr.getObservation("pkt4-ack-sent"); @@ -2583,6 +2592,7 @@ DORATest::statisticsNAK() { ObservationPtr pkt4_sent = mgr.getObservation("pkt4-sent"); // All expected statistics must be present. + ASSERT_TRUE(pkt4_raw_received); ASSERT_TRUE(pkt4_received); ASSERT_TRUE(pkt4_request_received); ASSERT_FALSE(pkt4_ack_sent); // No acks were sent, no such statistic expected. @@ -2590,6 +2600,7 @@ DORATest::statisticsNAK() { ASSERT_TRUE(pkt4_sent); // They also must have expected values. + EXPECT_EQ(1, pkt4_raw_received->getInteger().first); EXPECT_EQ(1, pkt4_received->getInteger().first); EXPECT_EQ(1, pkt4_request_received->getInteger().first); EXPECT_EQ(1, pkt4_nak_sent->getInteger().first); diff --git a/src/bin/dhcp4/tests/inform_unittest.cc b/src/bin/dhcp4/tests/inform_unittest.cc index 50697d57178..411cbf61fa5 100644 --- a/src/bin/dhcp4/tests/inform_unittest.cc +++ b/src/bin/dhcp4/tests/inform_unittest.cc @@ -623,18 +623,21 @@ TEST_F(InformTest, statisticsInform) { // Ok, let's check the statistics. using namespace isc::stats; StatsMgr& mgr = StatsMgr::instance(); + ObservationPtr pkt4_raw_received = mgr.getObservation("pkt4-raw-received"); ObservationPtr pkt4_received = mgr.getObservation("pkt4-received"); ObservationPtr pkt4_inform_received = mgr.getObservation("pkt4-inform-received"); ObservationPtr pkt4_ack_sent = mgr.getObservation("pkt4-ack-sent"); ObservationPtr pkt4_sent = mgr.getObservation("pkt4-sent"); // All expected statistics must be present. + ASSERT_TRUE(pkt4_raw_received); ASSERT_TRUE(pkt4_received); ASSERT_TRUE(pkt4_inform_received); ASSERT_TRUE(pkt4_ack_sent); ASSERT_TRUE(pkt4_sent); // And they must have expected values. + EXPECT_EQ(1, pkt4_raw_received->getInteger().first); EXPECT_EQ(1, pkt4_received->getInteger().first); EXPECT_EQ(1, pkt4_inform_received->getInteger().first); EXPECT_EQ(1, pkt4_ack_sent->getInteger().first); @@ -648,6 +651,7 @@ TEST_F(InformTest, statisticsInform) { ASSERT_NO_THROW(client.doInform()); // Let's see if the stats are properly updated. + EXPECT_EQ(5, pkt4_raw_received->getInteger().first); EXPECT_EQ(5, pkt4_received->getInteger().first); EXPECT_EQ(5, pkt4_inform_received->getInteger().first); EXPECT_EQ(5, pkt4_ack_sent->getInteger().first); diff --git a/src/bin/dhcp6/dhcp6_srv.cc b/src/bin/dhcp6/dhcp6_srv.cc index 60324d0c3f0..b6435e92d7f 100644 --- a/src/bin/dhcp6/dhcp6_srv.cc +++ b/src/bin/dhcp6/dhcp6_srv.cc @@ -178,6 +178,8 @@ createStatusCode(const Pkt6& pkt, const Option6IA& ia, const uint16_t status_cod /// List of statistics which is initialized to 0 during the DHCPv6 /// server startup. std::set dhcp6_statistics = { + "pkt6-dhcp-disabled", + "pkt6-raw-received", "pkt6-received", "pkt6-solicit-received", "pkt6-advertise-received", @@ -684,10 +686,24 @@ Dhcpv6Srv::runOne() { return; } + isc::stats::StatsMgr& stats_mgr = isc::stats::StatsMgr::instance(); + + // Count raw packets received as soon as we know a packet has been read. + stats_mgr.addValue("pkt6-raw-received", static_cast(1)); + // If the DHCP service has been globally disabled, drop the packet. if (!network_state_->isServiceEnabled()) { LOG_DEBUG(bad_packet6_logger, DBGLVL_PKT_HANDLING, DHCP6_PACKET_DROP_DHCP_DISABLED) .arg(query->getLabel()); + + // Log a warning every DISABLE_WARN_PACKET6. + if (stats_mgr.getInteger("pkt6-dhcp-disabled") % DISABLE_WARN_PACKET6 == 0) { + LOG_WARN(dhcp6_logger, DHCP6_PACKET_DROP_DHCP_DISABLED) + .arg(query->getLabel()); + } + + // Count packets dropped due to (hopefully temporary) service disabled. + stats_mgr.addValue("pkt6-dhcp-disabled", static_cast(1)); return; } else { if (MultiThreadingMgr::instance().getMode()) { diff --git a/src/bin/dhcp6/dhcp6_srv.h b/src/bin/dhcp6/dhcp6_srv.h index e2c255d8095..bb8a080b357 100644 --- a/src/bin/dhcp6/dhcp6_srv.h +++ b/src/bin/dhcp6/dhcp6_srv.h @@ -43,6 +43,8 @@ namespace isc { namespace dhcp { +static const int64_t DISABLE_WARN_PACKET6 = 100; + /// @brief This exception is thrown when DHCP server hits the error which should /// result in discarding the message being processed. class DHCPv6DiscardMessageError : public Exception { diff --git a/src/bin/dhcp6/tests/ctrl_dhcp6_srv_unittest.cc b/src/bin/dhcp6/tests/ctrl_dhcp6_srv_unittest.cc index 421cf13ccff..5569e5e32ff 100644 --- a/src/bin/dhcp6/tests/ctrl_dhcp6_srv_unittest.cc +++ b/src/bin/dhcp6/tests/ctrl_dhcp6_srv_unittest.cc @@ -1392,11 +1392,23 @@ TEST_F(CtrlChannelDhcpv6SrvTest, controlChannelStats) { " \"name\":\"bogus\" }}", response); EXPECT_EQ("{ \"arguments\": { }, \"result\": 0 }", response); + // Set max sample count to 1 to match test expectation (disabled state + // gauges normally have 2 samples). + sendUnixCommand("{ \"command\" : \"statistic-sample-count-set-all\", " + " \"arguments\": { \"max-samples\": 1 }}", response); + EXPECT_EQ("{ \"result\": 0, \"text\": \"All statistics count limit are set.\" }", response); // Check statistic-get-all sendUnixCommand("{ \"command\" : \"statistic-get-all\", " " \"arguments\": {}}", response); std::set initial_stats = { + "disabled-by-db", + "disabled-by-ha-local", + "disabled-by-ha-remote", + "disabled-by-user", + "disabled-globally", + "pkt6-dhcp-disabled", + "pkt6-raw-received", "pkt6-received", "pkt6-solicit-received", "pkt6-advertise-received", diff --git a/src/lib/dhcpsrv/network_state.cc b/src/lib/dhcpsrv/network_state.cc index 521d9ed82b6..17942429f15 100644 --- a/src/lib/dhcpsrv/network_state.cc +++ b/src/lib/dhcpsrv/network_state.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,7 @@ class NetworkStateImpl : public boost::enable_shared_from_this disabled_subnets_(), disabled_networks_(), timer_mgr_(TimerMgr::instance()), disabled_by_origin_(), disabled_by_db_connection_(0) { + updateStats(); } /// @brief Destructor. @@ -76,6 +78,7 @@ class NetworkStateImpl : public boost::enable_shared_from_this globally_disabled_ = false; } } + updateStats(); } /// @brief Reset internal counters for a database connection origin. @@ -86,6 +89,7 @@ class NetworkStateImpl : public boost::enable_shared_from_this if (disabled_by_origin_.empty()) { globally_disabled_ = false; } + updateStats(); } /// @brief Enables DHCP service for an origin. @@ -168,6 +172,26 @@ class NetworkStateImpl : public boost::enable_shared_from_this /// @brief Flag which indicates the state has been disabled by a DB /// connection loss. uint32_t disabled_by_db_connection_; + +private: + /// @private + + /// @brief Update monitoring metrics to expose disable states. + /// + /// A 0 value is used for false, 1 for true. + void updateStats() { + isc::stats::StatsMgr::instance().setValue("disabled-globally", + static_cast(globally_disabled_)); + isc::stats::StatsMgr::instance().setValue("disabled-by-user", + static_cast(disabled_by_origin_.contains(USER_COMMAND))); + isc::stats::StatsMgr::instance().setValue("disabled-by-ha-local", + static_cast(disabled_by_origin_.contains(HA_LOCAL_COMMAND))); + isc::stats::StatsMgr::instance().setValue("disabled-by-ha-remote", + static_cast(disabled_by_origin_.contains(HA_REMOTE_COMMAND))); + // Expose the disabled-by-db-connection counter by direct value. + isc::stats::StatsMgr::instance().setValue("disabled-by-db", + static_cast(disabled_by_db_connection_)); + } }; NetworkState::NetworkState(const NetworkState::ServerType& server_type) diff --git a/src/lib/dhcpsrv/network_state.h b/src/lib/dhcpsrv/network_state.h index 861d1a78cb3..81d0d320d86 100644 --- a/src/lib/dhcpsrv/network_state.h +++ b/src/lib/dhcpsrv/network_state.h @@ -200,6 +200,9 @@ class NetworkState { private: + /// @brief Update monitoring metrics to expose disable states. + void updateStats(); + /// @brief Pointer to the @c NetworkState implementation. boost::shared_ptr impl_; diff --git a/src/lib/dhcpsrv/tests/network_state_unittest.cc b/src/lib/dhcpsrv/tests/network_state_unittest.cc index 84f26c6ee35..6a3a0b83977 100644 --- a/src/lib/dhcpsrv/tests/network_state_unittest.cc +++ b/src/lib/dhcpsrv/tests/network_state_unittest.cc @@ -142,6 +142,9 @@ class NetworkStateTest : public ::testing::Test { /// from different origins and that it results in each timer being scheduled. void multipleDifferentOriginsDelayedEnableServiceTest(); + /// @brief This test verifies that monitoring metrics are updated. + void monitoringMetricsUpdatedTest(); + /// @brief Runs IO service with a timeout. /// /// @param timeout_ms Timeout for running IO service in milliseconds. @@ -603,6 +606,113 @@ NetworkStateTest::multipleDifferentOriginsDelayedEnableServiceTest() { EXPECT_FALSE(state.isDelayedEnableService()); } +void +NetworkStateTest::monitoringMetricsUpdatedTest() { + NetworkState state(NetworkState::DHCPv4); + isc::stats::StatsMgr& mgr = isc::stats::StatsMgr::instance(); + isc::stats::ObservationPtr disabled_globally = mgr.getObservation("disabled-globally"); + isc::stats::ObservationPtr disabled_by_user = mgr.getObservation("disabled-by-user"); + isc::stats::ObservationPtr disabled_by_ha_local = mgr.getObservation("disabled-by-ha-local"); + isc::stats::ObservationPtr disabled_by_ha_remote = mgr.getObservation("disabled-by-ha-remote"); + isc::stats::ObservationPtr disabled_by_db = mgr.getObservation("disabled-by-db"); + + ASSERT_TRUE(disabled_globally); + ASSERT_TRUE(disabled_by_user); + ASSERT_TRUE(disabled_by_ha_local); + ASSERT_TRUE(disabled_by_ha_remote); + ASSERT_TRUE(disabled_by_db); + + // Initial state. + EXPECT_TRUE(state.isServiceEnabled()); + EXPECT_EQ(0, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // Disable by user. + state.disableService(NetworkState::Origin::USER_COMMAND); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(1, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // Disable by ha local. + state.disableService(NetworkState::Origin::HA_LOCAL_COMMAND); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(1, disabled_by_user->getInteger().first); + EXPECT_EQ(1, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // Disable by ha remote. + state.disableService(NetworkState::Origin::HA_REMOTE_COMMAND); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(1, disabled_by_user->getInteger().first); + EXPECT_EQ(1, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(1, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // Enable by user. + state.enableService(NetworkState::Origin::USER_COMMAND); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(1, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(1, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // Enable by ha. + state.enableService(NetworkState::Origin::HA_LOCAL_COMMAND); + state.enableService(NetworkState::Origin::HA_REMOTE_COMMAND); + + EXPECT_TRUE(state.isServiceEnabled()); + EXPECT_EQ(0, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // DB connection disables are a counter. + state.disableService(NetworkState::Origin::DB_CONNECTION); + state.disableService(NetworkState::Origin::DB_CONNECTION); + state.disableService(NetworkState::Origin::DB_CONNECTION); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(3, disabled_by_db->getInteger().first); + + state.enableService(NetworkState::Origin::DB_CONNECTION); + state.enableService(NetworkState::Origin::DB_CONNECTION); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(1, disabled_by_db->getInteger().first); + + state.enableService(NetworkState::Origin::DB_CONNECTION); + + EXPECT_TRUE(state.isServiceEnabled()); + EXPECT_EQ(0, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); +} + // Test invocations. TEST_F(NetworkStateTest, defaultTest) { @@ -749,4 +859,8 @@ TEST_F(NetworkStateTest, multipleDifferentOriginsDelayedEnableServiceTestMultiTh multipleDifferentOriginsDelayedEnableServiceTest(); } +TEST_F(NetworkStateTest, monitoringMetricsUpdatedTest) { + monitoringMetricsUpdatedTest(); +} + } // end of anonymous namespace diff --git a/src/lib/stats/stats_mgr.cc b/src/lib/stats/stats_mgr.cc index b420cabc826..366f03a7504 100644 --- a/src/lib/stats/stats_mgr.cc +++ b/src/lib/stats/stats_mgr.cc @@ -273,6 +273,25 @@ StatsMgr::removeAllInternal() { global_->clear(); } +int64_t +StatsMgr::getInteger(const std::string& name) const { + if (MultiThreadingMgr::instance().getMode()) { + lock_guard lock(*mutex_); + return (getIntegerInternal(name)); + } else { + return (getIntegerInternal(name)); + } +} + +int64_t +StatsMgr::getIntegerInternal(const std::string& name) const { + ObservationPtr existing = getObservationInternal(name); + if (existing) { + return existing->getInteger().first; + } + return static_cast(0); +} + ConstElementPtr StatsMgr::get(const string& name) const { MultiThreadingLock lock(*mutex_); diff --git a/src/lib/stats/stats_mgr.h b/src/lib/stats/stats_mgr.h index f49ab8ab7c5..e9514f7b67c 100644 --- a/src/lib/stats/stats_mgr.h +++ b/src/lib/stats/stats_mgr.h @@ -249,6 +249,11 @@ class StatsMgr : public boost::noncopyable { /// @return number of recorded statistics. size_t count() const; + /// @brief Returns the most recent integer value for a monitoring variable. + /// + /// @return integer value as a 64 bit integer + int64_t getInteger(const std::string& name) const; + /// @brief Returns a single statistic as a JSON structure. /// /// @return JSON structures representing a single statistic @@ -715,6 +720,15 @@ class StatsMgr : public boost::noncopyable { /// @private + /// @brief Returns the most recent integer value for a monitoring variable. + /// + /// Should be called in a thread safe context. + /// + /// @return integer value as a 64 bit integer + int64_t getIntegerInternal(const std::string& name) const; + + /// @private + /// @brief Returns a single statistic as a JSON structure. /// /// Should be called in a thread safe context. diff --git a/src/lib/stats/tests/stats_mgr_unittest.cc b/src/lib/stats/tests/stats_mgr_unittest.cc index 734d134c8dd..22f595e3ac9 100644 --- a/src/lib/stats/tests/stats_mgr_unittest.cc +++ b/src/lib/stats/tests/stats_mgr_unittest.cc @@ -76,6 +76,7 @@ TEST_F(StatsMgrTest, integerStat) { isc::util::clockToText(alpha->getInteger().second) + "\" ] ] }"; EXPECT_EQ(exp, StatsMgr::instance().get("alpha")->str()); + EXPECT_EQ(alpha->getInteger().first, StatsMgr::instance().getInteger("alpha")); } // Test checks whether it's possible to record and later report From e0392c46b69cdddb56a3be195c07e5cb02bf8a9e Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Mon, 22 Jan 2024 08:14:30 -0800 Subject: [PATCH 2/6] Include namespace of origin constants. --- src/lib/dhcpsrv/network_state.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/dhcpsrv/network_state.cc b/src/lib/dhcpsrv/network_state.cc index 17942429f15..84b9d0c8aa1 100644 --- a/src/lib/dhcpsrv/network_state.cc +++ b/src/lib/dhcpsrv/network_state.cc @@ -183,11 +183,11 @@ class NetworkStateImpl : public boost::enable_shared_from_this isc::stats::StatsMgr::instance().setValue("disabled-globally", static_cast(globally_disabled_)); isc::stats::StatsMgr::instance().setValue("disabled-by-user", - static_cast(disabled_by_origin_.contains(USER_COMMAND))); + static_cast(disabled_by_origin_.contains(NetworkState::USER_COMMAND))); isc::stats::StatsMgr::instance().setValue("disabled-by-ha-local", - static_cast(disabled_by_origin_.contains(HA_LOCAL_COMMAND))); + static_cast(disabled_by_origin_.contains(NetworkState::HA_LOCAL_COMMAND))); isc::stats::StatsMgr::instance().setValue("disabled-by-ha-remote", - static_cast(disabled_by_origin_.contains(HA_REMOTE_COMMAND))); + static_cast(disabled_by_origin_.contains(NetworkState::HA_REMOTE_COMMAND))); // Expose the disabled-by-db-connection counter by direct value. isc::stats::StatsMgr::instance().setValue("disabled-by-db", static_cast(disabled_by_db_connection_)); From 323435db2b1f4852555a27995302a6616ab786ec Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Thu, 25 Jan 2024 15:54:50 -0800 Subject: [PATCH 3/6] Use `std::unordered_set<>.count()` instead of `.contains()`. The contains() method was added in C++20, while count() will always return 0 or 1 and goes back to C++11. --- src/lib/dhcpsrv/network_state.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/dhcpsrv/network_state.cc b/src/lib/dhcpsrv/network_state.cc index 84b9d0c8aa1..bb63ccdc5f7 100644 --- a/src/lib/dhcpsrv/network_state.cc +++ b/src/lib/dhcpsrv/network_state.cc @@ -183,11 +183,11 @@ class NetworkStateImpl : public boost::enable_shared_from_this isc::stats::StatsMgr::instance().setValue("disabled-globally", static_cast(globally_disabled_)); isc::stats::StatsMgr::instance().setValue("disabled-by-user", - static_cast(disabled_by_origin_.contains(NetworkState::USER_COMMAND))); + static_cast(disabled_by_origin_.count(NetworkState::USER_COMMAND))); isc::stats::StatsMgr::instance().setValue("disabled-by-ha-local", - static_cast(disabled_by_origin_.contains(NetworkState::HA_LOCAL_COMMAND))); + static_cast(disabled_by_origin_.count(NetworkState::HA_LOCAL_COMMAND))); isc::stats::StatsMgr::instance().setValue("disabled-by-ha-remote", - static_cast(disabled_by_origin_.contains(NetworkState::HA_REMOTE_COMMAND))); + static_cast(disabled_by_origin_.count(NetworkState::HA_REMOTE_COMMAND))); // Expose the disabled-by-db-connection counter by direct value. isc::stats::StatsMgr::instance().setValue("disabled-by-db", static_cast(disabled_by_db_connection_)); From 34e3453c22c3470b846c1435d69638559079e1b5 Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Thu, 25 Jan 2024 15:56:38 -0800 Subject: [PATCH 4/6] Fix compilation errors. --- src/bin/dhcp4/tests/dora_unittest.cc | 2 +- .../dhcpsrv/tests/network_state_unittest.cc | 25 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/bin/dhcp4/tests/dora_unittest.cc b/src/bin/dhcp4/tests/dora_unittest.cc index ec9e6476642..ae1b7848bab 100644 --- a/src/bin/dhcp4/tests/dora_unittest.cc +++ b/src/bin/dhcp4/tests/dora_unittest.cc @@ -2504,7 +2504,7 @@ DORATest::statisticsDORA() { // Ok, let's check the statistics. using namespace isc::stats; StatsMgr& mgr = StatsMgr::instance(); - ObservationPtr pkt4_dhcp_disabled = mgr.getObservatoin("pkt4-dhcp-disabled"); + ObservationPtr pkt4_dhcp_disabled = mgr.getObservation("pkt4-dhcp-disabled"); ObservationPtr pkt4_raw_received = mgr.getObservation("pkt4-raw-received"); ObservationPtr pkt4_received = mgr.getObservation("pkt4-received"); ObservationPtr pkt4_discover_received = mgr.getObservation("pkt4-discover-received"); diff --git a/src/lib/dhcpsrv/tests/network_state_unittest.cc b/src/lib/dhcpsrv/tests/network_state_unittest.cc index 6a3a0b83977..f94ebf4a944 100644 --- a/src/lib/dhcpsrv/tests/network_state_unittest.cc +++ b/src/lib/dhcpsrv/tests/network_state_unittest.cc @@ -12,6 +12,7 @@ #include #include #include +#include using namespace isc; using namespace isc::asiolink; @@ -631,7 +632,7 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // Disable by user. - state.disableService(NetworkState::Origin::USER_COMMAND); + state.disableService(NetworkState::USER_COMMAND); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -641,7 +642,7 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // Disable by ha local. - state.disableService(NetworkState::Origin::HA_LOCAL_COMMAND); + state.disableService(NetworkState::HA_LOCAL_COMMAND); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -651,7 +652,7 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // Disable by ha remote. - state.disableService(NetworkState::Origin::HA_REMOTE_COMMAND); + state.disableService(NetworkState::HA_REMOTE_COMMAND); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -661,7 +662,7 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // Enable by user. - state.enableService(NetworkState::Origin::USER_COMMAND); + state.enableService(NetworkState::USER_COMMAND); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -671,8 +672,8 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // Enable by ha. - state.enableService(NetworkState::Origin::HA_LOCAL_COMMAND); - state.enableService(NetworkState::Origin::HA_REMOTE_COMMAND); + state.enableService(NetworkState::HA_LOCAL_COMMAND); + state.enableService(NetworkState::HA_REMOTE_COMMAND); EXPECT_TRUE(state.isServiceEnabled()); EXPECT_EQ(0, disabled_globally->getInteger().first); @@ -682,9 +683,9 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // DB connection disables are a counter. - state.disableService(NetworkState::Origin::DB_CONNECTION); - state.disableService(NetworkState::Origin::DB_CONNECTION); - state.disableService(NetworkState::Origin::DB_CONNECTION); + state.disableService(NetworkState::DB_CONNECTION); + state.disableService(NetworkState::DB_CONNECTION); + state.disableService(NetworkState::DB_CONNECTION); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -693,8 +694,8 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); EXPECT_EQ(3, disabled_by_db->getInteger().first); - state.enableService(NetworkState::Origin::DB_CONNECTION); - state.enableService(NetworkState::Origin::DB_CONNECTION); + state.enableService(NetworkState::DB_CONNECTION); + state.enableService(NetworkState::DB_CONNECTION); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -703,7 +704,7 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); EXPECT_EQ(1, disabled_by_db->getInteger().first); - state.enableService(NetworkState::Origin::DB_CONNECTION); + state.enableService(NetworkState::DB_CONNECTION); EXPECT_TRUE(state.isServiceEnabled()); EXPECT_EQ(0, disabled_globally->getInteger().first); From e7269217e71f8cc8766283d7a0c4ad76cae83a9d Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Tue, 30 Jan 2024 13:34:36 -0800 Subject: [PATCH 5/6] Keep the StatsMgr instance on the stack. --- src/lib/dhcpsrv/network_state.cc | 36 +++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/lib/dhcpsrv/network_state.cc b/src/lib/dhcpsrv/network_state.cc index bb63ccdc5f7..cbf1304f724 100644 --- a/src/lib/dhcpsrv/network_state.cc +++ b/src/lib/dhcpsrv/network_state.cc @@ -180,16 +180,36 @@ class NetworkStateImpl : public boost::enable_shared_from_this /// /// A 0 value is used for false, 1 for true. void updateStats() { - isc::stats::StatsMgr::instance().setValue("disabled-globally", + isc::stats::StatsMgr& stats_mgr = isc::stats::StatsMgr::instance(); + + stats_mgr.setValue("disabled-globally", static_cast(globally_disabled_)); - isc::stats::StatsMgr::instance().setValue("disabled-by-user", - static_cast(disabled_by_origin_.count(NetworkState::USER_COMMAND))); - isc::stats::StatsMgr::instance().setValue("disabled-by-ha-local", - static_cast(disabled_by_origin_.count(NetworkState::HA_LOCAL_COMMAND))); - isc::stats::StatsMgr::instance().setValue("disabled-by-ha-remote", - static_cast(disabled_by_origin_.count(NetworkState::HA_REMOTE_COMMAND))); + + bool user = false, ha_local = false, ha_remote = false; + for (auto origin : disabled_by_origin_) { + switch (origin) { + case NetworkState::USER_COMMAND: + user = true; + break; + default: + if ((NetworkState::HA_LOCAL_COMMAND <= origin) && + (origin < NetworkState::HA_REMOTE_COMMAND)) { + ha_local = true; + } else if ((NetworkState::HA_LOCAL_COMMAND <= origin) && + (origin < NetworkState::DB_CONNECTION)) { + ha_remote = true; + } + break; + } + } + stats_mgr.setValue("disabled-by-user", static_cast(user)); + stats_mgr.setValue("disabled-by-ha-local", + static_cast(ha_local)); + stats_mgr.setValue("disabled-by-ha-remote", + static_cast(ha_remote)); + // Expose the disabled-by-db-connection counter by direct value. - isc::stats::StatsMgr::instance().setValue("disabled-by-db", + stats_mgr.setValue("disabled-by-db", static_cast(disabled_by_db_connection_)); } }; From 6e0ce9da7d73f95b53063e45f95aa1122363e03f Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Tue, 30 Jan 2024 13:34:51 -0800 Subject: [PATCH 6/6] Reference gitlab in Changelog. --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index 020af9f455b..744d8c37924 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,7 @@ are not counted by any metric. While disabled, a WARN log is emitted every DISABLE_WARN_PACKET{4|6}(100) dropped packets, in addition to the existing DEBUG log. + (Gitlab #3228) 2200. [func] piotrek Kea now supports new DHCPv4 option code 121, Classless Static