@@ -13,6 +13,7 @@ import (
1313 "net/http"
1414 "net/url"
1515 "os"
16+ "os/exec"
1617 "path/filepath"
1718 "sort"
1819 "strings"
@@ -1030,6 +1031,7 @@ func readRke2ServerFromConfig(configDir string) (string, []string, error) {
10301031 v := viper .New ()
10311032 usedPaths := []string {}
10321033 readCount := 0
1034+ v .SetConfigType ("yaml" ) // Required before MergeConfig with readers
10331035
10341036 readAndMerge := func (path string ) error {
10351037 data , err := os .ReadFile (path )
@@ -1140,36 +1142,24 @@ func CollectKubeVipHealth() (*KubeVipHealth, error) {
11401142 Msg ("Auto-detected kube-vip floating IPs from pods" )
11411143 }
11421144
1143- floatingIPs := uniqueStrings (append (K8sHealthConfig .K8s .Floating_ips , detectedFloatingIPs ... ))
1145+ // Prefer auto-detected floating IPs from kube-vip pods; fall back to config only if none found.
1146+ floatingIPs := detectedFloatingIPs
1147+ if len (floatingIPs ) == 0 && len (K8sHealthConfig .K8s .Floating_ips ) > 0 {
1148+ floatingIPs = append (floatingIPs , K8sHealthConfig .K8s .Floating_ips ... )
1149+ }
1150+ floatingIPs = uniqueStrings (floatingIPs )
11441151
11451152 if health .PodsAvailable {
11461153 alarmCheckUp ("kube_vip_pods" , "Kube-VIP pods detected in kube-system." , false )
11471154 if len (floatingIPs ) > 0 {
11481155 for _ , floatingIp := range floatingIPs {
11491156 check := FloatingIPCheck {IP : floatingIp , TestType : "kube-vip" }
1150- pinger , err := probing .NewPinger (floatingIp )
1151- if err != nil {
1152- log .Error ().
1153- Str ("component" , "k8sHealth" ).
1154- Str ("operation" , "collect_kube_vip_health" ).
1155- Str ("floating_ip" , floatingIp ).
1156- Err (err ).
1157- Msg ("Error creating pinger for Kube-VIP IP" )
1158- check .IsAvailable = false
1159- // Optionally add a message to the check or health.Error
1157+ ok , errMsg := pingFloatingIP (floatingIp )
1158+ check .IsAvailable = ok
1159+ if ok {
1160+ alarmCheckUp ("floating_ip_kube_vip_" + floatingIp , fmt .Sprintf ("Kube-VIP Floating IP %s is reachable." , floatingIp ), false )
11601161 } else {
1161- // Use unprivileged ping to avoid raw socket permission issues
1162- pinger .SetPrivileged (false )
1163- pinger .Count = 1
1164- pinger .Timeout = 3 * time .Second // Reduced timeout for quicker checks
1165- err = pinger .Run ()
1166- if err != nil {
1167- check .IsAvailable = false
1168- alarmCheckDown ("floating_ip_kube_vip_" + floatingIp , fmt .Sprintf ("Kube-VIP Floating IP %s is not reachable: %v" , floatingIp , err ), false , "" , "" )
1169- } else {
1170- check .IsAvailable = true
1171- alarmCheckUp ("floating_ip_kube_vip_" + floatingIp , fmt .Sprintf ("Kube-VIP Floating IP %s is reachable." , floatingIp ), false )
1172- }
1162+ alarmCheckDown ("floating_ip_kube_vip_" + floatingIp , fmt .Sprintf ("Kube-VIP Floating IP %s is not reachable: %s" , floatingIp , errMsg ), false , "" , "" )
11731163 }
11741164 health .FloatingIPChecks = append (health .FloatingIPChecks , check )
11751165 }
@@ -1784,6 +1774,46 @@ func CollectRKE2Information() *RKE2Info {
17841774 return info
17851775}
17861776
1777+ // pingFloatingIP tries multiple strategies to avoid false negatives due to permission or firewall rules.
1778+ // Order: privileged ICMP -> unprivileged UDP -> system ping binary.
1779+ func pingFloatingIP (ip string ) (bool , string ) {
1780+ tryPing := func (privileged bool ) (bool , string ) {
1781+ pinger , err := probing .NewPinger (ip )
1782+ if err != nil {
1783+ return false , err .Error ()
1784+ }
1785+ pinger .Count = 1
1786+ pinger .Timeout = 3 * time .Second
1787+ pinger .SetPrivileged (privileged )
1788+ if err := pinger .Run (); err != nil {
1789+ return false , err .Error ()
1790+ }
1791+ if pinger .Statistics ().PacketsRecv > 0 {
1792+ return true , ""
1793+ }
1794+ return false , "no packets received"
1795+ }
1796+
1797+ // 1) Try privileged raw ICMP
1798+ if ok , errMsg := tryPing (true ); ok {
1799+ return true , ""
1800+ } else if ! strings .Contains (errMsg , "ermission" ) { // if not a permission issue, keep error but still try fallback
1801+ // proceed but remember the error
1802+ }
1803+
1804+ // 2) Try unprivileged UDP ping
1805+ if ok , errMsg := tryPing (false ); ok {
1806+ return true , ""
1807+ } else {
1808+ // 3) Fallback to system ping (setuid helper)
1809+ cmd := exec .Command ("ping" , "-c" , "1" , "-W" , "1" , ip )
1810+ if err := cmd .Run (); err == nil {
1811+ return true , ""
1812+ }
1813+ return false , errMsg
1814+ }
1815+ }
1816+
17871817// isK8sAlarmEnabled returns the effective alarm toggle, defaulting to the global
17881818// alarm setting when k8sHealth-specific config is not provided.
17891819func isK8sAlarmEnabled () bool {
0 commit comments