Skip to content

Commit 17978c2

Browse files
mattmattox and claude committed
feat(network): add IP forwarding monitor to detect disabled ip_forward (Issue #13)
- New network-ip-forwarding monitor reads /proc/sys/net/ipv4/ip_forward and /proc/sys/net/ipv6/conf/all/forwarding to detect when IP forwarding is disabled, which silently breaks Kubernetes overlay networking
- Optional per-interface forwarding checks via glob pattern
- Configurable ProcPath for containerized deployments (/host/proc)
- IPv4 disabled = Error severity, IPv6 disabled = Warning severity
- Comprehensive test suite with 13 test functions covering all scenarios
- Helm chart: monitor config, PrometheusRule alert, configurable values

Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 8d555e8 commit 17978c2

File tree

5 files changed

+1117
-0
lines changed

5 files changed

+1117
-0
lines changed

helm/node-doctor/templates/configmap.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,20 @@ data:
7979
ioHealthCheck: {{ .Values.monitors.disk.ioHealthCheck }}
8080
{{- end }}
8181
82+
{{- if .Values.monitors.ipForwarding.enabled }}
83+
# IP Forwarding Monitor - Detects disabled ip_forward
84+
- name: ip-forwarding-check
85+
type: network-ip-forwarding
86+
enabled: true
87+
interval: {{ .Values.monitors.ipForwarding.interval }}
88+
timeout: {{ .Values.monitors.ipForwarding.timeout }}
89+
config:
90+
checkIPv4: {{ .Values.monitors.ipForwarding.checkIPv4 }}
91+
checkIPv6: {{ .Values.monitors.ipForwarding.checkIPv6 }}
92+
checkPerInterface: {{ .Values.monitors.ipForwarding.checkPerInterface }}
93+
procPath: /host/proc
94+
{{- end }}
95+
8296
{{- if .Values.overlayTest.enabled }}
8397
# CNI Connectivity Monitor - Tests overlay network connectivity
8498
# Uses overlay-test pods for accurate CNI testing

helm/node-doctor/templates/prometheusrule.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,23 @@ spec:
103103
{{- with .Values.prometheusRule.critical.readOnlyFilesystem.annotations }}
104104
{{- toYaml . | nindent 12 }}
105105
{{- end }}
106+
107+
- alert: NodeDoctorIPForwardingDisabled
108+
expr: |
109+
node_doctor_monitor_condition_status{condition_type="IPForwardingDisabled"} == 1
110+
for: {{ .Values.prometheusRule.critical.ipForwardingDisabled.for }}
111+
labels:
112+
severity: critical
113+
component: network
114+
{{- with .Values.prometheusRule.critical.ipForwardingDisabled.labels }}
115+
{{- toYaml . | nindent 12 }}
116+
{{- end }}
117+
annotations:
118+
summary: "IP forwarding disabled on {{`{{ $labels.node }}`}}"
119+
description: "Node {{`{{ $labels.node }}`}} has IP forwarding disabled. Kubernetes pod networking will not function."
120+
{{- with .Values.prometheusRule.critical.ipForwardingDisabled.annotations }}
121+
{{- toYaml . | nindent 12 }}
122+
{{- end }}
106123
{{- end }}
107124

108125
{{- if .Values.prometheusRule.warning.enabled }}

helm/node-doctor/values.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,10 @@ prometheusRule:
167167
for: 1m
168168
labels: {}
169169
annotations: {}
170+
ipForwardingDisabled:
171+
for: 2m
172+
labels: {}
173+
annotations: {}
170174

171175
# Warning alerts - should be investigated soon
172176
warning:
@@ -342,6 +346,14 @@ monitors:
342346
readOnlyDetection: true
343347
ioHealthCheck: true
344348

349+
ipForwarding:
350+
enabled: true
351+
interval: 30s
352+
timeout: 5s
353+
checkIPv4: true
354+
checkIPv6: true
355+
checkPerInterface: false
356+
345357
# Exporters configuration
346358
exporters:
347359
kubernetes:
Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
// Package network provides network health monitoring capabilities.
2+
package network
3+
4+
import (
5+
"context"
6+
"fmt"
7+
"os"
8+
"path/filepath"
9+
"slices"
10+
"strings"
11+
12+
"github.com/supporttools/node-doctor/pkg/monitors"
13+
"github.com/supporttools/node-doctor/pkg/types"
14+
)
15+
16+
const (
17+
// Default configuration values for the IP forwarding monitor.
// parseIPForwardingConfig starts from these and overrides them with any
// keys present in the monitor's config map.
18+
defaultCheckIPv4 = true
19+
defaultCheckIPv6 = true
20+
defaultCheckPerInterface = false
21+
defaultProcPath = "/proc"
22+
23+
// Proc filesystem paths (relative to ProcPath).
// perInterfacePattern is a glob pattern; the "*" component stands for the
// interface directory name and covers IPv4 per-interface settings only.
24+
ipv4ForwardPath = "sys/net/ipv4/ip_forward"
25+
ipv6ForwardPath = "sys/net/ipv6/conf/all/forwarding"
26+
perInterfacePattern = "sys/net/ipv4/conf/*/forwarding"
27+
)
28+
29+
// IPForwardingConfig holds the configuration for the IP forwarding monitor.
// It is produced by parseIPForwardingConfig from the monitor's config map.
30+
type IPForwardingConfig struct {
31+
// CheckIPv4 enables checking <ProcPath>/sys/net/ipv4/ip_forward.
32+
CheckIPv4 bool
33+
// CheckIPv6 enables checking <ProcPath>/sys/net/ipv6/conf/all/forwarding.
34+
CheckIPv6 bool
35+
// CheckPerInterface enables checking per-interface IPv4 forwarding settings
// (<ProcPath>/sys/net/ipv4/conf/<interface>/forwarding).
36+
CheckPerInterface bool
37+
// Interfaces limits per-interface checks to these specific interfaces.
38+
// Empty means check all interfaces found via glob.
// The "lo", "all" and "default" entries are skipped regardless of this list.
39+
Interfaces []string
40+
// ProcPath is the base path for the proc filesystem.
41+
// Defaults to "/proc", but can be set to "/host/proc" for containerized deployments.
42+
ProcPath string
43+
}
44+
45+
// IPForwardingMonitor monitors IP forwarding settings required for Kubernetes networking.
// Instances are created by NewIPForwardingMonitor, which installs
// checkIPForwarding as the check function of the embedded BaseMonitor.
46+
type IPForwardingMonitor struct {
47+
// name is the configured monitor name; it is used as the source of every
// Status produced by the check.
name string
48+
// config is the parsed monitor configuration.
config *IPForwardingConfig
49+
50+
// BaseMonitor is embedded so its methods are promoted onto this type.
*monitors.BaseMonitor
51+
}
52+
53+
// init registers the IP forwarding monitor with the monitor registry.
54+
func init() {
55+
monitors.MustRegister(monitors.MonitorInfo{
56+
Type: "network-ip-forwarding",
57+
Factory: NewIPForwardingMonitor,
58+
Validator: ValidateIPForwardingConfig,
59+
Description: "Monitors IP forwarding settings required for Kubernetes networking",
60+
DefaultConfig: &types.MonitorConfig{
61+
Name: "ip-forwarding-check",
62+
Type: "network-ip-forwarding",
63+
Enabled: true,
64+
IntervalString: "30s",
65+
TimeoutString: "5s",
66+
Config: map[string]any{
67+
"checkIPv4": true,
68+
"checkIPv6": true,
69+
"checkPerInterface": false,
70+
"procPath": "/proc",
71+
},
72+
},
73+
})
74+
}
75+
76+
// NewIPForwardingMonitor creates a new IP forwarding monitor instance.
77+
func NewIPForwardingMonitor(ctx context.Context, config types.MonitorConfig) (types.Monitor, error) {
78+
ipfConfig, err := parseIPForwardingConfig(config.Config)
79+
if err != nil {
80+
return nil, fmt.Errorf("failed to parse ip forwarding config: %w", err)
81+
}
82+
83+
baseMonitor, err := monitors.NewBaseMonitor(config.Name, config.Interval, config.Timeout)
84+
if err != nil {
85+
return nil, fmt.Errorf("failed to create base monitor: %w", err)
86+
}
87+
88+
monitor := &IPForwardingMonitor{
89+
name: config.Name,
90+
config: ipfConfig,
91+
BaseMonitor: baseMonitor,
92+
}
93+
94+
if err := baseMonitor.SetCheckFunc(monitor.checkIPForwarding); err != nil {
95+
return nil, fmt.Errorf("failed to set check function: %w", err)
96+
}
97+
98+
return monitor, nil
99+
}
100+
101+
// parseIPForwardingConfig parses the IP forwarding monitor configuration from a map.
102+
func parseIPForwardingConfig(configMap map[string]any) (*IPForwardingConfig, error) {
103+
config := &IPForwardingConfig{
104+
CheckIPv4: defaultCheckIPv4,
105+
CheckIPv6: defaultCheckIPv6,
106+
CheckPerInterface: defaultCheckPerInterface,
107+
ProcPath: defaultProcPath,
108+
}
109+
110+
if configMap == nil {
111+
return config, nil
112+
}
113+
114+
if v, ok := configMap["checkIPv4"]; ok {
115+
boolVal, ok := v.(bool)
116+
if !ok {
117+
return nil, fmt.Errorf("checkIPv4 must be a boolean, got %T", v)
118+
}
119+
config.CheckIPv4 = boolVal
120+
}
121+
122+
if v, ok := configMap["checkIPv6"]; ok {
123+
boolVal, ok := v.(bool)
124+
if !ok {
125+
return nil, fmt.Errorf("checkIPv6 must be a boolean, got %T", v)
126+
}
127+
config.CheckIPv6 = boolVal
128+
}
129+
130+
if v, ok := configMap["checkPerInterface"]; ok {
131+
boolVal, ok := v.(bool)
132+
if !ok {
133+
return nil, fmt.Errorf("checkPerInterface must be a boolean, got %T", v)
134+
}
135+
config.CheckPerInterface = boolVal
136+
}
137+
138+
if v, ok := configMap["interfaces"]; ok {
139+
switch val := v.(type) {
140+
case []any:
141+
for _, item := range val {
142+
strVal, ok := item.(string)
143+
if !ok {
144+
return nil, fmt.Errorf("interfaces must be a list of strings, got %T element", item)
145+
}
146+
config.Interfaces = append(config.Interfaces, strVal)
147+
}
148+
case []string:
149+
config.Interfaces = val
150+
default:
151+
return nil, fmt.Errorf("interfaces must be a list of strings, got %T", v)
152+
}
153+
}
154+
155+
if v, ok := configMap["procPath"]; ok {
156+
strVal, ok := v.(string)
157+
if !ok {
158+
return nil, fmt.Errorf("procPath must be a string, got %T", v)
159+
}
160+
config.ProcPath = strVal
161+
}
162+
163+
return config, nil
164+
}
165+
166+
// ValidateIPForwardingConfig validates the IP forwarding monitor configuration.
167+
func ValidateIPForwardingConfig(config types.MonitorConfig) error {
168+
_, err := parseIPForwardingConfig(config.Config)
169+
return err
170+
}
171+
172+
// checkIPForwarding performs the IP forwarding health check.
173+
func (m *IPForwardingMonitor) checkIPForwarding(ctx context.Context) (*types.Status, error) {
174+
status := types.NewStatus(m.name)
175+
176+
var disabledSettings []string
177+
178+
// Check IPv4 forwarding
179+
if m.config.CheckIPv4 {
180+
ipv4Path := filepath.Join(m.config.ProcPath, ipv4ForwardPath)
181+
enabled, err := readForwardingSetting(ipv4Path)
182+
if err != nil {
183+
status.AddEvent(types.NewEvent(
184+
types.EventError,
185+
"IPForwardingReadError",
186+
fmt.Sprintf("Failed to read IPv4 forwarding setting from %s: %v", ipv4Path, err),
187+
))
188+
disabledSettings = append(disabledSettings, "ipv4.ip_forward (unreadable)")
189+
} else if !enabled {
190+
disabledSettings = append(disabledSettings, "net.ipv4.ip_forward=0")
191+
status.AddEvent(types.NewEvent(
192+
types.EventError,
193+
"IPv4ForwardingDisabled",
194+
fmt.Sprintf("IPv4 forwarding is disabled (net.ipv4.ip_forward=0). "+
195+
"Kubernetes pod networking will not function. "+
196+
"Remediate with: sysctl -w net.ipv4.ip_forward=1"),
197+
))
198+
}
199+
}
200+
201+
// Check IPv6 forwarding
202+
if m.config.CheckIPv6 {
203+
ipv6Path := filepath.Join(m.config.ProcPath, ipv6ForwardPath)
204+
enabled, err := readForwardingSetting(ipv6Path)
205+
if err != nil {
206+
// IPv6 may not be available on all systems, treat as warning
207+
status.AddEvent(types.NewEvent(
208+
types.EventWarning,
209+
"IPForwardingReadError",
210+
fmt.Sprintf("Failed to read IPv6 forwarding setting from %s: %v", ipv6Path, err),
211+
))
212+
} else if !enabled {
213+
disabledSettings = append(disabledSettings, "net.ipv6.conf.all.forwarding=0")
214+
status.AddEvent(types.NewEvent(
215+
types.EventWarning,
216+
"IPv6ForwardingDisabled",
217+
fmt.Sprintf("IPv6 forwarding is disabled (net.ipv6.conf.all.forwarding=0). "+
218+
"IPv6 pod networking may not function. "+
219+
"Remediate with: sysctl -w net.ipv6.conf.all.forwarding=1"),
220+
))
221+
}
222+
}
223+
224+
// Check per-interface forwarding
225+
if m.config.CheckPerInterface {
226+
ifaceDisabled := m.checkPerInterfaceForwarding(status)
227+
disabledSettings = append(disabledSettings, ifaceDisabled...)
228+
}
229+
230+
// Set condition based on results
231+
if len(disabledSettings) > 0 {
232+
status.AddCondition(types.NewCondition(
233+
"IPForwardingDisabled",
234+
types.ConditionTrue,
235+
"ForwardingDisabled",
236+
fmt.Sprintf("IP forwarding disabled: %s", strings.Join(disabledSettings, ", ")),
237+
))
238+
} else {
239+
status.AddCondition(types.NewCondition(
240+
"IPForwardingDisabled",
241+
types.ConditionFalse,
242+
"ForwardingEnabled",
243+
"All checked IP forwarding settings are enabled",
244+
))
245+
status.AddEvent(types.NewEvent(
246+
types.EventInfo,
247+
"IPForwardingHealthy",
248+
"All IP forwarding settings are correctly enabled for Kubernetes networking",
249+
))
250+
}
251+
252+
return status, nil
253+
}
254+
255+
// checkPerInterfaceForwarding checks per-interface IPv4 forwarding settings.
256+
// Returns a list of disabled settings descriptions.
257+
func (m *IPForwardingMonitor) checkPerInterfaceForwarding(status *types.Status) []string {
258+
var disabled []string
259+
260+
pattern := filepath.Join(m.config.ProcPath, perInterfacePattern)
261+
matches, err := filepath.Glob(pattern)
262+
if err != nil {
263+
status.AddEvent(types.NewEvent(
264+
types.EventWarning,
265+
"IPForwardingGlobError",
266+
fmt.Sprintf("Failed to glob per-interface forwarding files: %v", err),
267+
))
268+
return nil
269+
}
270+
271+
for _, match := range matches {
272+
// Extract interface name from path
273+
ifaceName := extractInterfaceName(match)
274+
275+
// Skip loopback and special interfaces
276+
if ifaceName == "lo" || ifaceName == "all" || ifaceName == "default" || ifaceName == "" {
277+
continue
278+
}
279+
280+
// If specific interfaces are configured, filter
281+
if len(m.config.Interfaces) > 0 && !slices.Contains(m.config.Interfaces, ifaceName) {
282+
continue
283+
}
284+
285+
enabled, err := readForwardingSetting(match)
286+
if err != nil {
287+
continue // Skip unreadable interfaces
288+
}
289+
290+
if !enabled {
291+
setting := fmt.Sprintf("net.ipv4.conf.%s.forwarding=0", ifaceName)
292+
disabled = append(disabled, setting)
293+
status.AddEvent(types.NewEvent(
294+
types.EventWarning,
295+
"InterfaceForwardingDisabled",
296+
fmt.Sprintf("IPv4 forwarding disabled on interface %s. "+
297+
"Remediate with: sysctl -w net.ipv4.conf.%s.forwarding=1", ifaceName, ifaceName),
298+
))
299+
}
300+
}
301+
302+
return disabled
303+
}
304+
305+
// extractInterfaceName extracts the interface name from a per-interface proc path.
// e.g., "/proc/sys/net/ipv4/conf/eth0/forwarding" -> "eth0"
//
// The canonical layout ".../conf/<iface>/<file>" is recognised from the END of
// the path, so a base directory that itself contains a component named "conf"
// (for example "/host/conf/proc") does not confuse the result. Paths that do
// not have that layout fall back to returning the component following the
// first "conf" component. An empty string is returned when no interface name
// can be determined.
func extractInterfaceName(path string) string {
	parts := strings.Split(path, string(os.PathSeparator))

	// Preferred: the third-from-last component is "conf", which makes the
	// second-from-last component the interface directory.
	if n := len(parts); n >= 3 && parts[n-3] == "conf" {
		return parts[n-2]
	}

	// Fallback for non-canonical paths: first "conf" followed by anything.
	for i, part := range parts {
		if part == "conf" && i+1 < len(parts) {
			return parts[i+1]
		}
	}
	return ""
}
316+
317+
// readForwardingSetting reads a forwarding sysctl file and returns whether forwarding is enabled.
//
// The kernel treats these settings as "0 = disabled, non-zero = enabled", so
// any non-zero integer (not only "1") is reported as enabled. Content that is
// empty or not a plain decimal number is reported as disabled. An error is
// returned only when the file cannot be read.
func readForwardingSetting(path string) (bool, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return false, fmt.Errorf("failed to read %s: %w", path, err)
	}

	return parseForwardingValue(string(data)), nil
}

// parseForwardingValue reports whether raw, the textual content of a
// forwarding sysctl file, denotes a non-zero decimal integer. Surrounding
// whitespace is ignored; anything other than ASCII digits yields false.
func parseForwardingValue(raw string) bool {
	value := strings.TrimSpace(raw)
	if value == "" {
		return false
	}

	nonZero := false
	for _, r := range value {
		if r < '0' || r > '9' {
			return false
		}
		if r != '0' {
			nonZero = true
		}
	}
	return nonZero
}
327+

0 commit comments

Comments
 (0)