Skip to content

Commit aba49a9

Browse files
Aprazoraauren
authored andcommitted
fix(NSC): harden Network Services Controller against panics, races, and sync errors
This combines five defensive fixes in the Network Services Controller: 1. shuffle(): check rand.Int error before dereferencing result - rand.Int returns (nil, err) on failure, but the result was dereferenced before the error check, causing a nil panic 2. NodePort healthcheck: add RWMutex to protect shared maps - UpdateServicesInfo writes serviceInfoMap/endpointsInfoMap from the sync goroutine while HTTP handlers read concurrently 3. setupIpvsFirewall: use continue instead of return in dual-stack loop - return nil after clearing one IP family's chain skipped the second family entirely on dual-stack nodes 4. setupMangleTableRule/cleanupMangleTableRule: add nil check for ParseIP - net.ParseIP result was used without nil check, causing panic on malformed IP strings from service annotations 5. synctypeIpvs: track errors across both sync steps for heartbeat - err from syncIpvsServices was overwritten by syncHairpinIptablesRules, masking IPVS failures from the health check system
1 parent 01f8216 commit aba49a9

File tree

2 files changed

+21
-3
lines changed

2 files changed

+21
-3
lines changed

pkg/controllers/proxy/network_services_controller.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -383,15 +383,21 @@ func (nsc *NetworkServicesController) Run(healthChan chan<- *healthcheck.Control
383383
// and we don't want to duplicate the effort, so this is a slimmer version of doSync()
384384
klog.V(1).Info("Performing requested sync of ipvs services")
385385
nsc.mu.Lock()
386+
var syncErrors bool
386387
err = nsc.syncIpvsServices(nsc.getServiceMap(), nsc.endpointsMap)
387388
if err != nil {
388389
klog.Errorf("error during ipvs sync in network service controller. Error: %v", err)
390+
syncErrors = true
389391
}
390392
err = nsc.syncHairpinIptablesRules()
391393
if err != nil {
392394
klog.Errorf("error syncing hairpin iptables rules: %v", err)
395+
syncErrors = true
393396
}
394397
nsc.mu.Unlock()
398+
if syncErrors {
399+
err = fmt.Errorf("one or more errors during ipvs sync")
400+
}
395401
}
396402
if err == nil {
397403
healthcheck.SendHeartBeat(healthChan, healthcheck.NetworkServicesController)
@@ -482,9 +488,9 @@ func (nsc *NetworkServicesController) setupIpvsFirewall() error {
482488
return fmt.Errorf("failed to run iptables command: %s", err.Error())
483489
}
484490

485-
// config.IpvsPermitAll: true then create INPUT/KUBE-ROUTER-SERVICE Chain creation else return
491+
// config.IpvsPermitAll: true then create INPUT/KUBE-ROUTER-SERVICE Chain creation else skip
486492
if !nsc.ipvsPermitAll {
487-
return nil
493+
continue
488494
}
489495

490496
var comment string
@@ -981,10 +987,11 @@ func parseSchedFlags(value string) schedFlags {
981987
func shuffle(endPoints []endpointSliceInfo) []endpointSliceInfo {
982988
for index1 := range endPoints {
983989
randBitInt, err := rand.Int(rand.Reader, big.NewInt(int64(index1+1)))
984-
index2 := randBitInt.Int64()
985990
if err != nil {
986991
klog.Warningf("unable to get a random int: %v", err)
992+
continue
987993
}
994+
index2 := randBitInt.Int64()
988995
endPoints[index1], endPoints[index2] = endPoints[index2], endPoints[index1]
989996
}
990997
return endPoints
@@ -1561,6 +1568,9 @@ func (nsc *NetworkServicesController) setupMangleTableRule(ip string, protocol s
15611568
var iptablesCmdHandler utils.IPTablesHandler
15621569
tcpMSS := nsc.mtu
15631570
parsedIP := net.ParseIP(ip)
1571+
if parsedIP == nil {
1572+
return fmt.Errorf("invalid IP address: %s", ip)
1573+
}
15641574
if parsedIP.To4() != nil {
15651575
iptablesCmdHandler = nsc.iptablesCmdHandlers[v1.IPv4Protocol]
15661576
tcpMSS -= 2*ipv4.HeaderLen + tcpHeaderMinLen
@@ -1669,6 +1679,9 @@ func (nsc *NetworkServicesController) cleanupMangleTableRule(ip string, protocol
16691679
var iptablesCmdHandler utils.IPTablesHandler
16701680
tcpMSS := nsc.mtu
16711681
parsedIP := net.ParseIP(ip)
1682+
if parsedIP == nil {
1683+
return fmt.Errorf("invalid IP address: %s", ip)
1684+
}
16721685
if parsedIP.To4() != nil {
16731686
iptablesCmdHandler = nsc.iptablesCmdHandlers[v1.IPv4Protocol]
16741687
tcpMSS -= 2*ipv4.HeaderLen + tcpHeaderMinLen

pkg/controllers/proxy/nodeport_healthcheck.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ type serviceHealthCheck struct {
2424
}
2525

2626
type nphcServicesInfo struct {
27+
mu sync.RWMutex
2728
serviceInfoMap serviceInfoMap
2829
endpointsInfoMap endpointSliceInfoMap
2930
}
@@ -36,8 +37,10 @@ type nphcHandler struct {
3637
func (nphc *nodePortHealthCheckController) UpdateServicesInfo(serviceInfoMap serviceInfoMap,
3738
endpointsInfoMap endpointSliceInfoMap) error {
3839
klog.V(1).Info("Running UpdateServicesInfo for NodePort health check")
40+
nphc.mu.Lock()
3941
nphc.serviceInfoMap = serviceInfoMap
4042
nphc.endpointsInfoMap = endpointsInfoMap
43+
nphc.mu.Unlock()
4144

4245
newActiveServices := make(map[int]bool)
4346

@@ -141,7 +144,9 @@ func (nphc *nodePortHealthCheckController) stopHealthCheck(nodePort int) error {
141144
}
142145

143146
func (npHandler *nphcHandler) Handler(w http.ResponseWriter, r *http.Request) {
147+
npHandler.nphc.mu.RLock()
144148
eps := npHandler.nphc.endpointsInfoMap[npHandler.svcHC.serviceID]
149+
npHandler.nphc.mu.RUnlock()
145150
endpointsOnNode := hasActiveEndpoints(eps)
146151

147152
var numActiveEndpoints int8

0 commit comments

Comments
 (0)