44 "errors"
55 "fmt"
66 "strings"
7+ "sync"
78 "time"
89
910 "github.com/Azure/azure-container-networking/common"
@@ -18,12 +19,12 @@ import (
1819const (
1920 reconcileDuration = time .Duration (5 * time .Minute )
2021
21- contextBackground = "BACKGROUND"
22- contextApplyDP = "APPLY-DP"
23- contextAddNetPol = "ADD-NETPOL"
24- contextAddNetPolBootup = "BOOTUP-ADD-NETPOL"
25- contextAddNetPolCIDRPrecaution = "ADD-NETPOL-CIDR -PRECAUTION"
26- contextDelNetPol = "DEL-NETPOL"
22+ contextBackground = "BACKGROUND"
23+ contextApplyDP = "APPLY-DP"
24+ contextAddNetPol = "ADD-NETPOL"
25+ contextAddNetPolBootup = "BOOTUP-ADD-NETPOL"
26+ contextAddNetPolPrecaution = "ADD-NETPOL-PRECAUTION"
27+ contextDelNetPol = "DEL-NETPOL"
2728)
2829
2930var (
@@ -48,6 +49,11 @@ type Config struct {
4849 * policies.PolicyManagerCfg
4950}
5051
52+ type removePolicyInfo struct {
53+ sync.Mutex
54+ previousRemovePolicyIPSetsFailed bool
55+ }
56+
5157type DataPlane struct {
5258 * Config
5359 applyInBackground bool
@@ -64,7 +70,10 @@ type DataPlane struct {
6470 endpointQuery * endpointQuery
6571 applyInfo * applyInfo
6672 netPolQueue * netPolQueue
67- stopChannel <- chan struct {}
73+ // removePolicyInfo tracks when a policy was removed yet had ApplyIPSet failures.
74+ // This field is only relevant for Linux.
75+ removePolicyInfo removePolicyInfo
76+ stopChannel <- chan struct {}
6877}
6978
7079func NewDataPlane (nodeName string , ioShim * common.IOShim , cfg * Config , stopChannel <- chan struct {}) (* DataPlane , error ) {
@@ -335,6 +344,9 @@ func (dp *DataPlane) applyDataPlaneNow(context string) error {
335344 }
336345 klog .Infof ("[DataPlane] [ApplyDataPlane] [%s] finished applying ipsets" , context )
337346
347+ // see comment in RemovePolicy() for why this is here
348+ dp .setRemovePolicyFailure (false )
349+
338350 if dp .applyInBackground {
339351 dp .applyInfo .Lock ()
340352 dp .applyInfo .numBatches = 0
@@ -472,26 +484,17 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error {
472484 }
473485 }
474486
475- if ! util .IsWindowsDP () {
476- for _ , netPol := range netPols {
477- if ! (netPol .HasCIDRRules () && dp .ipsetMgr .PreviousApplyFailed ()) {
478- continue
479- }
480-
481- if inBootupPhase {
482- // this should never happen because bootup phase is for windows, but just in case, we don't want to applyDataplaneNow() or else there will be a deadlock on dp.applyInfo
483- msg := fmt .Sprintf ("[DataPlane] [%s] at risk of improperly applying a CIDR policy which is removed then readded" , contextAddNetPolCIDRPrecaution )
484- klog .Warning (msg )
485- metrics .SendErrorLogAndMetric (util .DaemonDataplaneID , msg )
486- break
487- }
488-
487+ if dp .hadRemovePolicyFailure () {
488+ if inBootupPhase {
489+ // this should never happen because bootup phase is for windows, but just in case, we don't want to applyDataplaneNow() or else there will be a deadlock on dp.applyInfo
490+ msg := fmt .Sprintf ("[DataPlane] [%s] at risk of improperly applying a policy which is removed then readded" , contextAddNetPolPrecaution )
491+ klog .Warning (msg )
492+ metrics .SendErrorLogAndMetric (util .DaemonDataplaneID , msg )
493+ } else {
489494 // prevent #2977
490- if err := dp .applyDataPlaneNow (contextAddNetPolCIDRPrecaution ); err != nil {
495+ if err := dp .applyDataPlaneNow (contextAddNetPolPrecaution ); err != nil {
491496 return err // nolint:wrapcheck // unnecessary to wrap error since the provided context is included in the error
492497 }
493-
494- break
495498 }
496499 }
497500
@@ -531,6 +534,9 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error {
531534 }
532535 klog .Infof ("[DataPlane] [%s] finished applying ipsets" , contextAddNetPolBootup )
533536
537+ // see comment in RemovePolicy() for why this is here
538+ dp .setRemovePolicyFailure (false )
539+
534540 dp .applyInfo .numBatches = 0
535541 }
536542
@@ -627,7 +633,16 @@ func (dp *DataPlane) RemovePolicy(policyKey string) error {
627633 return err
628634 }
629635
630- return dp .applyDataPlaneNow (contextApplyDP )
636+ if err := dp .applyDataPlaneNow (contextDelNetPol ); err != nil {
637+ // Failed to apply IPSets while removing this policy.
638+ // Consider this removepolicy call a failure until apply IPSets is successful.
639+ // Related to #2977
640+ klog .Info ("[DataPlane] remove policy has failed to apply ipsets. setting remove policy failure" )
641+ dp .setRemovePolicyFailure (true )
642+ return err // nolint:wrapcheck // unnecessary to wrap error since the provided context is included in the error
643+ }
644+
645+ return nil
631646}
632647
633648// UpdatePolicy takes in updated policy object, calculates the delta and applies changes
@@ -749,3 +764,23 @@ func (dp *DataPlane) deleteIPSetsAndReferences(sets []*ipsets.TranslatedIPSet, n
749764 }
750765 return nil
751766}
767+
768+ func (dp * DataPlane ) setRemovePolicyFailure (failed bool ) {
769+ if util .IsWindowsDP () {
770+ return
771+ }
772+
773+ dp .removePolicyInfo .Lock ()
774+ defer dp .removePolicyInfo .Unlock ()
775+ dp .removePolicyInfo .previousRemovePolicyIPSetsFailed = failed
776+ }
777+
778+ func (dp * DataPlane ) hadRemovePolicyFailure () bool {
779+ if util .IsWindowsDP () {
780+ return false
781+ }
782+
783+ dp .removePolicyInfo .Lock ()
784+ defer dp .removePolicyInfo .Unlock ()
785+ return dp .removePolicyInfo .previousRemovePolicyIPSetsFailed
786+ }
0 commit comments