Skip to content

Commit bd299fe

Browse files
fix: [NPM-WIN] ignore irrelevant errors from ipsetmanager (#1741)
* ignore certain errors * remove changes to updatePod tracking (on/off-node)
1 parent 3e5915f commit bd299fe

File tree

3 files changed

+47
-19
lines changed

3 files changed

+47
-19
lines changed

npm/pkg/dataplane/dataplane-test-cases_windows_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ const (
1515
podCrudTag Tag = "pod-crud"
1616
nsCrudTag Tag = "namespace-crud"
1717
netpolCrudTag Tag = "netpol-crud"
18+
reconcileTag Tag = "reconcile"
1819
)
1920

2021
const (
@@ -155,6 +156,7 @@ func getAllSerialTests() []*SerialTestCase {
155156
TestCaseMetadata: &TestCaseMetadata{
156157
Tags: []Tag{
157158
podCrudTag,
159+
reconcileTag,
158160
},
159161
DpCfg: defaultWindowsDPCfg,
160162
InitialEndpoints: nil,
@@ -482,6 +484,33 @@ func getAllSerialTests() []*SerialTestCase {
482484
},
483485
},
484486
},
487+
{
488+
Description: "issue 1613: remove last instance of label, then reconcile IPSets, then apply DP",
489+
Actions: []*Action{
490+
CreateEndpoint(endpoint1, ip1),
491+
CreatePod("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}),
492+
ApplyDP(),
493+
UpdatePodLabels("x", "a", ip1, thisNode, map[string]string{"k1": "v1"}, nil),
494+
ReconcileDP(),
495+
ApplyDP(),
496+
},
497+
TestCaseMetadata: &TestCaseMetadata{
498+
Tags: []Tag{
499+
podCrudTag,
500+
reconcileTag,
501+
},
502+
DpCfg: defaultWindowsDPCfg,
503+
InitialEndpoints: nil,
504+
ExpectedSetPolicies: []*hcn.SetPolicySetting{
505+
dptestutils.SetPolicy(emptySet),
506+
dptestutils.SetPolicy(allNamespaces, emptySet.GetHashedName(), nsXSet.GetHashedName()),
507+
dptestutils.SetPolicy(nsXSet, ip1),
508+
},
509+
ExpectedEnpdointACLs: map[string][]*hnswrapper.FakeEndpointPolicy{
510+
endpoint1: {},
511+
},
512+
},
513+
},
485514
}
486515
}
487516

npm/pkg/dataplane/dataplane.go

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -219,31 +219,25 @@ func (dp *DataPlane) ApplyDataPlane() error {
219219
err := dp.refreshPodEndpoints()
220220
if err != nil {
221221
metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "[DataPlane] failed to refresh endpoints while updating pods. err: [%s]", err.Error())
222-
return fmt.Errorf("[DataPlane] failed to refresh endpoints while updating pods. err: [%w]", err)
222+
// return as success since this can be retried irrespective of other operations
223+
return nil
223224
}
224225

225226
// lock updatePodCache while driving goal state to kernel
226227
// prevents another ApplyDataplane call from updating the same pods
227228
dp.updatePodCache.Lock()
228229
defer dp.updatePodCache.Unlock()
229230

230-
var aggregateErr error
231231
for podKey, pod := range dp.updatePodCache.cache {
232232
err := dp.updatePod(pod)
233233
if err != nil {
234-
if aggregateErr == nil {
235-
aggregateErr = fmt.Errorf("failed to update pod while applying the dataplane. key: [%s], err: [%w]", podKey, err)
236-
} else {
237-
aggregateErr = fmt.Errorf("failed to update pod while applying the dataplane. key: [%s], err: [%s]. previous err: [%w]", podKey, err.Error(), aggregateErr)
238-
}
234+
// move on to the next and later return as success since this can be retried irrespective of other operations
239235
metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, "failed to update pod while applying the dataplane. key: [%s], err: [%s]", podKey, err.Error())
240236
continue
241237
}
238+
242239
delete(dp.updatePodCache.cache, podKey)
243240
}
244-
if aggregateErr != nil {
245-
return fmt.Errorf("[DataPlane] error while updating pods: %w", aggregateErr)
246-
}
247241
}
248242
return nil
249243
}
@@ -395,17 +389,18 @@ func (dp *DataPlane) createIPSetsAndReferences(sets []*ipsets.TranslatedIPSet, n
395389
}
396390

397391
func (dp *DataPlane) deleteIPSetsAndReferences(sets []*ipsets.TranslatedIPSet, netpolName string, referenceType ipsets.ReferenceType) error {
392+
for _, set := range sets {
393+
prefixName := set.Metadata.GetPrefixName()
394+
if err := dp.ipsetMgr.DeleteReference(prefixName, netpolName, referenceType); err != nil {
395+
// with current implementation of DeleteReference(), err will be ipsets.ErrSetDoesNotExist
396+
klog.Infof("[DataPlane] ignoring delete reference on non-existent set. ipset: %s. netpol: %s. referenceType: %s", prefixName, netpolName, referenceType)
397+
}
398+
}
399+
398400
npmErrorString := npmerrors.DeleteSelectorReference
399401
if referenceType == ipsets.NetPolType {
400402
npmErrorString = npmerrors.DeleteNetPolReference
401403
}
402-
for _, set := range sets {
403-
// TODO ignore set does not exist error
404-
err := dp.ipsetMgr.DeleteReference(set.Metadata.GetPrefixName(), netpolName, referenceType)
405-
if err != nil {
406-
return npmerrors.Errorf(npmErrorString, false, fmt.Sprintf("[DataPlane] failed to deleteIPSetReferences with err: %s", err.Error()))
407-
}
408-
}
409404

410405
// Check if any list sets are provided with members to delete
411406
// NOTE: every translated member will be deleted, even if the member is part of the same set in another policy

npm/pkg/dataplane/dataplane_windows.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,9 @@ func (dp *DataPlane) updatePod(pod *updateNPMPod) error {
167167
*/
168168
selectorReference, err := dp.ipsetMgr.GetSelectorReferencesBySet(setName)
169169
if err != nil {
170-
return err
170+
// ignore this set since it may have been deleted in the background reconcile thread
171+
klog.Infof("[DataPlane] ignoring pod update for ipset to remove since the set does not exist. pod: %+v. set: %s", pod, setName)
172+
continue
171173
}
172174

173175
for policyKey := range selectorReference {
@@ -204,7 +206,9 @@ func (dp *DataPlane) updatePod(pod *updateNPMPod) error {
204206
*/
205207
selectorReference, err := dp.ipsetMgr.GetSelectorReferencesBySet(setName)
206208
if err != nil {
207-
return err
209+
// ignore this set since it may have been deleted in the background reconcile thread
210+
klog.Infof("[DataPlane] ignoring pod update for ipset to remove since the set does not exist. pod: %+v. set: %s", pod, setName)
211+
continue
208212
}
209213

210214
for policyKey := range selectorReference {

0 commit comments

Comments
 (0)