Skip to content

Commit 3e6ca73

Browse files
authored
[NPM][Fix] Adding redundant check to ignore endpoint not found while … (#1560)
* [NPM][Fix] Adding redundant check to ignore endpoint not found while removing policies
* adding error in logline
* adding error in logline
* making sure applyDP error is handled correctly
* revert changes in controllers, covered in a different PR
* adding UTs
1 parent 8888338 commit 3e6ca73

File tree

3 files changed

+33
-5
lines changed

3 files changed

+33
-5
lines changed

network/hnswrapper/hnsv2wrapperfake.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ func (f Hnsv2wrapperFake) GetEndpointByID(endpointID string) (*hcn.HostComputeEn
203203
if ep, ok := f.Cache.endpoints[endpointID]; ok {
204204
return ep.GetHCNObj(), nil
205205
}
206-
return &hcn.HostComputeEndpoint{}, nil
206+
return &hcn.HostComputeEndpoint{}, hcn.EndpointNotFoundError{EndpointID: endpointID}
207207
}
208208

209209
func (f Hnsv2wrapperFake) CreateEndpoint(endpoint *hcn.HostComputeEndpoint) (*hcn.HostComputeEndpoint, error) {

npm/pkg/dataplane/policies/policymanager_windows.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,9 @@ func (pMgr *PolicyManager) removePolicy(policy *NPMNetworkPolicy, endpointList m
174174
func (pMgr *PolicyManager) removePolicyByEndpointID(ruleID, epID string, noOfRulesToRemove int, resetAllACL shouldResetAllACLs) error {
175175
epObj, err := pMgr.ioShim.Hns.GetEndpointByID(epID)
176176
if err != nil {
177-
if isNotFoundErr(err) {
178-
klog.Infof("[PolicyManagerWindows] ignoring remove policy on endpoint since the endpoint wasn't found. the corresponding pod was most likely deleted. policy: %s, endpoint: %s", ruleID, epID)
177+
// isNotFoundErr does not always recognize the endpoint-not-found error, so also match on the error message text as a fallback.
178+
if isNotFoundErr(err) || strings.Contains(err.Error(), "endpoint was not found") {
179+
klog.Infof("[PolicyManagerWindows] ignoring remove policy since the endpoint wasn't found. the corresponding pod might be deleted. policy: %s, endpoint: %s, err: %s", ruleID, epID, err.Error())
179180
return nil
180181
}
181182
return fmt.Errorf("[PolicyManagerWindows] failed to remove policy while getting the endpoint. policy: %s, endpoint: %s, err: %w", ruleID, epID, err)
@@ -221,9 +222,10 @@ func (pMgr *PolicyManager) removePolicyByEndpointID(ruleID, epID string, noOfRul
221222
func (pMgr *PolicyManager) applyPoliciesToEndpointID(epID string, policies hcn.PolicyEndpointRequest) error {
222223
epObj, err := pMgr.ioShim.Hns.GetEndpointByID(epID)
223224
if err != nil {
224-
if isNotFoundErr(err) {
225+
// isNotFoundErr does not always recognize the endpoint-not-found error, so also match on the error message text as a fallback.
226+
if isNotFoundErr(err) || strings.Contains(err.Error(), "endpoint was not found") {
225227
// unlikely scenario where an endpoint is deleted right after we refresh HNS endpoints
226-
metrics.SendErrorLogAndMetric(util.IptmID, "[PolicyManagerWindows] ignoring apply policies to endpoint since the endpoint wasn't found. endpoint: %s", epID)
228+
metrics.SendErrorLogAndMetric(util.IptmID, "[PolicyManagerWindows] ignoring apply policies to endpoint since the endpoint wasn't found. endpoint: %s, err: %s", epID, err.Error())
227229
return nil
228230
}
229231
return fmt.Errorf("[PolicyManagerWindows] to apply policies while getting the endpoint. endpoint: %s, err: %w", epID, err)

npm/pkg/dataplane/policies/policymanager_windows_test.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,32 @@ func TestRemovePolicies(t *testing.T) {
129129
verifyACLCacheIsCleaned(t, hns, len(endPointIDList))
130130
}
131131

132+
func TestApplyPoliciesEndpointNotFound(t *testing.T) {
133+
pMgr, _ := getPMgr(t)
134+
testendPointIDList := map[string]string{
135+
"10.0.0.5": "test10",
136+
}
137+
err := pMgr.AddPolicy(TestNetworkPolicies[0], testendPointIDList)
138+
require.NoError(t, err)
139+
}
140+
141+
func TestRemovePoliciesEndpointNotFound(t *testing.T) {
142+
pMgr, hns := getPMgr(t)
143+
err := pMgr.AddPolicy(TestNetworkPolicies[0], endPointIDList)
144+
require.NoError(t, err)
145+
146+
aclID := TestNetworkPolicies[0].ACLPolicyID
147+
148+
_, err = hns.Cache.ACLPolicies(endPointIDList, aclID)
149+
require.NoError(t, err)
150+
testendPointIDList := map[string]string{
151+
"10.0.0.5": "test10",
152+
}
153+
err = pMgr.RemovePolicy(TestNetworkPolicies[0].PolicyKey, testendPointIDList)
154+
require.NoError(t, err, err)
155+
verifyACLCacheIsCleaned(t, hns, len(endPointIDList))
156+
}
157+
132158
// Helper functions for UTS
133159

134160
func getPMgr(t *testing.T) (*PolicyManager, *hnswrapper.Hnsv2wrapperFake) {

0 commit comments

Comments
 (0)