Skip to content

Commit 9bae90a

Browse files
fix: [NPM] clean up logs (#1332)
* wip * fix lint * change wording in logs * address comment
1 parent 5b9cbd4 commit 9bae90a

File tree

4 files changed

+51
-20
lines changed

4 files changed

+51
-20
lines changed

npm/pkg/controlplane/controllers/v2/podController.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ func (c *PodController) syncAddedPod(podObj *corev1.Pod) error {
421421
targetSetKeyValue := ipsets.NewIPSetMetadata(labelKeyValue, ipsets.KeyValueLabelOfPod)
422422
allSets := []*ipsets.IPSetMetadata{targetSetKey, targetSetKeyValue}
423423

424-
klog.Infof("Creating ipsets %v if it does not already exist", allSets)
424+
klog.Infof("Creating ipsets %+v and %+v if they do not exist", targetSetKey, targetSetKeyValue)
425425
klog.Infof("Adding pod %s (ip : %s) to ipset %s and %s", podKey, npmPodObj.PodIP, labelKey, labelKeyValue)
426426
if err = c.dp.AddToSets(allSets, podMetadata); err != nil {
427427
return fmt.Errorf("[syncAddedPod] Error: failed to add pod to label ipset with err: %w", err)

npm/pkg/dataplane/policies/chain-management_linux.go

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ const (
2121
// TODO replace all util constants with local constants
2222
defaultlockWaitTimeInSeconds string = "60"
2323

24-
doesNotExistErrorCode int = 1 // Bad rule (does a matching rule exist in that chain?)
24+
doesNotExistErrorCode int = 1 // stderr possibility: Bad rule (does a matching rule exist in that chain?)
2525
couldntLoadTargetErrorCode int = 2 // Couldn't load target `AZURE-NPM-EGRESS':No such file or directory
2626

2727
// transferred from iptm.go and not sure why this length is important
@@ -50,6 +50,21 @@ var (
5050
}
5151
jumpFromForwardToAzureChainArgs = append([]string{util.IptablesForwardChain}, jumpToAzureChainArgs...)
5252

53+
removeDeprecatedJumpIgnoredErrors = []*exitErrorInfo{
54+
{
55+
// doesNotExistErrorCode happens when AZURE-NPM chain exists, but this jump rule doesn't exist
56+
exitCode: doesNotExistErrorCode,
57+
stdErr: "No chain/target/match by that name",
58+
messageToLog: "didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because NPM v1 was not used prior",
59+
},
60+
{
61+
// couldntLoadTargetErrorCode happens when AZURE-NPM chain doesn't exist (and hence the jump rule doesn't exist too)
62+
exitCode: couldntLoadTargetErrorCode,
63+
stdErr: "Couldn't load target `AZURE-NPM':No such file or directory",
64+
messageToLog: "didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because AZURE-NPM chain doesn't exist",
65+
},
66+
}
67+
5368
listForwardEntriesArgs = []string{
5469
util.IptablesWaitFlag, defaultlockWaitTimeInSeconds, util.IptablesTableFlag, util.IptablesFilterTable,
5570
util.IptablesNumericFlag, util.IptablesListFlag, util.IptablesForwardChain, util.IptablesLineNumbersFlag,
@@ -64,6 +79,12 @@ var (
6479
}
6580
)
6681

82+
type exitErrorInfo struct {
83+
exitCode int
84+
stdErr string
85+
messageToLog string
86+
}
87+
6788
type staleChains struct {
6889
chainsToCleanup map[string]struct{}
6990
}
@@ -156,20 +177,11 @@ func (pMgr *PolicyManager) bootup(_ []string) error {
156177
defer pMgr.reconcileManager.forceUnlock()
157178

158179
// 1. delete the deprecated jump to AZURE-NPM
159-
deprecatedErrCode, deprecatedErr := pMgr.runIPTablesCommand(util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...)
160-
if deprecatedErr == nil {
180+
deprecatedErrCode, deprecatedErr := pMgr.ignoreErrorsAndRunIPTablesCommand(removeDeprecatedJumpIgnoredErrors, util.IptablesDeletionFlag, deprecatedJumpFromForwardToAzureChainArgs...)
181+
if deprecatedErrCode == 0 {
161182
klog.Infof("deleted deprecated jump rule from FORWARD chain to AZURE-NPM chain")
162-
} else {
163-
switch deprecatedErrCode {
164-
case couldntLoadTargetErrorCode:
165-
// couldntLoadTargetErrorCode happens when AZURE-NPM chain doesn't exist (and hence the jump rule doesn't exist too)
166-
klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because AZURE-NPM chain doesn't exist. Exit code %d and error: %s", deprecatedErrCode, deprecatedErr)
167-
case doesNotExistErrorCode:
168-
// doesNotExistErrorCode happens when AZURE-NPM chain exists, but this jump rule doesn't exist
169-
klog.Infof("didn't delete deprecated jump rule from FORWARD chain to AZURE-NPM chain likely because NPM v1 was not used prior. Exit code %d and error: %s", deprecatedErrCode, deprecatedErr)
170-
default:
171-
klog.Errorf("failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deprecatedErrCode, deprecatedErr.Error())
172-
}
183+
} else if deprecatedErr != nil {
184+
klog.Errorf("failed to delete deprecated jump rule from FORWARD chain to AZURE-NPM chain for unexpected reason with exit code %d and error: %s", deprecatedErrCode, deprecatedErr.Error())
173185
}
174186

175187
currentChains, err := ioutil.AllCurrentAzureChains(pMgr.ioShim.Exec, defaultlockWaitTimeInSeconds)
@@ -254,6 +266,10 @@ deleteLoop:
254266

255267
// this function has a direct comparison in NPM v1 iptables manager (iptm.go)
256268
func (pMgr *PolicyManager) runIPTablesCommand(operationFlag string, args ...string) (int, error) {
269+
return pMgr.ignoreErrorsAndRunIPTablesCommand(nil, operationFlag, args...)
270+
}
271+
272+
func (pMgr *PolicyManager) ignoreErrorsAndRunIPTablesCommand(ignored []*exitErrorInfo, operationFlag string, args ...string) (int, error) {
257273
allArgs := []string{util.IptablesWaitFlag, defaultlockWaitTimeInSeconds, operationFlag}
258274
allArgs = append(allArgs, args...)
259275

@@ -266,11 +282,17 @@ func (pMgr *PolicyManager) runIPTablesCommand(operationFlag string, args ...stri
266282
if ok := errors.As(err, &exitError); ok {
267283
errCode := exitError.ExitStatus()
268284
allArgsString := strings.Join(allArgs, " ")
269-
msgStr := strings.TrimSuffix(string(output), "\n")
285+
outputString := strings.TrimSuffix(string(output), "\n")
286+
for _, info := range ignored {
287+
if errCode == info.exitCode && strings.Contains(outputString, info.stdErr) {
288+
klog.Infof("%s. not able to run iptables command [%s %s]. exit code: %d, output: %s", info.messageToLog, util.Iptables, allArgsString, errCode, outputString)
289+
return errCode, nil
290+
}
291+
}
270292
if errCode > 0 {
271-
metrics.SendErrorLogAndMetric(util.IptmID, "error: There was an error running command: [%s %s] Stderr: [%v, %s]", util.Iptables, allArgsString, exitError, msgStr)
293+
metrics.SendErrorLogAndMetric(util.IptmID, "error: There was an error running command: [%s %s] Stderr: [%v, %s]", util.Iptables, allArgsString, exitError, outputString)
272294
}
273-
return errCode, npmerrors.SimpleErrorWrapper(fmt.Sprintf("failed to run iptables command [%s %s] Stderr: [%s]", util.Iptables, allArgsString, msgStr), exitError)
295+
return errCode, npmerrors.SimpleErrorWrapper(fmt.Sprintf("failed to run iptables command [%s %s] Stderr: [%s]", util.Iptables, allArgsString, outputString), exitError)
274296
}
275297
return 0, nil
276298
}

npm/pkg/dataplane/policies/chain-management_linux_test.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,11 @@ func TestBootupLinux(t *testing.T) {
371371
{
372372
name: "success after restore failure (no NPM prior)",
373373
calls: []testutils.TestCmd{
374-
{Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 2}, // AZURE-NPM chain didn't exist
374+
{
375+
Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"},
376+
ExitCode: 2,
377+
Stdout: "iptables v1.8.4 (legacy): Couldn't load target `AZURE-NPM':No such file or directory",
378+
}, // AZURE-NPM chain didn't exist
375379
{Cmd: listAllCommandStrings, PipedToCommand: true},
376380
{Cmd: []string{"grep", "Chain AZURE-NPM"}, ExitCode: 1},
377381
fakeIPTablesRestoreFailureCommand, // e.g. xtables lock held by another app. Currently the stdout doesn't matter for retrying
@@ -385,7 +389,11 @@ func TestBootupLinux(t *testing.T) {
385389
{
386390
name: "success: v2 existed prior",
387391
calls: []testutils.TestCmd{
388-
{Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"}, ExitCode: 1}, // deprecated rule did not exist
392+
{
393+
Cmd: []string{"iptables", "-w", "60", "-D", "FORWARD", "-j", "AZURE-NPM"},
394+
ExitCode: 1,
395+
Stdout: "No chain/target/match by that name",
396+
}, // deprecated rule did not exist
389397
{Cmd: listAllCommandStrings, PipedToCommand: true},
390398
{
391399
Cmd: []string{"grep", "Chain AZURE-NPM"},

npm/pkg/dataplane/policies/policymanager_linux.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ func (pMgr *PolicyManager) deleteJumpRule(policy *NPMNetworkPolicy, direction Un
175175

176176
specs = append([]string{baseChainName}, specs...)
177177
errCode, err := pMgr.runIPTablesCommand(util.IptablesDeletionFlag, specs...)
178+
// if this actually happens (don't think it should), could use ignoreErrorsAndRunIPTablesCommand instead with: "Bad rule (does a matching rule exist in that chain?)"
178179
if err != nil && errCode != doesNotExistErrorCode {
179180
errorString := fmt.Sprintf("failed to delete jump from %s chain to %s chain for policy %s with exit code %d", baseChainName, chainName, policy.PolicyKey, errCode)
180181
log.Errorf("%s: %w", errorString, err)

0 commit comments

Comments
 (0)