Skip to content

Commit 1ea2f5a

Browse files
feat: [NPM] num ACL rules for v2 & update existing metrics (#1223)
* wip * fix windows build err * address comments * fix lingering merge conflicts
1 parent c820189 commit 1ea2f5a

15 files changed

+318
-120
lines changed

npm/ipsm/ipsm.go

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -211,22 +211,21 @@ func (ipsMgr *IpsetManager) run(entry *ipsEntry) (int, error) {
211211
}
212212

213213
func (ipsMgr *IpsetManager) createList(listName string) error {
214-
prometheusTimer := metrics.StartNewTimer()
215214

216215
if _, exists := ipsMgr.listMap[listName]; exists {
217216
return nil
218217
}
219218

220-
defer metrics.RecordIPSetExecTime(prometheusTimer) // record execution time regardless of failure
221-
222219
entry := &ipsEntry{
223220
name: listName,
224221
operationFlag: util.IpsetCreationFlag,
225222
set: util.GetHashedName(listName),
226223
spec: []string{util.IpsetSetListFlag},
227224
}
228225
log.Logf("Creating List: %+v", entry)
226+
timer := metrics.StartNewTimer()
229227
errCode, err := ipsMgr.run(entry)
228+
metrics.RecordIPSetExecTime(timer) // record execution time regardless of failure
230229
if err != nil && errCode != 1 {
231230
metrics.SendErrorLogAndMetric(util.IpsmID, "Error: failed to create ipset list %s.", listName)
232231
return err
@@ -619,9 +618,10 @@ func (ipsMgr *IpsetManager) DestroyNpmIpsets() error {
619618
_, err := ipsMgr.run(flushEntry)
620619
if err != nil {
621620
metrics.SendErrorLogAndMetric(util.IpsmID, "{DestroyNpmIpsets} Error: failed to flush ipset %s", ipsetName)
621+
} else {
622+
metrics.RemoveAllEntriesFromIPSet(ipsetName)
622623
}
623624
}
624-
625625
for _, ipsetName := range ipsetLists {
626626
deleteEntry := &ipsEntry{
627627
operationFlag: util.IpsetDestroyFlag,
@@ -631,8 +631,6 @@ func (ipsMgr *IpsetManager) DestroyNpmIpsets() error {
631631
if err != nil {
632632
destroyFailureCount++
633633
metrics.SendErrorLogAndMetric(util.IpsmID, "{DestroyNpmIpsets} Error: failed to destroy ipset %s", ipsetName)
634-
} else {
635-
metrics.RemoveAllEntriesFromIPSet(ipsetName)
636634
}
637635
}
638636

@@ -644,9 +642,10 @@ func (ipsMgr *IpsetManager) DestroyNpmIpsets() error {
644642
} else {
645643
metrics.ResetNumIPSets()
646644
}
647-
// NOTE: in v2, we reset ipset entries, but in v1 we only remove entries for ipsets we delete.
648-
// So v2 may underestimate the number of entries if there are destroy failures,
649-
// but v1 may miss removing entries if some sets are in the prometheus metric but not in the kernel.
645+
// NOTE: in v2, we reset metrics blindly, regardless of errors
646+
// So v2 would underestimate the number of ipsets/entries if there are destroy failures.
647+
// In v1 we remove entries for ipsets we flush.
648+
// We may miss removing entries if some sets are in the prometheus metric but not in the kernel.
650649

651650
return nil
652651
}

npm/iptm/iptm.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,6 @@ func (iptMgr *IptablesManager) UninitNpmChains() error {
174174

175175
// Add adds a rule in iptables.
176176
func (iptMgr *IptablesManager) Add(entry *IptEntry) error {
177-
prometheusTimer := metrics.StartNewTimer()
178-
defer metrics.RecordACLRuleExecTime(prometheusTimer) // record execution time regardless of failure
179-
180177
log.Logf("Adding iptables entry: %+v.", entry)
181178

182179
// Since there is a RETURN statement added to each DROP chain, we need to make sure
@@ -186,7 +183,10 @@ func (iptMgr *IptablesManager) Add(entry *IptEntry) error {
186183
} else {
187184
iptMgr.OperationFlag = util.IptablesInsertionFlag
188185
}
189-
if _, err := iptMgr.run(entry); err != nil {
186+
timer := metrics.StartNewTimer()
187+
_, err := iptMgr.run(entry)
188+
metrics.RecordACLRuleExecTime(timer) // record execution time regardless of failure
189+
if err != nil {
190190
metrics.SendErrorLogAndMetric(util.IptmID, "Error: failed to create iptables rules.")
191191
return err
192192
}

npm/metrics/acl_rules.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,21 @@ func IncNumACLRules() {
55
numACLRules.Inc()
66
}
77

8+
// IncNumACLRulesBy increments the number of ACL rules by the amount.
9+
func IncNumACLRulesBy(amount int) {
10+
numACLRules.Add(float64(amount))
11+
}
12+
813
// DecNumACLRules decrements the number of ACL rules.
914
func DecNumACLRules() {
1015
numACLRules.Dec()
1116
}
1217

18+
// DecNumACLRulesBy decrements the number of ACL rules by the amount.
19+
func DecNumACLRulesBy(amount int) {
20+
numACLRules.Add(float64(-amount))
21+
}
22+
1323
// ResetNumACLRules sets the number of ACL rules to 0.
1424
func ResetNumACLRules() {
1525
numACLRules.Set(0)

npm/metrics/acl_rules_test.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@ package metrics
33
import "testing"
44

55
var (
6-
numRulesMetric = &basicMetric{ResetNumACLRules, IncNumACLRules, DecNumACLRules, GetNumACLRules}
7-
ruleExecMetric = &recordingMetric{RecordACLRuleExecTime, GetACLRuleExecCount}
6+
numRulesMetric = &basicMetric{ResetNumACLRules, IncNumACLRules, DecNumACLRules, GetNumACLRules}
7+
numRulesAmountMetric = &amountMetric{basicMetric: numRulesMetric, incBy: IncNumACLRulesBy, decBy: DecNumACLRulesBy}
8+
ruleExecMetric = &recordingMetric{RecordACLRuleExecTime, GetACLRuleExecCount}
89
)
910

1011
func TestRecordACLRuleExecTime(t *testing.T) {
@@ -22,3 +23,11 @@ func TestDecNumACLRules(t *testing.T) {
2223
func TestResetNumACLRules(t *testing.T) {
2324
testResetMetric(t, numRulesMetric)
2425
}
26+
27+
func TestIncNumACLRulesBy(t *testing.T) {
28+
numRulesAmountMetric.testIncByMetric(t)
29+
}
30+
31+
func TestDecNumACLRulesBy(t *testing.T) {
32+
numRulesAmountMetric.testDecByMetric(t)
33+
}

npm/metrics/prometheus_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,25 @@ type basicMetric struct {
2323
get func() (int, error)
2424
}
2525

26+
type amountMetric struct {
27+
*basicMetric
28+
incBy func(int)
29+
decBy func(int)
30+
}
31+
32+
func (metric *amountMetric) testIncByMetric(t *testing.T) {
33+
metric.reset()
34+
metric.incBy(2)
35+
assertMetricVal(t, metric.basicMetric, 2)
36+
}
37+
38+
func (metric *amountMetric) testDecByMetric(t *testing.T) {
39+
metric.reset()
40+
metric.incBy(5)
41+
metric.decBy(2)
42+
assertMetricVal(t, metric.basicMetric, 3)
43+
}
44+
2645
type recordingMetric struct {
2746
record func(timer *Timer)
2847
getCount func() (int, error)

npm/pkg/dataplane/ipsets/ipsetmanager.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,15 @@ func NewIPSetManager(iMgrCfg *IPSetManagerCfg, ioShim *common.IOShim) *IPSetMana
5151
func (iMgr *IPSetManager) ResetIPSets() error {
5252
iMgr.Lock()
5353
defer iMgr.Unlock()
54+
metrics.ResetNumIPSets()
55+
metrics.ResetIPSetEntries()
5456
err := iMgr.resetIPSets()
57+
iMgr.setMap = make(map[string]*IPSet)
58+
iMgr.clearDirtyCache()
5559
if err != nil {
5660
metrics.SendErrorLogAndMetric(util.IpsmID, "error: failed to reset ipsetmanager: %s", err.Error())
5761
return fmt.Errorf("error while resetting ipsetmanager: %w", err)
5862
}
59-
// TODO update prometheus metrics here instead of in OS-specific functions (done in Linux right now)
60-
// metrics.ResetNumIPSets() and metrics.ResetIPSetEntries()
6163
return nil
6264
}
6365

@@ -384,22 +386,21 @@ func (iMgr *IPSetManager) RemoveFromList(listMetadata *IPSetMetadata, setMetadat
384386
}
385387

386388
func (iMgr *IPSetManager) ApplyIPSets() error {
387-
prometheusTimer := metrics.StartNewTimer()
388-
389389
iMgr.Lock()
390390
defer iMgr.Unlock()
391391

392392
if len(iMgr.toAddOrUpdateCache) == 0 && len(iMgr.toDeleteCache) == 0 {
393393
klog.Info("[IPSetManager] No IPSets to apply")
394394
return nil
395395
}
396-
defer metrics.RecordIPSetExecTime(prometheusTimer) // record execution time regardless of failure
397396

398397
klog.Infof("[IPSetManager] toAddUpdateCache %+v \n ", iMgr.toAddOrUpdateCache)
399398
klog.Infof("[IPSetManager] toDeleteCache %+v \n ", iMgr.toDeleteCache)
400399
iMgr.sanitizeDirtyCache()
401400

402401
// Call the appropriate apply ipsets
402+
prometheusTimer := metrics.StartNewTimer()
403+
defer metrics.RecordIPSetExecTime(prometheusTimer) // record execution time regardless of failure
403404
err := iMgr.applyIPSets()
404405
if err != nil {
405406
metrics.SendErrorLogAndMetric(util.IpsmID, "error: failed to apply ipsets: %s", err.Error())

npm/pkg/dataplane/ipsets/ipsetmanager_linux.go

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"fmt"
55
"strings"
66

7-
"github.com/Azure/azure-container-networking/npm/metrics"
87
"github.com/Azure/azure-container-networking/npm/pkg/dataplane/parse"
98
"github.com/Azure/azure-container-networking/npm/util"
109
npmerrors "github.com/Azure/azure-container-networking/npm/util/errors"
@@ -90,26 +89,20 @@ func (iMgr *IPSetManager) resetIPSets() error {
9089
grepCommand := iMgr.ioShim.Exec.Command(ioutil.Grep, azureNPMPrefix)
9190
azureIPSets, haveAzureIPSets, commandError := ioutil.PipeCommandToGrep(listCommand, grepCommand)
9291
if commandError != nil {
93-
return npmerrors.SimpleErrorWrapper("failed to run ipset list for resetting IPSets", commandError)
92+
return npmerrors.SimpleErrorWrapper("failed to run ipset list for resetting IPSets (prometheus metrics may be off now)", commandError)
9493
}
9594
if !haveAzureIPSets {
96-
metrics.ResetNumIPSets()
97-
metrics.ResetIPSetEntries()
9895
return nil
9996
}
10097
creator, originalNumAzureSets, destroyFailureCount := iMgr.fileCreatorForReset(azureIPSets)
10198
restoreError := creator.RunCommandWithFile(ipsetCommand, ipsetRestoreFlag)
10299
if restoreError != nil {
103-
metrics.SetNumIPSets(originalNumAzureSets)
104-
// NOTE: the num entries for sets may be incorrect if the restore fails
100+
klog.Errorf(
101+
"failed to restore ipsets (prometheus metrics may be off now). Had originalNumAzureSets %d and destroyFailureCount %d with err: %v",
102+
originalNumAzureSets, destroyFailureCount, restoreError,
103+
)
105104
return npmerrors.SimpleErrorWrapper("failed to run ipset restore for resetting IPSets", restoreError)
106105
}
107-
if metrics.NumIPSetsIsPositive() {
108-
metrics.SetNumIPSets(*destroyFailureCount)
109-
} else {
110-
metrics.ResetNumIPSets()
111-
}
112-
metrics.ResetIPSetEntries() // NOTE: the num entries for sets that fail to flush may be incorrect after this
113106
return nil
114107
}
115108

0 commit comments

Comments
 (0)