Skip to content

Commit 87311c5

Browse files
rejain456sivakami
authored andcommitted
[NPM Lite] Querying L1VH + Non-L1VH Endpoints (#3086)
* Added logic to make 2 hns calls for 2 different endpoint states * added querying to l1vh hns only if npm lite is enabled * added logging line for debugging * updated config * removed logging lines * fixing go lint err * refactored based on pr comments * replaced with errors.Wrap and fixed a logging statement * added if condition with logic * changed errl1vh to err * added omments * added logging lines for debugging * added npm lite enabled log debugging * spacing * syntax * added logs for debugging * optimizing api load * added function to remove common endpoints * added logging for debugging * removed npm lite check * removed all the debugging comments * added extra unit test cases * added additional unit tests * removed protobuf code * fixed comment * fixed a spelling error * resolved pr comments * updated a comment * revised comment * resolved further pr comments * changed back to for loop from range
1 parent 0228a2b commit 87311c5

File tree

5 files changed

+133
-16
lines changed

5 files changed

+133
-16
lines changed

npm/cmd/start.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ func start(config npmconfig.Config, flags npmconfig.Flags) error {
137137
stopChannel := wait.NeverStop
138138
if config.Toggles.EnableV2NPM {
139139
// update the dataplane config
140+
npmV2DataplaneCfg.EnableNPMLite = config.Toggles.EnableNPMLite
141+
140142
npmV2DataplaneCfg.MaxBatchedACLsPerPod = config.MaxBatchedACLsPerPod
141143

142144
npmV2DataplaneCfg.NetPolInBackground = config.Toggles.NetPolInBackground

npm/npm.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ func (npMgr *NetworkPolicyManager) Start(config npmconfig.Config, stopCh <-chan
192192
// Starts all informers manufactured by npMgr's informerFactory.
193193
npMgr.InformerFactory.Start(stopCh)
194194

195-
// npn lite
195+
// npm lite
196196
if npMgr.NpmLiteToggle {
197197
npMgr.PodInformerFactory.Start(stopCh)
198198
}

npm/pkg/dataplane/dataplane.go

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ type Config struct {
4545
NetPolInBackground bool
4646
MaxPendingNetPols int
4747
NetPolInterval time.Duration
48+
EnableNPMLite bool
4849
*ipsets.IPSetManagerCfg
4950
*policies.PolicyManagerCfg
5051
}
@@ -64,12 +65,13 @@ type DataPlane struct {
6465
nodeName string
6566
// endpointCache stores all endpoints of the network (including off-node)
6667
// Key is PodIP
67-
endpointCache *endpointCache
68-
ioShim *common.IOShim
69-
updatePodCache *updatePodCache
70-
endpointQuery *endpointQuery
71-
applyInfo *applyInfo
72-
netPolQueue *netPolQueue
68+
endpointCache *endpointCache
69+
ioShim *common.IOShim
70+
updatePodCache *updatePodCache
71+
endpointQuery *endpointQuery
72+
endpointQueryAttachedState *endpointQuery // windows -> filter for state 2 (attached) endpoints in l1vh
73+
applyInfo *applyInfo
74+
netPolQueue *netPolQueue
7375
// removePolicyInfo tracks when a policy was removed yet had ApplyIPSet failures.
7476
// This field is only relevant for Linux.
7577
removePolicyInfo removePolicyInfo
@@ -88,11 +90,12 @@ func NewDataPlane(nodeName string, ioShim *common.IOShim, cfg *Config, stopChann
8890
policyMgr: policies.NewPolicyManager(ioShim, cfg.PolicyManagerCfg),
8991
ipsetMgr: ipsets.NewIPSetManager(cfg.IPSetManagerCfg, ioShim),
9092
// networkID is set when initializing Windows dataplane
91-
networkID: "",
92-
endpointCache: newEndpointCache(),
93-
nodeName: nodeName,
94-
ioShim: ioShim,
95-
endpointQuery: new(endpointQuery),
93+
networkID: "",
94+
endpointCache: newEndpointCache(),
95+
nodeName: nodeName,
96+
ioShim: ioShim,
97+
endpointQuery: new(endpointQuery),
98+
endpointQueryAttachedState: new(endpointQuery),
9699
applyInfo: &applyInfo{
97100
inBootupPhase: true,
98101
},
@@ -128,7 +131,6 @@ func NewDataPlane(nodeName string, ioShim *common.IOShim, cfg *Config, stopChann
128131
} else {
129132
metrics.SendLog(util.DaemonDataplaneID, "[DataPlane] dataplane configured to NOT add netpols in background", true)
130133
}
131-
132134
return dp, nil
133135
}
134136

npm/pkg/dataplane/dataplane_windows.go

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package dataplane
22

33
import (
44
"encoding/json"
5-
"errors"
65
"fmt"
76
"strings"
87
"time"
@@ -12,6 +11,7 @@ import (
1211
"github.com/Azure/azure-container-networking/npm/util"
1312
npmerrors "github.com/Azure/azure-container-networking/npm/util/errors"
1413
"github.com/Microsoft/hcsshim/hcn"
14+
"github.com/pkg/errors"
1515
"k8s.io/klog"
1616
)
1717

@@ -50,14 +50,31 @@ func (dp *DataPlane) initializeDataPlane() error {
5050
},
5151
Flags: hcn.HostComputeQueryFlagsNone,
5252
}
53+
// Initialize Endpoint query used to filter healthy endpoints (vNIC) of Windows pods on L1VH Node
54+
dp.endpointQueryAttachedState.query = hcn.HostComputeQuery{
55+
SchemaVersion: hcn.SchemaVersion{
56+
Major: hcnSchemaMajorVersion,
57+
Minor: hcnSchemaMinorVersion,
58+
},
59+
Flags: hcn.HostComputeQueryFlagsNone,
60+
}
61+
5362
// Filter out any endpoints that are not in "AttachedShared" State. All running Windows pods with networking must be in this state.
5463
filterMap := map[string]uint16{"State": hcnEndpointStateAttachedSharing}
5564
filter, err := json.Marshal(filterMap)
5665
if err != nil {
57-
return npmerrors.SimpleErrorWrapper("failed to marshal endpoint filter map", err)
66+
return errors.Wrap(err, "failed to marshal endpoint filter map for attachedsharing state")
5867
}
5968
dp.endpointQuery.query.Filter = string(filter)
6069

70+
// Filter out any endpoints that are not in "Attached" State. All running Windows pods on L1VH with networking must be in this state.
71+
filterMapAttached := map[string]uint16{"State": hcnEndpointStateAttached}
72+
filterAttached, err := json.Marshal(filterMapAttached)
73+
if err != nil {
74+
return errors.Wrap(err, "failed to marshal endpoint filter map for attched state")
75+
}
76+
dp.endpointQueryAttachedState.query.Filter = string(filterAttached)
77+
6178
// reset endpoint cache so that netpol references are removed for all endpoints while refreshing pod endpoints
6279
// no need to lock endpointCache at boot up
6380
dp.endpointCache.cache = make(map[string]*npmEndpoint)
@@ -329,21 +346,53 @@ func (dp *DataPlane) getEndpointsToApplyPolicies(netPols []*policies.NPMNetworkP
329346

330347
func (dp *DataPlane) getLocalPodEndpoints() ([]*hcn.HostComputeEndpoint, error) {
331348
klog.Info("getting local endpoints")
349+
350+
// Gets endpoints in state: Attached
332351
timer := metrics.StartNewTimer()
352+
endpointsAttached, err := dp.ioShim.Hns.ListEndpointsQuery(dp.endpointQueryAttachedState.query)
353+
metrics.RecordListEndpointsLatency(timer)
354+
if err != nil {
355+
metrics.IncListEndpointsFailures()
356+
return nil, errors.Wrap(err, "failed to get local pod endpoints in state:attached")
357+
}
358+
359+
// Gets endpoints in state: AttachedSharing
360+
timer = metrics.StartNewTimer()
333361
endpoints, err := dp.ioShim.Hns.ListEndpointsQuery(dp.endpointQuery.query)
334362
metrics.RecordListEndpointsLatency(timer)
335363
if err != nil {
336364
metrics.IncListEndpointsFailures()
337-
return nil, npmerrors.SimpleErrorWrapper("failed to get local pod endpoints", err)
365+
return nil, errors.Wrap(err, "failed to get local pod endpoints in state: attachedSharing")
338366
}
339367

368+
// Get endpoints unique to endpoints and endpointsAttached
369+
endpoints = GetUniqueEndpoints(endpoints, endpointsAttached)
370+
340371
epPointers := make([]*hcn.HostComputeEndpoint, 0, len(endpoints))
341372
for k := range endpoints {
342373
epPointers = append(epPointers, &endpoints[k])
343374
}
344375
return epPointers, nil
345376
}
346377

378+
func GetUniqueEndpoints(endpoints, endpointsAttached []hcn.HostComputeEndpoint) []hcn.HostComputeEndpoint {
379+
// Store IDs of endpoints list in a map for quick lookup
380+
idMap := make(map[string]struct{}, len(endpoints))
381+
for i := 0; i < len(endpoints); i++ {
382+
ep := endpoints[i]
383+
idMap[ep.Id] = struct{}{}
384+
}
385+
386+
// Add endpointsAttached list endpoints in endpoints list if the endpoint is not in the map
387+
for i := 0; i < len(endpointsAttached); i++ {
388+
ep := endpointsAttached[i]
389+
if _, ok := idMap[ep.Id]; !ok {
390+
endpoints = append(endpoints, ep)
391+
}
392+
}
393+
return endpoints
394+
}
395+
347396
// refreshPodEndpoints will refresh all the pod endpoints and create empty netpol references for new endpoints
348397
/*
349398
Key Assumption: a new pod event (w/ IP) cannot come before HNS knows (and can tell us) about the endpoint.

npm/pkg/dataplane/dataplane_windows_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010
"github.com/Azure/azure-container-networking/npm/metrics"
1111
"github.com/Azure/azure-container-networking/npm/pkg/dataplane/ipsets"
1212
dptestutils "github.com/Azure/azure-container-networking/npm/pkg/dataplane/testutils"
13+
"github.com/Microsoft/hcsshim/hcn"
14+
"github.com/google/go-cmp/cmp"
1315
"github.com/pkg/errors"
1416
"github.com/stretchr/testify/require"
1517
)
@@ -86,6 +88,68 @@ func TestMultiJobApplyInBackground(t *testing.T) {
8688
testMultiJobCases(t, multiJobApplyInBackgroundTests(), time.Duration(1*time.Second))
8789
}
8890

91+
func TestRemoveCommonEndpoints(t *testing.T) {
92+
tests := []struct {
93+
name string
94+
endpoints []hcn.HostComputeEndpoint
95+
endpointsAttached []hcn.HostComputeEndpoint
96+
expected []hcn.HostComputeEndpoint
97+
}{
98+
{
99+
name: "1 value same",
100+
endpoints: []hcn.HostComputeEndpoint{{Id: "456901"}, {Id: "123456"}, {Id: "560971"}},
101+
endpointsAttached: []hcn.HostComputeEndpoint{{Id: "567890"}, {Id: "123456"}, {Id: "789012"}},
102+
expected: []hcn.HostComputeEndpoint{{Id: "456901"}, {Id: "123456"}, {Id: "560971"}, {Id: "567890"}, {Id: "789012"}},
103+
},
104+
{
105+
name: "no values same",
106+
endpoints: []hcn.HostComputeEndpoint{{Id: "456901"}, {Id: "560971"}},
107+
endpointsAttached: []hcn.HostComputeEndpoint{{Id: "567890"}, {Id: "789012"}},
108+
expected: []hcn.HostComputeEndpoint{{Id: "456901"}, {Id: "560971"}, {Id: "567890"}, {Id: "789012"}},
109+
},
110+
{
111+
name: "1 value same",
112+
endpoints: []hcn.HostComputeEndpoint{{Id: "456901"}, {Id: "123456"}, {Id: "560971"}},
113+
endpointsAttached: []hcn.HostComputeEndpoint{{Id: "567890"}, {Id: "123456"}, {Id: "789012"}},
114+
expected: []hcn.HostComputeEndpoint{{Id: "456901"}, {Id: "123456"}, {Id: "560971"}, {Id: "567890"}, {Id: "789012"}},
115+
},
116+
{
117+
name: "two values same",
118+
endpoints: []hcn.HostComputeEndpoint{{Id: "456901"}, {Id: "560971"}, {Id: "123456"}, {Id: "789012"}},
119+
endpointsAttached: []hcn.HostComputeEndpoint{{Id: "567890"}, {Id: "789012"}, {Id: "123456"}},
120+
expected: []hcn.HostComputeEndpoint{{Id: "456901"}, {Id: "560971"}, {Id: "123456"}, {Id: "789012"}, {Id: "567890"}},
121+
},
122+
{
123+
name: "no values",
124+
endpoints: []hcn.HostComputeEndpoint{},
125+
endpointsAttached: []hcn.HostComputeEndpoint{},
126+
expected: []hcn.HostComputeEndpoint{},
127+
},
128+
{
129+
name: "1 value - same",
130+
endpoints: []hcn.HostComputeEndpoint{{Id: "456901"}},
131+
endpointsAttached: []hcn.HostComputeEndpoint{{Id: "456901"}},
132+
expected: []hcn.HostComputeEndpoint{{Id: "456901"}},
133+
},
134+
{
135+
name: "1 value - different",
136+
endpoints: []hcn.HostComputeEndpoint{{Id: "456901"}},
137+
endpointsAttached: []hcn.HostComputeEndpoint{},
138+
expected: []hcn.HostComputeEndpoint{{Id: "456901"}},
139+
},
140+
}
141+
for _, tt := range tests {
142+
tt := tt
143+
144+
t.Run(tt.name, func(t *testing.T) {
145+
result := GetUniqueEndpoints(tt.endpoints, tt.endpointsAttached)
146+
if !cmp.Equal(tt.expected, result) {
147+
t.Errorf("Test %s failed: expected %v, got %v", tt.name, tt.expected, result)
148+
}
149+
})
150+
}
151+
}
152+
89153
func testSerialCases(t *testing.T, tests []*SerialTestCase, finalSleep time.Duration) {
90154
for i, tt := range tests {
91155
i := i

0 commit comments

Comments
 (0)