Skip to content

Commit ad516c0

Browse files
authored
feat: [NPM] Adding DPShim layer in controller pods (#1206)
* initial touches to create sets * adding initial touches to dpshim * deprecating initialize DP func * feat: [NPM] Adding DPShim layer in controller pods * correcting an import error * Adding some UTs * adding a UT * Addressing some comments * Moving a UT to linux specific file * Fixing some issues with controller pod * Adding some dns policies and logs for debugging * Moving around outchannel to help with hydration of new clients * removing pass by ref * Adding http server in daemon for pprof * Adding a new grpc option to wait * Fixing 100% cpu in daemon * Fixing some logic in list management * Applying some golints * adding mutex * Addressing comments and solving a bug. Cyclonus seems to be good now * Fixing a bug * Addressing a comment * fixing an issue and addressing comments
1 parent 297a157 commit ad516c0

33 files changed

+1489
-573
lines changed

npm/cmd/start_daemon.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import (
99
"github.com/Azure/azure-container-networking/common"
1010
npmconfig "github.com/Azure/azure-container-networking/npm/config"
1111
"github.com/Azure/azure-container-networking/npm/daemon"
12+
restserver "github.com/Azure/azure-container-networking/npm/http/server"
13+
"github.com/Azure/azure-container-networking/npm/metrics"
1214
"github.com/Azure/azure-container-networking/npm/pkg/controlplane/goalstateprocessor"
1315
"github.com/Azure/azure-container-networking/npm/pkg/dataplane"
1416
"github.com/Azure/azure-container-networking/npm/pkg/models"
@@ -49,6 +51,9 @@ func startDaemon(config npmconfig.Config) error {
4951
pod := os.Getenv(podNameEnv)
5052
node := os.Getenv(nodeNameEnv)
5153

54+
klog.Infof("initializing metrics")
55+
metrics.InitializeAll()
56+
5257
addr := config.Transport.Address + ":" + strconv.Itoa(config.Transport.ServicePort)
5358
ctx := context.Background()
5459
err := initLogging()
@@ -65,6 +70,10 @@ func startDaemon(config npmconfig.Config) error {
6570
return fmt.Errorf("failed to create dataplane with error %w", err)
6671
}
6772

73+
dp.RunPeriodicTasks()
74+
// TODO Daemon should implement cache encoder
75+
go restserver.NPMRestServerListenAndServe(config, nil)
76+
6877
client, err := transport.NewEventsClient(ctx, pod, node, addr)
6978
if err != nil {
7079
klog.Errorf("failed to create dataplane events client with error %v", err)
@@ -88,5 +97,6 @@ func startDaemon(config npmconfig.Config) error {
8897
klog.Errorf("failed to start dataplane : %v", err)
8998
return fmt.Errorf("failed to start dataplane: %w", err)
9099
}
100+
91101
return nil
92102
}

npm/cmd/start_server.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"github.com/Azure/azure-container-networking/npm/controller"
1212
restserver "github.com/Azure/azure-container-networking/npm/http/server"
1313
"github.com/Azure/azure-container-networking/npm/metrics"
14-
"github.com/Azure/azure-container-networking/npm/pkg/dataplane"
1514
"github.com/Azure/azure-container-networking/npm/pkg/dataplane/dpshim"
1615
"github.com/Azure/azure-container-networking/npm/pkg/transport"
1716
"github.com/spf13/cobra"
@@ -60,6 +59,9 @@ func startControlplane(config npmconfig.Config, flags npmconfig.Flags) error {
6059
return err
6160
}
6261

62+
klog.Infof("initializing metrics")
63+
metrics.InitializeAll()
64+
6365
// Create the kubernetes client
6466
var k8sConfig *rest.Config
6567
if flags.KubeConfigPath == "" {
@@ -96,16 +98,14 @@ func startControlplane(config npmconfig.Config, flags npmconfig.Flags) error {
9698

9799
k8sServerVersion := k8sServerVersion(clientset)
98100

99-
var dp dataplane.GenericDataplane
100-
101-
mgr := transport.NewEventsServer(context.Background(), config.Transport.Port)
102-
103-
dp, err = dpshim.NewDPSim(mgr.InputChannel())
101+
dp, err := dpshim.NewDPSim(wait.NeverStop)
104102
if err != nil {
105103
klog.Errorf("failed to create dataplane shim with error: %v", err)
106104
return fmt.Errorf("failed to create dataplane with error: %w", err)
107105
}
108106

107+
mgr := transport.NewEventsServer(context.Background(), config.Transport.Port, dp)
108+
109109
npMgr, err := controller.NewNetworkPolicyServer(config, factory, mgr, dp, version, k8sServerVersion)
110110
if err != nil {
111111
klog.Errorf("failed to create NPM controlplane manager with error: %v", err)

npm/deploy/kustomize/overlays/daemon/deployment.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ spec:
7171
- name: azure-npm-config
7272
mountPath: /etc/azure-npm
7373
hostNetwork: true
74+
dnsPolicy: ClusterFirstWithHostNet
7475
volumes:
7576
- name: log
7677
hostPath:

npm/deploy/manifests/daemon/azure-npm.yaml

Lines changed: 91 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,24 @@ metadata:
1414
name: azure-npm
1515
namespace: kube-system
1616
rules:
17-
- apiGroups:
18-
- ""
19-
resources:
20-
- pods
21-
- nodes
22-
- namespaces
23-
verbs:
24-
- get
25-
- list
26-
- watch
27-
- apiGroups:
28-
- networking.k8s.io
29-
resources:
30-
- networkpolicies
31-
verbs:
32-
- get
33-
- list
34-
- watch
17+
- apiGroups:
18+
- ""
19+
resources:
20+
- pods
21+
- nodes
22+
- namespaces
23+
verbs:
24+
- get
25+
- list
26+
- watch
27+
- apiGroups:
28+
- networking.k8s.io
29+
resources:
30+
- networkpolicies
31+
verbs:
32+
- get
33+
- list
34+
- watch
3535
---
3636
apiVersion: rbac.authorization.k8s.io/v1
3737
kind: ClusterRoleBinding
@@ -45,9 +45,9 @@ roleRef:
4545
kind: ClusterRole
4646
name: azure-npm
4747
subjects:
48-
- kind: ServiceAccount
49-
name: azure-npm
50-
namespace: kube-system
48+
- kind: ServiceAccount
49+
name: azure-npm
50+
namespace: kube-system
5151
---
5252
apiVersion: v1
5353
data:
@@ -80,15 +80,15 @@ metadata:
8080
labels:
8181
app: azure-npm
8282
component: daemon
83-
name: npm-deamon-metrics-cluster-service
83+
name: npm-daemon-metrics-cluster-service
8484
namespace: kube-system
8585
spec:
8686
ports:
87-
- name: metrics
88-
port: 9000
89-
targetPort: 10091
87+
- name: metrics
88+
port: 9000
89+
targetPort: 10091
9090
selector:
91-
component: deamon
91+
component: daemon
9292
k8s-app: azure-npm
9393
---
9494
apiVersion: apps/v1
@@ -98,7 +98,7 @@ metadata:
9898
addonmanager.kubernetes.io/mode: EnsureExists
9999
app: azure-npm
100100
component: daemon
101-
name: azure-npm-deamon
101+
name: azure-npm-daemon
102102
namespace: kube-system
103103
spec:
104104
selector:
@@ -115,72 +115,73 @@ spec:
115115
k8s-app: azure-npm
116116
spec:
117117
containers:
118-
- args:
119-
- start
120-
- daemon
121-
command:
122-
- azure-npm
123-
env:
124-
- name: HOSTNAME
125-
valueFrom:
126-
fieldRef:
127-
apiVersion: v1
128-
fieldPath: spec.nodeName
129-
- name: NPM_CONFIG
130-
value: /etc/azure-npm/azure-npm.json
131-
- name: DAEMON_POD_NAME
132-
valueFrom:
133-
fieldRef:
134-
fieldPath: metadata.name
135-
- name: DAEMON_NODE_NAME
136-
valueFrom:
137-
fieldRef:
138-
fieldPath: spec.nodeName
139-
image: azure-npm:v1.4.1
140-
name: azure-npm
141-
ports:
142-
- name: metrics
143-
containerPort: 10091
144-
resources:
145-
limits:
146-
cpu: 250m
147-
memory: 300Mi
148-
requests:
149-
cpu: 250m
150-
securityContext:
151-
privileged: true
152-
volumeMounts:
153-
- mountPath: /var/log
154-
name: log
155-
- mountPath: /run/xtables.lock
156-
name: xtables-lock
157-
- mountPath: /etc/protocols
158-
name: protocols
159-
- mountPath: /etc/azure-npm
160-
name: azure-npm-config
118+
- args:
119+
- start
120+
- daemon
121+
command:
122+
- azure-npm
123+
env:
124+
- name: HOSTNAME
125+
valueFrom:
126+
fieldRef:
127+
apiVersion: v1
128+
fieldPath: spec.nodeName
129+
- name: NPM_CONFIG
130+
value: /etc/azure-npm/azure-npm.json
131+
- name: DAEMON_POD_NAME
132+
valueFrom:
133+
fieldRef:
134+
fieldPath: metadata.name
135+
- name: DAEMON_NODE_NAME
136+
valueFrom:
137+
fieldRef:
138+
fieldPath: spec.nodeName
139+
image: azure-npm:v1.4.1
140+
name: azure-npm
141+
ports:
142+
- name: metrics
143+
containerPort: 10091
144+
resources:
145+
limits:
146+
cpu: 250m
147+
memory: 300Mi
148+
requests:
149+
cpu: 250m
150+
securityContext:
151+
privileged: true
152+
volumeMounts:
153+
- mountPath: /var/log
154+
name: log
155+
- mountPath: /run/xtables.lock
156+
name: xtables-lock
157+
- mountPath: /etc/protocols
158+
name: protocols
159+
- mountPath: /etc/azure-npm
160+
name: azure-npm-config
161161
hostNetwork: true
162+
dnsPolicy: ClusterFirstWithHostNet
162163
priorityClassName: system-node-critical
163164
serviceAccountName: azure-npm
164165
tolerations:
165-
- effect: NoExecute
166-
operator: Exists
167-
- effect: NoSchedule
168-
operator: Exists
169-
- key: CriticalAddonsOnly
170-
operator: Exists
166+
- effect: NoExecute
167+
operator: Exists
168+
- effect: NoSchedule
169+
operator: Exists
170+
- key: CriticalAddonsOnly
171+
operator: Exists
171172
volumes:
172-
- hostPath:
173-
path: /var/log
174-
type: Directory
175-
name: log
176-
- hostPath:
177-
path: /run/xtables.lock
178-
type: File
179-
name: xtables-lock
180-
- hostPath:
181-
path: /etc/protocols
182-
type: File
183-
name: protocols
184-
- configMap:
173+
- hostPath:
174+
path: /var/log
175+
type: Directory
176+
name: log
177+
- hostPath:
178+
path: /run/xtables.lock
179+
type: File
180+
name: xtables-lock
181+
- hostPath:
182+
path: /etc/protocols
183+
type: File
184+
name: protocols
185+
- configMap:
186+
name: azure-npm-config
185187
name: azure-npm-config
186-
name: azure-npm-config

npm/http/server/server.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ func NPMRestServerListenAndServe(config npmconfig.Config, npmEncoder json.Marsha
3232
rs.router.Handle(api.ClusterMetricsPath, metrics.GetHandler(metrics.ClusterMetrics))
3333
}
3434

35-
if config.Toggles.EnableHTTPDebugAPI {
35+
if config.Toggles.EnableHTTPDebugAPI && npmEncoder != nil {
3636
// ACN CLI debug handlers
3737
rs.router.Handle(api.NPMMgrPath, rs.npmCacheHandler(npmEncoder)).Methods(http.MethodGet)
3838
}

npm/npm.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ func (npMgr *NetworkPolicyManager) GetAppVersion() string {
157157
func (npMgr *NetworkPolicyManager) Start(config npmconfig.Config, stopCh <-chan struct{}) error {
158158
if !config.Toggles.EnableV2NPM {
159159
// Do initialization of data plane before starting syncup of each controller to avoid heavy call to api-server
160-
if err := npMgr.NetPolControllerV1.ResetDataPlane(); err != nil {
160+
if err := npMgr.NetPolControllerV1.BootupDataplane(); err != nil {
161161
return fmt.Errorf("Failed to initialized data plane with err %w", err)
162162
}
163163
}

npm/pkg/controlplane/controllers/v1/networkPolicyController.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ func NewNetworkPolicyController(npInformer networkinginformers.NetworkPolicyInfo
6464
return netPolController
6565
}
6666

67-
// initializeDataPlane do all initialization tasks for data plane
67+
// BootupDataplane does all initialization tasks for data plane
6868
// TODO(jungukcho) Need to refactor UninitNpmChains since it assumes AZURE-NPM chains already exist
69-
func (c *NetworkPolicyController) ResetDataPlane() error {
69+
func (c *NetworkPolicyController) BootupDataplane() error {
7070
klog.Infof("Initiailize data plane. Clean up Azure-NPM chains and start reconcile iptables")
7171

7272
// TODO(jungukcho): will clean-up error handling codes to initialize iptables and ipset in a separate PR

npm/pkg/controlplane/controllers/v2/podController.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -589,7 +589,7 @@ func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error {
589589
// Get lists of podLabelKey and podLabelKey + podLabelValue, and then start deleting them from ipsets
590590
for labelKey, labelVal := range cachedNpmPod.Labels {
591591
labelKeyValue := util.GetIpSetFromLabelKV(labelKey, labelVal)
592-
klog.Infof("Deleting pod %s (ip : %s) to ipset %s and %s", cachedNpmPodKey, cachedNpmPod.PodIP, labelKey, labelKeyValue)
592+
klog.Infof("Deleting pod %s (ip : %s) from ipsets %s and %s", cachedNpmPodKey, cachedNpmPod.PodIP, labelKey, labelKeyValue)
593593
if err = c.dp.RemoveFromSets(
594594
[]*ipsets.IPSetMetadata{
595595
ipsets.NewIPSetMetadata(labelKey, ipsets.KeyLabelOfPod),

0 commit comments

Comments
 (0)