@@ -14,6 +14,7 @@ import (
1414 "github.com/Azure/azure-container-networking/telemetry"
1515 corev1 "k8s.io/api/core/v1"
1616 networkingv1 "k8s.io/api/networking/v1"
17+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1718 "k8s.io/apimachinery/pkg/version"
1819 "k8s.io/client-go/informers"
1920 coreinformers "k8s.io/client-go/informers/core/v1"
@@ -22,9 +23,10 @@ import (
2223 "k8s.io/client-go/tools/cache"
2324)
2425
25- var (
26- hostNetAgentURLForNpm = "http://168.63.129.16/machine/plugins?comp=netagent&type=npmreport"
27- contentType = "application/json"
// Package-level constants for NPM telemetry reporting.
26+ const (
// hostNetAgentURLForNpm is the URL string associated with NPM reports.
// NOTE(review): presumably the endpoint reports are posted to; its use is
// not visible in this chunk — confirm against the report manager setup.
27+ hostNetAgentURLForNpm = "http://168.63.129.16/machine/plugins?comp=netagent&type=npmreport"
// contentType is assigned to the ContentType field when the report manager
// is built in NewNetworkPolicyManager.
28+ contentType = "application/json"
// telemetryRetryWaitTimeInSeconds is multiplied by time.Second to get the
// pause between connection attempts in connectToTelemetryServer.
29+ telemetryRetryWaitTimeInSeconds = 60
2830)
2931
3032// NetworkPolicyManager contains informers for pod, namespace and networkpolicy.
@@ -44,17 +46,41 @@ type NetworkPolicyManager struct {
4446 clusterState telemetry.ClusterState
4547 reportManager * telemetry.ReportManager
4648
47- serverVersion * version.Info
49+ serverVersion * version.Info
50+ TelemetryEnabled bool
4851}
4952
5053// GetClusterState returns current cluster state.
5154func (npMgr * NetworkPolicyManager ) GetClusterState () telemetry.ClusterState {
55+ pods , err := npMgr .clientset .CoreV1 ().Pods ("" ).List (metav1.ListOptions {})
56+ if err != nil {
57+ log .Printf ("Error Listing pods in GetClusterState" )
58+ }
59+
60+ namespaces , err := npMgr .clientset .CoreV1 ().Namespaces ().List (metav1.ListOptions {})
61+ if err != nil {
62+ log .Printf ("Error Listing namespaces in GetClusterState" )
63+ }
64+
65+ networkpolicies , err := npMgr .clientset .NetworkingV1 ().NetworkPolicies ("" ).List (metav1.ListOptions {})
66+ if err != nil {
67+ log .Printf ("Error Listing networkpolicies in GetClusterState" )
68+ }
69+
70+ npMgr .clusterState .PodCount = len (pods .Items )
71+ npMgr .clusterState .NsCount = len (namespaces .Items )
72+ npMgr .clusterState .NwPolicyCount = len (networkpolicies .Items )
73+
5274 return npMgr .clusterState
5375}
5476
5577// UpdateAndSendReport updates the npm report then sends it.
5678// This function should only be called when npMgr is locked.
5779func (npMgr * NetworkPolicyManager ) UpdateAndSendReport (err error , eventMsg string ) error {
80+ if ! npMgr .TelemetryEnabled {
81+ return nil
82+ }
83+
5884 clusterState := npMgr .GetClusterState ()
5985 v := reflect .ValueOf (npMgr .reportManager .Report ).Elem ().FieldByName ("ClusterState" )
6086 if v .CanSet () {
@@ -69,7 +95,10 @@ func (npMgr *NetworkPolicyManager) UpdateAndSendReport(err error, eventMsg strin
6995 reflect .ValueOf (npMgr .reportManager .Report ).Elem ().FieldByName ("EventMessage" ).SetString (err .Error ())
7096 }
7197
72- return npMgr .reportManager .SendReport (nil )
98+ var telemetryBuffer * telemetry.TelemetryBuffer
99+ connectToTelemetryServer (telemetryBuffer )
100+
101+ return npMgr .reportManager .SendReport (telemetryBuffer )
73102}
74103
75104// Run starts shared informers and waits for the shared informer cache to sync.
@@ -93,8 +122,33 @@ func (npMgr *NetworkPolicyManager) Run(stopCh <-chan struct{}) error {
93122 return nil
94123}
95124
125+ func connectToTelemetryServer (telemetryBuffer * telemetry.TelemetryBuffer ) {
126+ for {
127+ telemetryBuffer = telemetry .NewTelemetryBuffer ("" )
128+ err := telemetryBuffer .StartServer ()
129+ if err == nil || telemetryBuffer .FdExists {
130+ connErr := telemetryBuffer .Connect ()
131+ if connErr == nil {
132+ break
133+ }
134+
135+ log .Printf ("[NPM-Telemetry] Failed to establish telemetry manager connection." )
136+ time .Sleep (time .Second * telemetryRetryWaitTimeInSeconds )
137+ }
138+ }
139+ }
140+
96141// RunReportManager starts NPMReportManager and sends telemetry periodically.
97142func (npMgr * NetworkPolicyManager ) RunReportManager () {
143+ if ! npMgr .TelemetryEnabled {
144+ return
145+ }
146+
147+ var telemetryBuffer * telemetry.TelemetryBuffer
148+ connectToTelemetryServer (telemetryBuffer )
149+
150+ go telemetryBuffer .BufferAndPushData (time .Duration (0 ))
151+
98152 for {
99153 clusterState := npMgr .GetClusterState ()
100154 v := reflect .ValueOf (npMgr .reportManager .Report ).Elem ().FieldByName ("ClusterState" )
@@ -104,11 +158,12 @@ func (npMgr *NetworkPolicyManager) RunReportManager() {
104158 v .FieldByName ("NwPolicyCount" ).SetInt (int64 (clusterState .NwPolicyCount ))
105159 }
106160
107- if err := npMgr .reportManager .SendReport (nil ); err != nil {
108- log .Printf ("Error sending NPM telemetry report" )
161+ if err := npMgr .reportManager .SendReport (telemetryBuffer ); err != nil {
162+ log .Printf ("[NPM-Telemetry] Error sending NPM telemetry report" )
163+ connectToTelemetryServer (telemetryBuffer )
109164 }
110165
111- time .Sleep (1 * time .Minute )
166+ time .Sleep (5 * time .Minute )
112167 }
113168}
114169
@@ -150,7 +205,8 @@ func NewNetworkPolicyManager(clientset *kubernetes.Clientset, informerFactory in
150205 ContentType : contentType ,
151206 Report : & telemetry.NPMReport {},
152207 },
153- serverVersion : serverVersion ,
208+ serverVersion : serverVersion ,
209+ TelemetryEnabled : true ,
154210 }
155211
156212 clusterID := util .GetClusterID (npMgr .nodeName )
0 commit comments