Skip to content

Commit 83945fd

Browse files
fixed logging part of telemetry (#312)
* fixed logging part of telemetry * fixed an issue * added more test coverage * fixed an issue * fixed invalid condition and added UT * initialize telemetry logger as part of telemetry server * changed checkifsockexists to sockexists * changed interval time to push telemetry data to host
1 parent 159cd85 commit 83945fd

File tree

6 files changed

+98
-80
lines changed

6 files changed

+98
-80
lines changed

cni/network/plugin/main.go

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,52 @@ func handleIfCniUpdate(update func(*skel.CmdArgs) error) (bool, error) {
133133
return isupdate, nil
134134
}
135135

136+
// startTelemetryService - Kills if any telemetry service runs and start new telemetry service
137+
func startTelemetryService(path string) error {
138+
platform.KillProcessByName(telemetry.TelemetryServiceProcessName)
139+
140+
log.Printf("[cni] Starting telemetry service process")
141+
142+
if err := common.StartProcess(path); err != nil {
143+
log.Printf("[Telemetry] Failed to start telemetry service process :%v", err)
144+
return err
145+
}
146+
147+
log.Printf("[cni] Telemetry service started")
148+
149+
for attempt := 0; attempt < 5; attempt++ {
150+
if telemetry.SockExists() {
151+
break
152+
}
153+
154+
time.Sleep(200 * time.Millisecond)
155+
}
156+
157+
return nil
158+
}
159+
160+
func connectToTelemetryService(tb *telemetry.TelemetryBuffer) {
161+
path := fmt.Sprintf("%v/%v", telemetry.CniInstallDir, telemetry.TelemetryServiceProcessName)
162+
163+
for attempt := 0; attempt < 2; attempt++ {
164+
if err := tb.Connect(); err != nil {
165+
log.Printf("Connection to telemetry socket failed: %v", err)
166+
tb.Cleanup(telemetry.FdName)
167+
168+
if isExists, _ := common.CheckIfFileExists(path); !isExists {
169+
log.Printf("Skip starting telemetry service as file didn't exist")
170+
return
171+
}
172+
173+
startTelemetryService(path)
174+
} else {
175+
tb.Connected = true
176+
log.Printf("Connected to telemetry service")
177+
return
178+
}
179+
}
180+
}
181+
136182
// Main is the entry point for CNI network plugin.
137183
func main() {
138184

@@ -170,20 +216,7 @@ func main() {
170216
}
171217

172218
tb := telemetry.NewTelemetryBuffer("")
173-
174-
for attempt := 0; attempt < 2; attempt++ {
175-
err = tb.Connect()
176-
if err != nil {
177-
log.Printf("Connection to telemetry socket failed: %v", err)
178-
tb.Cleanup(telemetry.FdName)
179-
telemetry.StartTelemetryService()
180-
} else {
181-
tb.Connected = true
182-
log.Printf("Connected to telemetry service")
183-
break
184-
}
185-
}
186-
219+
connectToTelemetryService(tb)
187220
defer tb.Close()
188221

189222
t := time.Now()

telemetry/telemetry.go

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"strings"
1818

1919
"github.com/Azure/azure-container-networking/common"
20+
"github.com/Azure/azure-container-networking/log"
2021
"github.com/Azure/azure-container-networking/platform"
2122
)
2223

@@ -236,29 +237,16 @@ func (report *NPMReport) GetReport(clusterID, nodeName, npmVersion, kubernetesVe
236237
// SendReport will send telemetry report to HostNetAgent.
237238
func (reportMgr *ReportManager) SendReport(tb *TelemetryBuffer) error {
238239
var err error
239-
if tb != nil && tb.Connected {
240-
telemetryLogger.Printf("[Telemetry] Going to send Telemetry report to hostnetagent")
241-
242-
switch reportMgr.Report.(type) {
243-
case *CNIReport:
244-
telemetryLogger.Printf("[Telemetry] %+v", reportMgr.Report.(*CNIReport))
245-
case *NPMReport:
246-
telemetryLogger.Printf("[Telemetry] %+v", reportMgr.Report.(*NPMReport))
247-
case *DNCReport:
248-
telemetryLogger.Printf("[Telemetry] %+v", reportMgr.Report.(*DNCReport))
249-
default:
250-
telemetryLogger.Printf("[Telemetry] Invalid report type")
251-
}
240+
var report []byte
252241

253-
report, err := reportMgr.ReportToBytes()
242+
if tb != nil && tb.Connected {
243+
report, err = reportMgr.ReportToBytes()
254244
if err == nil {
255245
// If write fails, try to re-establish connections as server/client
256246
if _, err = tb.Write(report); err != nil {
257247
tb.Cancel()
258248
}
259249
}
260-
} else {
261-
err = fmt.Errorf("Not connected to telemetry server or tb is nil")
262250
}
263251

264252
return err
@@ -284,21 +272,20 @@ func (reportMgr *ReportManager) SetReportState(telemetryFile string) error {
284272

285273
_, err = f.Write(reportBytes)
286274
if err != nil {
287-
telemetryLogger.Printf("[Telemetry] Error while writing to file %v", err)
275+
log.Printf("[Telemetry] Error while writing to file %v", err)
288276
return fmt.Errorf("[Telemetry] Error while writing to file %v", err)
289277
}
290278

291279
// set IsNewInstance in report
292280
reflect.ValueOf(reportMgr.Report).Elem().FieldByName("IsNewInstance").SetBool(false)
293-
telemetryLogger.Printf("[Telemetry] SetReportState succeeded")
294281
return nil
295282
}
296283

297284
// GetReportState will check if report is sent at least once by checking telemetry file.
298285
func (reportMgr *ReportManager) GetReportState(telemetryFile string) bool {
299286
// try to set IsNewInstance in report
300287
if _, err := os.Stat(telemetryFile); os.IsNotExist(err) {
301-
telemetryLogger.Printf("[Telemetry] File not exist %v", telemetryFile)
288+
log.Printf("[Telemetry] File not exist %v", telemetryFile)
302289
reflect.ValueOf(reportMgr.Report).Elem().FieldByName("IsNewInstance").SetBool(true)
303290
return false
304291
}
@@ -430,7 +417,10 @@ func (report *CNIReport) GetOrchestratorDetails() {
430417
}
431418

432419
// ReportToBytes - returns the report bytes
433-
func (reportMgr *ReportManager) ReportToBytes() (report []byte, err error) {
420+
func (reportMgr *ReportManager) ReportToBytes() ([]byte, error) {
421+
var err error
422+
var report []byte
423+
434424
switch reportMgr.Report.(type) {
435425
case *CNIReport:
436426
case *NPMReport:
@@ -440,9 +430,10 @@ func (reportMgr *ReportManager) ReportToBytes() (report []byte, err error) {
440430
err = fmt.Errorf("[Telemetry] Invalid report type")
441431
}
442432

443-
if err == nil {
444-
report, err = json.Marshal(reportMgr.Report)
433+
if err != nil {
434+
return []byte{}, err
445435
}
446436

447-
return
437+
report, err = json.Marshal(reportMgr.Report)
438+
return report, err
448439
}

telemetry/telemetry_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ func TestMain(m *testing.M) {
9494
reportManager.ContentType = "application/json"
9595
reportManager.Report = &CNIReport{}
9696

97+
if err := InitTelemetryLogger(); err == nil {
98+
defer CloseTelemetryLogger()
99+
}
100+
97101
tb = NewTelemetryBuffer(hostAgentUrl)
98102
err = tb.StartServer()
99103
if err == nil {
@@ -158,6 +162,14 @@ func TestSendTelemetry(t *testing.T) {
158162
if err != nil {
159163
t.Errorf("SendTelemetry failed due to %v", err)
160164
}
165+
166+
i := 3
167+
rpMgr := &ReportManager{}
168+
rpMgr.Report = &i
169+
err = rpMgr.SendReport(tb)
170+
if err == nil {
171+
t.Errorf("SendTelemetry not failed for incorrect report type")
172+
}
161173
}
162174

163175
func TestReceiveTelemetryData(t *testing.T) {
@@ -214,6 +226,10 @@ func TestClientCloseTelemetryConnection(t *testing.T) {
214226
go tb.BufferAndPushData(0)
215227
}
216228

229+
if !SockExists() {
230+
t.Errorf("telemetry sock doesn't exist")
231+
}
232+
217233
// create client telemetrybuffer and connect to server
218234
tb1 := NewTelemetryBuffer(hostAgentUrl)
219235
if err := tb1.Connect(); err != nil {

telemetry/telemetrybuffer.go

Lines changed: 14 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ import (
1515
"sync"
1616
"time"
1717

18-
"github.com/Azure/azure-container-networking/common"
1918
"github.com/Azure/azure-container-networking/log"
20-
"github.com/Azure/azure-container-networking/platform"
2119
)
2220

2321
// FdName - file descriptor name
@@ -30,7 +28,7 @@ const (
3028
FdName = "azure-vnet-telemetry"
3129
Delimiter = '\n'
3230
azureHostReportURL = "http://168.63.129.16/machine/plugins?comp=netagent&type=payload"
33-
DefaultInterval = 10 * time.Second
31+
DefaultInterval = 30 * time.Second
3432
logName = "azure-vnet-telemetry"
3533
MaxPayloadSize uint16 = 65535
3634
dnc = "DNC"
@@ -64,6 +62,14 @@ type Payload struct {
6462
CNSReports []CNSReport
6563
}
6664

65+
func InitTelemetryLogger() error {
66+
return telemetryLogger.SetTarget(log.TargetLogfile)
67+
}
68+
69+
func CloseTelemetryLogger() {
70+
telemetryLogger.Close()
71+
}
72+
6773
// NewTelemetryBuffer - create a new TelemetryBuffer
6874
func NewTelemetryBuffer(hostReportURL string) *TelemetryBuffer {
6975
var tb TelemetryBuffer
@@ -80,11 +86,6 @@ func NewTelemetryBuffer(hostReportURL string) *TelemetryBuffer {
8086
tb.payload.NPMReports = make([]NPMReport, 0)
8187
tb.payload.CNSReports = make([]CNSReport, 0)
8288

83-
err := telemetryLogger.SetTarget(log.TargetLogfile)
84-
if err != nil {
85-
fmt.Printf("Failed to configure logging: %v\n", err)
86-
}
87-
8889
return &tb
8990
}
9091

@@ -107,6 +108,8 @@ func (tb *TelemetryBuffer) StartServer() error {
107108
return err
108109
}
109110

111+
InitTelemetryLogger()
112+
110113
telemetryLogger.Printf("Telemetry service started")
111114
// Spawn server goroutine to handle incoming connections
112115
go func() {
@@ -251,23 +254,21 @@ func (tb *TelemetryBuffer) Cancel() {
251254
// Close - close all connections
252255
func (tb *TelemetryBuffer) Close() {
253256
if tb.client != nil {
254-
telemetryLogger.Printf("client close")
255257
tb.client.Close()
256258
tb.client = nil
257259
}
258260

259261
if tb.listener != nil {
260262
telemetryLogger.Printf("server close")
261263
tb.listener.Close()
262-
tb.listener = nil
264+
CloseTelemetryLogger()
263265
}
264266

265267
tb.mutex.Lock()
266268
defer tb.mutex.Unlock()
267269

268270
for _, conn := range tb.connections {
269271
if conn != nil {
270-
telemetryLogger.Printf("connection close as server closed")
271272
conn.Close()
272273
}
273274
}
@@ -390,11 +391,12 @@ func getHostMetadata() (Metadata, error) {
390391
if err == nil {
391392
var metadata Metadata
392393
if err = json.Unmarshal(content, &metadata); err == nil {
393-
telemetryLogger.Printf("[Telemetry] Returning hostmetadata from state")
394394
return metadata, nil
395395
}
396396
}
397397

398+
telemetryLogger.Printf("[Telemetry] Request metadata from wireserver")
399+
398400
req, err := http.NewRequest("GET", metadataURL, nil)
399401
if err != nil {
400402
return Metadata{}, err
@@ -424,27 +426,3 @@ func getHostMetadata() (Metadata, error) {
424426

425427
return metareport.Metadata, err
426428
}
427-
428-
// StartTelemetryService - Kills if any telemetry service runs and start new telemetry service
429-
func StartTelemetryService() error {
430-
platform.KillProcessByName(telemetryServiceProcessName)
431-
432-
telemetryLogger.Printf("[Telemetry] Starting telemetry service process")
433-
path := fmt.Sprintf("%v/%v", cniInstallDir, telemetryServiceProcessName)
434-
if err := common.StartProcess(path); err != nil {
435-
telemetryLogger.Printf("[Telemetry] Failed to start telemetry service process :%v", err)
436-
return err
437-
}
438-
439-
telemetryLogger.Printf("[Telemetry] Telemetry service started")
440-
441-
for attempt := 0; attempt < 5; attempt++ {
442-
if checkIfSockExists() {
443-
break
444-
}
445-
446-
time.Sleep(200 * time.Millisecond)
447-
}
448-
449-
return nil
450-
}

telemetry/telemetrybuffer_linux.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ import (
1010
)
1111

1212
const (
13-
fdTemplate = "/tmp/%s.sock"
14-
telemetryServiceProcessName = "azure-vnet-telemetry"
15-
cniInstallDir = "/opt/cni/bin"
13+
fdTemplate = "/var/run/%s.sock"
14+
TelemetryServiceProcessName = "azure-vnet-telemetry"
15+
CniInstallDir = "/opt/cni/bin"
1616
metadataFile = "/tmp/azuremetadata.json"
1717
)
1818

@@ -41,7 +41,7 @@ func (tb *TelemetryBuffer) Cleanup(name string) error {
4141
return os.Remove(fmt.Sprintf(fdTemplate, name))
4242
}
4343

44-
func checkIfSockExists() bool {
44+
func SockExists() bool {
4545
if _, err := os.Stat(fmt.Sprintf(fdTemplate, FdName)); !os.IsNotExist(err) {
4646
return true
4747
}

telemetry/telemetrybuffer_windows.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ import (
1212

1313
const (
1414
fdTemplate = "\\\\.\\pipe\\%s"
15-
telemetryServiceProcessName = "azure-vnet-telemetry.exe"
16-
cniInstallDir = "c:\\k\\azurecni\\bin"
15+
TelemetryServiceProcessName = "azure-vnet-telemetry.exe"
16+
CniInstallDir = "c:\\k\\azurecni\\bin"
1717
metadataFile = "azuremetadata.json"
1818
)
1919

@@ -43,7 +43,7 @@ func (tb *TelemetryBuffer) Cleanup(name string) error {
4343
}
4444

4545
// Check if telemetry unix domain socket exists
46-
func checkIfSockExists() bool {
46+
func SockExists() bool {
4747
if _, err := os.Stat(fmt.Sprintf(fdTemplate, FdName)); !os.IsNotExist(err) {
4848
return true
4949
}

0 commit comments

Comments
 (0)