Skip to content

Commit a11d510

Browse files
Config support for Telemetry service (#317)
* 1. Start the telemetry service as a separate process for all components (CNS/network monitor). 2. Added a telemetry config through which the reportToHost interval can be configured. 3. Added unit tests for the changes. * Added new files for telemetry testing. * Added tests for invalid cases. * Updated with a dummy subscription ID.
1 parent 0012ae5 commit a11d510

File tree

10 files changed

+262
-72
lines changed

10 files changed

+262
-72
lines changed

Makefile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -244,14 +244,16 @@ publish-azure-npm-image:
244244
.PHONY: cni-archive
245245
# Build the CNI archive and the CNI multitenancy archive. Each archive carries
# the azure-vnet, azure-vnet-ipam and azure-vnet-telemetry binaries, the
# platform conflist (installed as 10-azure.conflist) and the telemetry config.
cni-archive:
	cp cni/azure-$(GOOS).conflist $(CNI_BUILD_DIR)/10-azure.conflist
	cp telemetry/azure-vnet-telemetry.config $(CNI_BUILD_DIR)/azure-vnet-telemetry.config
	chmod 0755 $(CNI_BUILD_DIR)/azure-vnet$(EXE_EXT) $(CNI_BUILD_DIR)/azure-vnet-ipam$(EXE_EXT) $(CNI_BUILD_DIR)/azure-vnet-telemetry$(EXE_EXT)
	cd $(CNI_BUILD_DIR) && $(ARCHIVE_CMD) $(CNI_ARCHIVE_NAME) azure-vnet$(EXE_EXT) azure-vnet-ipam$(EXE_EXT) azure-vnet-telemetry$(EXE_EXT) 10-azure.conflist azure-vnet-telemetry.config
	chown $(BUILD_USER):$(BUILD_USER) $(CNI_BUILD_DIR)/$(CNI_ARCHIVE_NAME)
	mkdir -p $(CNI_MULTITENANCY_BUILD_DIR)
	cp cni/azure-$(GOOS)-multitenancy.conflist $(CNI_MULTITENANCY_BUILD_DIR)/10-azure.conflist
	cp telemetry/azure-vnet-telemetry.config $(CNI_MULTITENANCY_BUILD_DIR)/azure-vnet-telemetry.config
	cp $(CNI_BUILD_DIR)/azure-vnet$(EXE_EXT) $(CNI_BUILD_DIR)/azure-vnet-ipam$(EXE_EXT) $(CNI_BUILD_DIR)/azure-vnet-telemetry$(EXE_EXT) $(CNI_MULTITENANCY_BUILD_DIR)
	chmod 0755 $(CNI_MULTITENANCY_BUILD_DIR)/azure-vnet$(EXE_EXT) $(CNI_MULTITENANCY_BUILD_DIR)/azure-vnet-ipam$(EXE_EXT) $(CNI_MULTITENANCY_BUILD_DIR)/azure-vnet-telemetry$(EXE_EXT)
	cd $(CNI_MULTITENANCY_BUILD_DIR) && $(ARCHIVE_CMD) $(CNI_MULTITENANCY_ARCHIVE_NAME) azure-vnet$(EXE_EXT) azure-vnet-ipam$(EXE_EXT) azure-vnet-telemetry$(EXE_EXT) 10-azure.conflist azure-vnet-telemetry.config
	chown $(BUILD_USER):$(BUILD_USER) $(CNI_MULTITENANCY_BUILD_DIR)/$(CNI_MULTITENANCY_ARCHIVE_NAME)
256258

257259
# Create a CNM archive for the target platform.

cni/network/plugin/main.go

Lines changed: 8 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@ import (
2222
)
2323

2424
const (
25-
hostNetAgentURL = "http://168.63.129.16/machine/plugins?comp=netagent&type=cnireport"
26-
ipamQueryURL = "http://168.63.129.16/machine/plugins?comp=nmagent&type=getinterfaceinfov1"
27-
pluginName = "CNI"
25+
hostNetAgentURL = "http://168.63.129.16/machine/plugins?comp=netagent&type=cnireport"
26+
ipamQueryURL = "http://168.63.129.16/machine/plugins?comp=nmagent&type=getinterfaceinfov1"
27+
pluginName = "CNI"
28+
telemetryNumRetries = 5
29+
telemetryWaitTimeInMilliseconds = 200
2830
)
2931

3032
// Version is populated by make during build.
@@ -133,32 +135,9 @@ func handleIfCniUpdate(update func(*skel.CmdArgs) error) (bool, error) {
133135
return isupdate, nil
134136
}
135137

136-
// startTelemetryService - Kills if any telemetry service runs and start new telemetry service
137-
func startTelemetryService(path string) error {
138-
platform.KillProcessByName(telemetry.TelemetryServiceProcessName)
139-
140-
log.Printf("[cni] Starting telemetry service process")
141-
142-
if err := common.StartProcess(path); err != nil {
143-
log.Printf("[Telemetry] Failed to start telemetry service process :%v", err)
144-
return err
145-
}
146-
147-
log.Printf("[cni] Telemetry service started")
148-
149-
for attempt := 0; attempt < 5; attempt++ {
150-
if telemetry.SockExists() {
151-
break
152-
}
153-
154-
time.Sleep(200 * time.Millisecond)
155-
}
156-
157-
return nil
158-
}
159-
160138
func connectToTelemetryService(tb *telemetry.TelemetryBuffer) {
161139
path := fmt.Sprintf("%v/%v", telemetry.CniInstallDir, telemetry.TelemetryServiceProcessName)
140+
args := []string{"-d", telemetry.CniInstallDir}
162141

163142
for attempt := 0; attempt < 2; attempt++ {
164143
if err := tb.Connect(); err != nil {
@@ -170,7 +149,8 @@ func connectToTelemetryService(tb *telemetry.TelemetryBuffer) {
170149
return
171150
}
172151

173-
startTelemetryService(path)
152+
telemetry.StartTelemetryService(path, args)
153+
telemetry.WaitForTelemetrySocket(telemetryNumRetries, telemetryWaitTimeInMilliseconds)
174154
} else {
175155
tb.Connected = true
176156
log.Printf("Connected to telemetry service")

cni/telemetry/service/telemetrymain.go

Lines changed: 118 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,30 +3,145 @@ package main
33
// Entry point of the telemetry service if started by CNI
44

55
import (
6+
"fmt"
7+
"os"
8+
"runtime"
69
"time"
710

11+
acn "github.com/Azure/azure-container-networking/common"
12+
"github.com/Azure/azure-container-networking/log"
813
"github.com/Azure/azure-container-networking/telemetry"
914
)
1015

1116
const (
12-
reportToHostIntervalInSeconds = 60 * time.Second
13-
azurecnitelemetry = "azure-vnet-telemetry"
17+
reportToHostIntervalInSeconds = 30
18+
azureVnetTelemetry = "azure-vnet-telemetry"
19+
configExtension = ".config"
1420
)
1521

22+
var version string
23+
24+
var args = acn.ArgumentList{
25+
{
26+
Name: acn.OptLogLevel,
27+
Shorthand: acn.OptLogLevelAlias,
28+
Description: "Set the logging level",
29+
Type: "int",
30+
DefaultValue: acn.OptLogLevelInfo,
31+
ValueMap: map[string]interface{}{
32+
acn.OptLogLevelInfo: log.LevelInfo,
33+
acn.OptLogLevelDebug: log.LevelDebug,
34+
},
35+
},
36+
{
37+
Name: acn.OptLogTarget,
38+
Shorthand: acn.OptLogTargetAlias,
39+
Description: "Set the logging target",
40+
Type: "int",
41+
DefaultValue: acn.OptLogTargetFile,
42+
ValueMap: map[string]interface{}{
43+
acn.OptLogTargetSyslog: log.TargetSyslog,
44+
acn.OptLogTargetStderr: log.TargetStderr,
45+
acn.OptLogTargetFile: log.TargetLogfile,
46+
acn.OptLogStdout: log.TargetStdout,
47+
acn.OptLogMultiWrite: log.TargetStdOutAndLogFile,
48+
},
49+
},
50+
{
51+
Name: acn.OptLogLocation,
52+
Shorthand: acn.OptLogLocationAlias,
53+
Description: "Set the directory location where logs will be saved",
54+
Type: "string",
55+
DefaultValue: "",
56+
},
57+
{
58+
Name: acn.OptVersion,
59+
Shorthand: acn.OptVersionAlias,
60+
Description: "Print version information",
61+
Type: "bool",
62+
DefaultValue: false,
63+
},
64+
{
65+
Name: acn.OptTelemetryConfigDir,
66+
Shorthand: acn.OptTelemetryConfigDirAlias,
67+
Description: "Set the telmetry config directory",
68+
Type: "string",
69+
DefaultValue: telemetry.CniInstallDir,
70+
},
71+
}
72+
73+
// Prints description and version information.
74+
func printVersion() {
75+
fmt.Printf("Azure Container Telemetry Service\n")
76+
fmt.Printf("Version %v\n", version)
77+
}
78+
1679
func main() {
1780
var tb *telemetry.TelemetryBuffer
81+
var config telemetry.TelemetryConfig
82+
var configPath string
1883
var err error
1984

85+
acn.ParseArgs(&args, printVersion)
86+
logTarget := acn.GetArg(acn.OptLogTarget).(int)
87+
logDirectory := acn.GetArg(acn.OptLogLocation).(string)
88+
logLevel := acn.GetArg(acn.OptLogLevel).(int)
89+
configDirectory := acn.GetArg(acn.OptTelemetryConfigDir).(string)
90+
vers := acn.GetArg(acn.OptVersion).(bool)
91+
92+
if vers {
93+
printVersion()
94+
os.Exit(0)
95+
}
96+
97+
log.SetName(azureVnetTelemetry)
98+
log.SetLevel(logLevel)
99+
if logDirectory != "" {
100+
log.SetLogDirectory(logDirectory)
101+
}
102+
103+
err = log.SetTarget(logTarget)
104+
if err != nil {
105+
fmt.Printf("Failed to configure logging: %v\n", err)
106+
return
107+
}
108+
109+
log.Printf("args %+v", os.Args)
110+
111+
if runtime.GOOS == "linux" {
112+
configPath = fmt.Sprintf("%s/%s%s", configDirectory, azureVnetTelemetry, configExtension)
113+
} else {
114+
configPath = fmt.Sprintf("%s\\%s%s", configDirectory, azureVnetTelemetry, configExtension)
115+
}
116+
117+
log.Printf("[Telemetry] Config path: %s", configPath)
118+
119+
config, err = telemetry.ReadConfigFile(configPath)
120+
if err != nil {
121+
log.Printf("[Telemetry] Error reading telemetry config: %v", err)
122+
}
123+
124+
log.Printf("read config returned %+v", config)
125+
20126
for {
21127
tb = telemetry.NewTelemetryBuffer("")
128+
129+
log.Printf("[Telemetry] Starting telemetry server")
22130
err = tb.StartServer()
23131
if err == nil || tb.FdExists {
24132
break
25133
}
26134

135+
log.Printf("[Telemetry] Telemetry service starting failed: %v", err)
27136
tb.Cleanup(telemetry.FdName)
28137
time.Sleep(time.Millisecond * 200)
29138
}
30139

31-
tb.BufferAndPushData(reportToHostIntervalInSeconds)
140+
if config.ReportToHostIntervalInSeconds == 0 {
141+
config.ReportToHostIntervalInSeconds = reportToHostIntervalInSeconds
142+
}
143+
144+
log.Printf("[Telemetry] Report to host for an interval of %d seconds", config.ReportToHostIntervalInSeconds)
145+
tb.BufferAndPushData(config.ReportToHostIntervalInSeconds * time.Second)
146+
log.Close()
32147
}

common/config.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,8 @@ const (
6767
// CNI binary location
6868
OptCNIConfigFile = "cni-config-file"
6969
OptCNIConfigFileAlias = "cniconfig"
70+
71+
// Telemetry config Location
72+
OptTelemetryConfigDir = "telemetry-config-file"
73+
OptTelemetryConfigDirAlias = "d"
7074
)

common/utils.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ func GetInterfaceSubnetWithSpecificIp(ipAddr string) *net.IPNet {
104104
return nil
105105
}
106106

107-
func StartProcess(path string) error {
107+
func StartProcess(path string, args []string) error {
108108
var attr = os.ProcAttr{
109109
Env: os.Environ(),
110110
Files: []*os.File{
@@ -114,8 +114,8 @@ func StartProcess(path string) error {
114114
},
115115
}
116116

117-
args := []string{path}
118-
process, err := os.StartProcess(path, args, &attr)
117+
processArgs := append([]string{path}, args...)
118+
process, err := os.StartProcess(path, processArgs, &attr)
119119
if err == nil {
120120
// Release detaches the process
121121
return process.Release()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"reportToHostIntervalInSeconds": 30
3+
}

telemetry/metadata_test.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"location":"eastus","name":"k8s-agentpool1-42685608-0","offer":"aks","osType":"Linux","placementGroupId":"","platformFaultDomain":"0","platformUpdateDomain":"0","publisher":"microsoft-aks","resourceGroupName":"rgcnideftesttamil","sku":"aks-ubuntu-1604-201902","subscriptionId":"ea821859-912a-4d20-a4dd-e18a3ce5ba2c","tags":"aksEngineVersion:canary;creationSource:aksengine-k8s-agentpool1-42685608-0;orchestrator:Kubernetes:1.10.13;poolName:agentpool1;resourceNameSuffix:42685608","version":"2019.02.12","vmId":"6baf785b-397c-4967-9f75-cdb3d0df66c4","vmSize":"Standard_DS2_v2","KernelVersion":""}

telemetry/telemetry.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ func (report *CNIReport) GetInterfaceDetails(queryUrl string) {
325325
if resp.StatusCode != http.StatusOK {
326326
errMsg := fmt.Sprintf("Error while getting interface details. http code :%d", resp.StatusCode)
327327
report.InterfaceDetails.ErrorMessage = errMsg
328-
telemetryLogger.Printf(errMsg)
328+
log.Printf(errMsg)
329329
return
330330
}
331331

telemetry/telemetry_test.go

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,16 @@ import (
1111
"net/http"
1212
"net/url"
1313
"os"
14+
"runtime"
1415
"testing"
1516
"time"
1617

1718
"github.com/Azure/azure-container-networking/common"
19+
"github.com/Azure/azure-container-networking/platform"
20+
)
21+
22+
const (
23+
telemetryConfig = "azure-vnet-telemetry.config"
1824
)
1925

2026
var reportManager *ReportManager
@@ -89,15 +95,17 @@ func TestMain(m *testing.M) {
8995
return
9096
}
9197

98+
if runtime.GOOS == "linux" {
99+
platform.ExecuteCommand("cp metadata_test.json /tmp/azuremetadata.json")
100+
} else {
101+
platform.ExecuteCommand("copy metadata_test.json azuremetadata.json")
102+
}
103+
92104
reportManager = &ReportManager{}
93105
reportManager.HostNetAgentURL = "http://" + hostAgentUrl
94106
reportManager.ContentType = "application/json"
95107
reportManager.Report = &CNIReport{}
96108

97-
if err := InitTelemetryLogger(); err == nil {
98-
defer CloseTelemetryLogger()
99-
}
100-
101109
tb = NewTelemetryBuffer(hostAgentUrl)
102110
err = tb.StartServer()
103111
if err == nil {
@@ -109,6 +117,13 @@ func TestMain(m *testing.M) {
109117
}
110118

111119
exitCode := m.Run()
120+
121+
if runtime.GOOS == "linux" {
122+
platform.ExecuteCommand("rm /tmp/azuremetadata.json")
123+
} else {
124+
platform.ExecuteCommand("del azuremetadata.json")
125+
}
126+
112127
tb.Cleanup(FdName)
113128
os.Exit(exitCode)
114129
}
@@ -248,6 +263,38 @@ func TestClientCloseTelemetryConnection(t *testing.T) {
248263
tb.Cancel()
249264
}
250265

266+
func TestReadConfigFile(t *testing.T) {
267+
config, err := ReadConfigFile(telemetryConfig)
268+
if err != nil {
269+
t.Errorf("Read telemetry config failed with error %v", err)
270+
}
271+
272+
if config.ReportToHostIntervalInSeconds != 30 {
273+
t.Errorf("ReportToHostIntervalInSeconds not expected value. Got %d", config.ReportToHostIntervalInSeconds)
274+
}
275+
276+
config, err = ReadConfigFile("a.config")
277+
if err == nil {
278+
t.Errorf("[Telemetry] Didn't throw not found error: %v", err)
279+
}
280+
281+
config, err = ReadConfigFile("telemetry.go")
282+
if err == nil {
283+
t.Errorf("[Telemetry] Didn't report invalid telemetry config: %v", err)
284+
}
285+
}
286+
287+
func TestStartTelemetryService(t *testing.T) {
288+
err := StartTelemetryService("", nil)
289+
if err == nil {
290+
t.Errorf("StartTelemetryService didnt return error for incorrect service name %v", err)
291+
}
292+
}
293+
294+
func TestWaitForTelemetrySocket(t *testing.T) {
295+
WaitForTelemetrySocket(1, 10)
296+
}
297+
251298
func TestSetReportState(t *testing.T) {
252299
err := reportManager.SetReportState("a.json")
253300
if err != nil {

0 commit comments

Comments
 (0)