Skip to content

Commit 24f410b

Browse files
Aritra Basu authored and sknat committed
added TCP and session stats
Signed-off-by: Aritra Basu <aritrbas@cisco.com>
1 parent c583010 commit 24f410b

File tree

2 files changed

+1666
-66
lines changed

2 files changed

+1666
-66
lines changed

calico-vpp-agent/prometheus/prometheus.go

Lines changed: 172 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -83,87 +83,43 @@ func NewPrometheusServer(vpp *vpplink.VppLink, log *logrus.Entry) *PrometheusSer
8383
return server
8484
}
8585

86-
func cleanVppStatName(vppStatName string) string {
86+
func cleanVppIfStatName(vppStatName string) string {
8787
vppStatName = strings.TrimPrefix(vppStatName, "/if/")
8888
vppStatName = strings.Replace(vppStatName, "-", "_", -1)
8989
return vppStatName
9090
}
9191

92-
func getVppStatDescription(vppStatName string) string {
93-
switch cleanVppStatName(vppStatName) {
94-
case "drops":
95-
return "number of drops on interface"
96-
case "ip4":
97-
return "IPv4 received packets"
98-
case "ip6":
99-
return "IPv6 received packets"
100-
case "punt":
101-
return "number of punts on interface"
102-
case "rx_bytes":
103-
return "total number of bytes received over the interface"
104-
case "tx_bytes":
105-
return "total number of bytes transmitted by the interface"
106-
case "rx_packets":
107-
return "total number of packets received over the interface"
108-
case "tx_packets":
109-
return "total number of packets transmitted by the interface"
110-
case "tx_broadcast_packets":
111-
return "number of multipoint communications transmitted by the interface in packets"
112-
case "rx_broadcast_packets":
113-
return "number of multipoint communications received by the interface in packets"
114-
case "tx_broadcast_bytes":
115-
return "number of multipoint communications transmitted by the interface in bytes"
116-
case "rx_broadcast_bytes":
117-
return "number of multipoint communications received by the interface in bytes"
118-
case "tx_unicast_packets":
119-
return "number of point-to-point communications transmitted by the interface in packets"
120-
case "rx_unicast_packets":
121-
return "number of point-to-point communications received by the interface in packets"
122-
case "tx_unicast_bytes":
123-
return "number of point-to-point communications transmitted by the interface in bytes"
124-
case "rx_unicast_bytes":
125-
return "number of point-to-point communications received by the interface in bytes"
126-
case "tx_multicast_packets":
127-
return "number of one-to-many communications transmitted by the interface in packets"
128-
case "rx_multicast_packets":
129-
return "number of one-to-many communications received by the interface in packets"
130-
case "tx_multicast_bytes":
131-
return "number of one-to-many communications transmitted by the interface in bytes"
132-
case "rx_multicast_bytes":
133-
return "number of one-to-many communications received by the interface in bytes"
134-
case "rx_error":
135-
return "total number of erroneous received packets"
136-
case "tx_error":
137-
return "total number of erroneous transmitted packets"
138-
case "rx_miss":
139-
return "total of rx packets dropped because there are no available buffer"
140-
case "tx_miss":
141-
return "total of tx packets dropped because there are no available buffer"
142-
case "rx_no_buf":
143-
return "total number of rx mbuf allocation failures"
144-
case "tx_no_buf":
145-
return "total number of tx mbuf allocation failures"
146-
default:
147-
return vppStatName
148-
}
92+
func cleanVppTCPStatName(vppStatName string, prefix string) string {
93+
vppStatName = strings.TrimPrefix(vppStatName, prefix)
94+
vppStatName = strings.Replace(vppStatName, "-", "_", -1)
95+
vppStatName = strings.Replace(vppStatName, "/", "_", -1)
96+
return vppStatName
97+
}
98+
99+
func cleanVppSessionStatName(vppStatName string) string {
100+
vppStatName = strings.TrimPrefix(vppStatName, "/sys/session/")
101+
vppStatName = strings.Replace(vppStatName, "/", "_", -1)
102+
return vppStatName
149103
}
150104

151105
func (self *PrometheusServer) exportMetrics() error {
152-
vppStats, err := self.statsclient.DumpStats("/if/")
106+
ifStats, err := self.statsclient.DumpStats("/if/")
153107
if err != nil {
154-
self.log.Errorf("Error running statsclient.DumpStats %v", err)
108+
self.log.Errorf("Error running statsclient.DumpStats for Interface stats %v", err)
155109
return nil
156110
}
157111
var ifNames adapter.NameStat
158-
for _, vppStat := range vppStats {
112+
for _, vppStat := range ifStats {
159113
switch values := vppStat.Data.(type) {
160114
case adapter.NameStat:
161115
ifNames = values
162116
}
163117
}
164118

165119
self.lock.Lock()
166-
for _, vppStat := range vppStats {
120+
121+
// Export Interface stats
122+
for _, vppStat := range ifStats {
167123
switch values := vppStat.Data.(type) {
168124
case adapter.SimpleCounterStat:
169125
for worker, perWorkerValues := range values {
@@ -180,7 +136,80 @@ func (self *PrometheusServer) exportMetrics() error {
180136
}
181137
}
182138
}
139+
140+
// Export TCP stats
141+
tcpStats, err := self.statsclient.DumpStats("/sys/tcp")
142+
if err != nil {
143+
self.log.Errorf("Error running statsclient.DumpStats for TCP stats %v", err)
144+
return nil
145+
}
146+
for _, vppStat := range tcpStats {
147+
switch values := vppStat.Data.(type) {
148+
case adapter.SimpleCounterStat:
149+
for worker, perWorkerValues := range values {
150+
for _, counter := range perWorkerValues {
151+
self.exportTCPMetric(cleanVppTCPStatName(string(vppStat.Name), "/sys/"), worker, uint64(counter))
152+
}
153+
}
154+
}
155+
}
156+
157+
// Export TCP4 error stats
158+
tcp4ErrStats, err := self.statsclient.DumpStats("/err/tcp4")
159+
if err != nil {
160+
self.log.Errorf("Error running statsclient.DumpStats for TCP4 error stats %v", err)
161+
return nil
162+
}
163+
for _, vppStat := range tcp4ErrStats {
164+
switch values := vppStat.Data.(type) {
165+
case adapter.SimpleCounterStat:
166+
for worker, perWorkerValues := range values {
167+
for _, counter := range perWorkerValues {
168+
self.exportTCPMetric(cleanVppTCPStatName(string(vppStat.Name), "/err/"), worker, uint64(counter))
169+
}
170+
}
171+
}
172+
}
173+
174+
// Export TCP6 error stats
175+
tcp6ErrStats, err := self.statsclient.DumpStats("/err/tcp6")
176+
if err != nil {
177+
self.log.Errorf("Error running statsclient.DumpStats for TCP6 error stats %v", err)
178+
return nil
179+
}
180+
for _, vppStat := range tcp6ErrStats {
181+
switch values := vppStat.Data.(type) {
182+
case adapter.SimpleCounterStat:
183+
for worker, perWorkerValues := range values {
184+
for _, counter := range perWorkerValues {
185+
self.exportTCPMetric(cleanVppTCPStatName(string(vppStat.Name), "/err/"), worker, uint64(counter))
186+
}
187+
}
188+
}
189+
}
190+
191+
// Export Session stats
192+
sessionStats, err := self.statsclient.DumpStats("/sys/session")
193+
if err != nil {
194+
self.log.Errorf("Error running statsclient.DumpStats for Session stats %v", err)
195+
return nil
196+
}
197+
for _, vppStat := range sessionStats {
198+
switch values := vppStat.Data.(type) {
199+
case adapter.SimpleCounterStat:
200+
for worker, perWorkerValues := range values {
201+
for _, counter := range perWorkerValues {
202+
self.exportSessionMetric(string(vppStat.Name), worker, uint64(counter))
203+
}
204+
}
205+
case adapter.ScalarStat:
206+
// ScalarStat is a single value, not per-worker
207+
self.exportSessionMetric(string(vppStat.Name), 0, uint64(values))
208+
}
209+
}
210+
183211
self.lock.Unlock()
212+
184213
return nil
185214
}
186215

@@ -196,9 +225,10 @@ func (self *PrometheusServer) exportInterfaceMetric(name string, worker int, swI
196225
nil, /* resource */
197226
&metricspb.Metric{
198227
MetricDescriptor: &metricspb.MetricDescriptor{
199-
Name: cleanVppStatName(name),
228+
Name: cleanVppIfStatName(name),
200229
Unit: unit,
201-
Description: getVppStatDescription(name),
230+
Description: getVppIfStatDescription(name),
231+
Type: metricspb.MetricDescriptor_CUMULATIVE_DOUBLE,
202232
// empty timeseries prevents exporter from updating
203233
LabelKeys: []*metricspb.LabelKey{
204234
{Key: "worker", Description: "VPP worker index"},
@@ -231,6 +261,74 @@ func (self *PrometheusServer) exportInterfaceMetric(name string, worker int, swI
231261
}
232262
}
233263

264+
func (self *PrometheusServer) exportTCPMetric(name string, worker int, value uint64) {
265+
err := self.exporter.ExportMetric(
266+
context.Background(),
267+
nil, /* node */
268+
nil, /* resource */
269+
&metricspb.Metric{
270+
MetricDescriptor: &metricspb.MetricDescriptor{
271+
Name: name,
272+
Unit: "",
273+
Description: getVppTCPStatDescription(name),
274+
Type: metricspb.MetricDescriptor_CUMULATIVE_INT64,
275+
LabelKeys: []*metricspb.LabelKey{
276+
{Key: "worker", Description: "VPP worker index"},
277+
},
278+
},
279+
Timeseries: []*metricspb.TimeSeries{{
280+
LabelValues: []*metricspb.LabelValue{
281+
{Value: strconv.Itoa(worker)},
282+
},
283+
Points: []*metricspb.Point{
284+
{
285+
Value: &metricspb.Point_Int64Value{
286+
Int64Value: int64(value),
287+
},
288+
},
289+
},
290+
}},
291+
},
292+
)
293+
if err != nil {
294+
self.log.Errorf("Error prometheus exporter.ExportMetric for TCP %v", err)
295+
}
296+
}
297+
298+
func (self *PrometheusServer) exportSessionMetric(name string, worker int, value uint64) {
299+
err := self.exporter.ExportMetric(
300+
context.Background(),
301+
nil, /* node */
302+
nil, /* resource */
303+
&metricspb.Metric{
304+
MetricDescriptor: &metricspb.MetricDescriptor{
305+
Name: cleanVppSessionStatName(name),
306+
Unit: "",
307+
Description: getVppSessionStatDescription(name),
308+
Type: metricspb.MetricDescriptor_CUMULATIVE_INT64,
309+
LabelKeys: []*metricspb.LabelKey{
310+
{Key: "worker", Description: "VPP worker index"},
311+
},
312+
},
313+
Timeseries: []*metricspb.TimeSeries{{
314+
LabelValues: []*metricspb.LabelValue{
315+
{Value: strconv.Itoa(worker)},
316+
},
317+
Points: []*metricspb.Point{
318+
{
319+
Value: &metricspb.Point_Int64Value{
320+
Int64Value: int64(value),
321+
},
322+
},
323+
},
324+
}},
325+
},
326+
)
327+
if err != nil {
328+
self.log.Errorf("Error prometheus exporter.ExportMetric for Session %v", err)
329+
}
330+
}
331+
234332
func (self *PrometheusServer) ServePrometheus(t *tomb.Tomb) error {
235333
if !(*config.GetCalicoVppFeatureGates().PrometheusEnabled) {
236334
return nil
@@ -294,10 +392,18 @@ func (self *PrometheusServer) ServePrometheus(t *tomb.Tomb) error {
294392
return errors.Wrap(err, "could not connect statsclient")
295393
}
296394

297-
go self.httpServer.ListenAndServe()
395+
go (func() {
396+
err := self.httpServer.ListenAndServe()
397+
if err != nil {
398+
panic(err)
399+
}
400+
})()
298401
ticker := time.NewTicker(*config.GetCalicoVppInitialConfig().PrometheusRecordMetricInterval)
299402
for ; t.Alive(); <-ticker.C {
300-
self.exportMetrics()
403+
err := self.exportMetrics()
404+
if err != nil {
405+
self.log.WithError(err).Errorf("exportMetrics errored")
406+
}
301407
}
302408
ticker.Stop()
303409
self.log.Warn("Prometheus Server returned")

0 commit comments

Comments
 (0)