Skip to content

Commit bfbf496

Browse files
committed
Add netpol events as predefined metrics
3 new metrics: - node_network_policy_events_total - namespace_network_policy_events_total (enabled by default) - workload_network_policy_events_total And their related charts
1 parent 3fa7d1e commit bfbf496

File tree

10 files changed

+103
-8
lines changed

10 files changed

+103
-8
lines changed

apis/flowcollector/v1beta1/flowcollector_webhook_test.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,14 @@ func TestBeta1ConversionRoundtrip_Metrics(t *testing.T) {
123123
err := initial.ConvertTo(&converted)
124124
assert.NoError(err)
125125

126-
expectedDefaultMetrics := []v1beta2.FLPMetric{"namespace_egress_packets_total", "namespace_flows_total", "namespace_rtt_seconds", "namespace_drop_packets_total", "namespace_dns_latency_seconds"}
126+
expectedDefaultMetrics := []v1beta2.FLPMetric{
127+
"namespace_egress_packets_total",
128+
"namespace_flows_total",
129+
"namespace_rtt_seconds",
130+
"namespace_drop_packets_total",
131+
"namespace_dns_latency_seconds",
132+
"namespace_network_policy_events_total",
133+
}
127134
assert.Equal([]v1beta2.FLPAlert{v1beta2.AlertLokiError}, converted.Spec.Processor.Metrics.DisableAlerts)
128135
assert.NotNil(converted.Spec.Processor.Metrics.IncludeList)
129136
assert.Equal(expectedDefaultMetrics, *converted.Spec.Processor.Metrics.IncludeList)

apis/flowcollector/v1beta2/flowcollector_types.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,7 @@ const (
530530
)
531531

532532
// Metric name. More information in https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md.
533-
// +kubebuilder:validation:Enum:="namespace_egress_bytes_total";"namespace_egress_packets_total";"namespace_ingress_bytes_total";"namespace_ingress_packets_total";"namespace_flows_total";"node_egress_bytes_total";"node_egress_packets_total";"node_ingress_bytes_total";"node_ingress_packets_total";"node_flows_total";"workload_egress_bytes_total";"workload_egress_packets_total";"workload_ingress_bytes_total";"workload_ingress_packets_total";"workload_flows_total";"namespace_drop_bytes_total";"namespace_drop_packets_total";"node_drop_bytes_total";"node_drop_packets_total";"workload_drop_bytes_total";"workload_drop_packets_total";"namespace_rtt_seconds";"node_rtt_seconds";"workload_rtt_seconds";"namespace_dns_latency_seconds";"node_dns_latency_seconds";"workload_dns_latency_seconds"
533+
// +kubebuilder:validation:Enum:="namespace_egress_bytes_total";"namespace_egress_packets_total";"namespace_ingress_bytes_total";"namespace_ingress_packets_total";"namespace_flows_total";"node_egress_bytes_total";"node_egress_packets_total";"node_ingress_bytes_total";"node_ingress_packets_total";"node_flows_total";"workload_egress_bytes_total";"workload_egress_packets_total";"workload_ingress_bytes_total";"workload_ingress_packets_total";"workload_flows_total";"namespace_drop_bytes_total";"namespace_drop_packets_total";"node_drop_bytes_total";"node_drop_packets_total";"workload_drop_bytes_total";"workload_drop_packets_total";"namespace_rtt_seconds";"node_rtt_seconds";"workload_rtt_seconds";"namespace_dns_latency_seconds";"node_dns_latency_seconds";"workload_dns_latency_seconds";"node_network_policy_events_total";"namespace_network_policy_events_total";"workload_network_policy_events_total"
534534
type FLPMetric string
535535

536536
// `FLPMetrics` define the desired FLP configuration regarding metrics
@@ -546,7 +546,8 @@ type FLPMetrics struct {
546546
// Metrics enabled by default are:
547547
// `namespace_flows_total`, `node_ingress_bytes_total`, `node_egress_bytes_total`, `workload_ingress_bytes_total`,
548548
// `workload_egress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled),
549-
// `namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled).
549+
// `namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled),
550+
// `namespace_network_policy_events_total` (when `NetworkEvents` feature is enabled).
550551
// More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
551552
// +optional
552553
IncludeList *[]FLPMetric `json:"includeList,omitempty"`

bundle/manifests/flows.netobserv.io_flowcollectors.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8205,7 +8205,8 @@ spec:
82058205
Metrics enabled by default are:
82068206
`namespace_flows_total`, `node_ingress_bytes_total`, `node_egress_bytes_total`, `workload_ingress_bytes_total`,
82078207
`workload_egress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled),
8208-
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled).
8208+
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled),
8209+
`namespace_network_policy_events_total` (when `NetworkEvents` feature is enabled).
82098210
More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
82108211
items:
82118212
description: Metric name. More information in https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md.
@@ -8237,6 +8238,9 @@ spec:
82378238
- namespace_dns_latency_seconds
82388239
- node_dns_latency_seconds
82398240
- workload_dns_latency_seconds
8241+
- node_network_policy_events_total
8242+
- namespace_network_policy_events_total
8243+
- workload_network_policy_events_total
82408244
type: string
82418245
type: array
82428246
server:

config/crd/bases/flows.netobserv.io_flowcollectors.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7585,7 +7585,8 @@ spec:
75857585
Metrics enabled by default are:
75867586
`namespace_flows_total`, `node_ingress_bytes_total`, `node_egress_bytes_total`, `workload_ingress_bytes_total`,
75877587
`workload_egress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled),
7588-
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled).
7588+
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled),
7589+
`namespace_network_policy_events_total` (when `NetworkEvents` feature is enabled).
75897590
More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md
75907591
items:
75917592
description: Metric name. More information in https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md.
@@ -7617,6 +7618,9 @@ spec:
76177618
- namespace_dns_latency_seconds
76187619
- node_dns_latency_seconds
76197620
- workload_dns_latency_seconds
7621+
- node_network_policy_events_total
7622+
- namespace_network_policy_events_total
7623+
- workload_network_policy_events_total
76207624
type: string
76217625
type: array
76227626
server:

docs/FlowCollector.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16877,7 +16877,8 @@ Note that the more metrics you add, the bigger is the impact on Prometheus workl
1687716877
Metrics enabled by default are:
1687816878
`namespace_flows_total`, `node_ingress_bytes_total`, `node_egress_bytes_total`, `workload_ingress_bytes_total`,
1687916879
`workload_egress_bytes_total`, `namespace_drop_packets_total` (when `PacketDrop` feature is enabled),
16880-
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled).
16880+
`namespace_rtt_seconds` (when `FlowRTT` feature is enabled), `namespace_dns_latency_seconds` (when `DNSTracking` feature is enabled),
16881+
`namespace_network_policy_events_total` (when `NetworkEvents` feature is enabled).
1688116882
More information, with full list of available metrics: https://github.com/netobserv/network-observability-operator/blob/main/docs/Metrics.md<br/>
1688216883
</td>
1688316884
<td>false</td>

docs/Metrics.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ When the `DNSTracking` feature is enabled in `spec.agent.ebpf.features`, additio
5151
- `node_dns_latency_seconds`
5252
- `workload_dns_latency_seconds` `**`
5353

54+
When the `NetworkEvents` feature is enabled in `spec.agent.ebpf.features`, additional metrics are available:
55+
- `namespace_network_policy_events_total` `*`
56+
- `node_network_policy_events_total`
57+
- `workload_network_policy_events_total`
58+
5459
## Custom metrics using the FlowMetrics API
5560

5661
The FlowMetrics API ([spec reference](./FlowMetric.md)) has been designed to give you full control over the generation of metrics from NetObserv's enriched NetFlow data.

pkg/dashboards/dashboard_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ func TestCreateFlowMetricsDashboard_All(t *testing.T) {
2121

2222
assert.Equal("NetObserv / Main", d.Title)
2323

24-
assert.Equal([]string{"Overview", "Traffic rates", "TCP latencies", "Byte and packet drops", "DNS"}, d.Titles())
24+
assert.Equal([]string{"Overview", "Traffic rates", "TCP latencies", "Byte and packet drops", "DNS", "Network Policy"}, d.Titles())
2525

26-
assert.Len(d.Rows[0].Panels, 16)
26+
assert.Len(d.Rows[0].Panels, 18)
2727
assert.Len(d.Rows[1].Panels, 20)
2828

2929
p := d.FindPanel("Top egress traffic per node")

pkg/metrics/predefined_charts.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,49 @@ func dnsCharts(group string) []metricslatest.Chart {
203203
}, group, "")...)
204204
}
205205

206+
// netpolCharts builds the predefined dashboard charts attached to the network policy events metric
// of the given group.
//
// It returns, in order:
//   - two single-stat charts ("Policy drop rate", "Policy allow rate") declared with an empty section name;
//   - the "Drop rate" stacked-area chart variants for the "Network Policy" section;
//   - the "Allow rate" stacked-area chart variants for the same section.
//
// The stacked-area charts are expanded through chartVariantsFor with the provided group and an empty unit.
// NOTE(review): $METRIC, $FILTERS, $LABELS and $LEGEND are placeholders presumably substituted when the
// dashboard is generated — confirm against chartVariantsFor and the dashboard builder.
func netpolCharts(group string) []metricslatest.Chart {
207+
sectionName := "Network Policy"
208+
// Single-stat charts: one global rate per action, with no $FILTERS / $LABELS breakdown.
// NOTE(review): the empty SectionName presumably places them in the dashboard's first (overview) row — confirm.
charts := []metricslatest.Chart{
209+
{
210+
Type: metricslatest.ChartTypeSingleStat,
211+
SectionName: "",
212+
DashboardName: mainDashboard,
213+
Title: "Policy drop rate",
214+
Queries: []metricslatest.Query{{PromQL: `sum(rate($METRIC{action="drop"}[2m]))`}},
215+
},
216+
{
217+
Type: metricslatest.ChartTypeSingleStat,
218+
SectionName: "",
219+
DashboardName: mainDashboard,
220+
Title: "Policy allow rate",
221+
// Allow actions are matched with a regex (`allow.*`), unlike "drop" which is matched exactly.
Queries: []metricslatest.Query{{PromQL: `sum(rate($METRIC{action=~"allow.*"}[2m]))`}},
222+
},
223+
}
224+
225+
// Detailed "Network Policy" section: drop rate, broken down by event type, direction and $LABELS.
charts = append(charts,
226+
chartVariantsFor(&metricslatest.Chart{
227+
Type: metricslatest.ChartTypeStackArea,
228+
SectionName: sectionName,
229+
DashboardName: mainDashboard,
230+
Title: "Drop rate",
231+
Queries: []metricslatest.Query{{
232+
PromQL: `sum(rate($METRIC{action="drop",$FILTERS}[2m])) by (type,direction,$LABELS)`,
233+
Legend: "$LEGEND, {{ type }}, {{ direction }}",
234+
}},
235+
}, group, "")...)
236+
// Same breakdown for the allow actions, appended last.
return append(charts,
237+
chartVariantsFor(&metricslatest.Chart{
238+
Type: metricslatest.ChartTypeStackArea,
239+
SectionName: sectionName,
240+
DashboardName: mainDashboard,
241+
Title: "Allow rate",
242+
Queries: []metricslatest.Query{{
243+
PromQL: `sum(rate($METRIC{action=~"allow.*",$FILTERS}[2m])) by (type,direction,$LABELS)`,
244+
Legend: "$LEGEND, {{ type }}, {{ direction }}",
245+
}},
246+
}, group, "")...)
247+
}
248+
206249
func chartVariantsFor(chart *metricslatest.Chart, group, unit string) []metricslatest.Chart {
207250
switch group {
208251
case tagNodes:

pkg/metrics/predefined_metrics.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ var (
4343
"namespace_drop_packets_total",
4444
"namespace_rtt_seconds",
4545
"namespace_dns_latency_seconds",
46+
"namespace_network_policy_events_total",
4647
}
4748
// More metrics enabled when Loki is disabled, to avoid loss of information
4849
DefaultIncludeListLokiDisabled = []string{
@@ -57,6 +58,7 @@ var (
5758
"workload_drop_packets_total",
5859
"workload_rtt_seconds",
5960
"workload_dns_latency_seconds",
61+
"namespace_network_policy_events_total",
6062
}
6163
// Pre-deprecation default IgnoreTags list (1.4) - used before switching to whitelist approach,
6264
// to make sure there is no unintended new metrics being collected
@@ -163,6 +165,28 @@ func init() {
163165
},
164166
tags: []string{group, "dns"},
165167
})
168+
169+
// Netpol metrics
170+
netpolLabels := labels
171+
netpolLabels = append(netpolLabels, "NetworkEvents>Type", "NetworkEvents>Namespace", "NetworkEvents>Name", "NetworkEvents>Action", "NetworkEvents>Direction")
172+
predefinedMetrics = append(predefinedMetrics, taggedMetricDefinition{
173+
FlowMetricSpec: metricslatest.FlowMetricSpec{
174+
MetricName: fmt.Sprintf("%s_network_policy_events_total", groupTrimmed),
175+
Type: "counter",
176+
Labels: netpolLabels,
177+
Filters: []metricslatest.MetricFilter{{Field: "NetworkEvents>Feature", Value: "acl"}},
178+
Flatten: []string{"NetworkEvents"},
179+
Remap: map[string]string{
180+
"NetworkEvents>Type": "type",
181+
"NetworkEvents>Namespace": "namespace",
182+
"NetworkEvents>Name": "name",
183+
"NetworkEvents>Action": "action",
184+
"NetworkEvents>Direction": "direction",
185+
},
186+
Charts: netpolCharts(group),
187+
},
188+
tags: []string{group, "network-policy"},
189+
})
166190
}
167191
}
168192

@@ -253,6 +277,9 @@ func GetIncludeList(spec *flowslatest.FlowCollectorSpec) []string {
253277
if !helper.IsDNSTrackingEnabled(&spec.Agent.EBPF) {
254278
list = removeMetricsByPattern(list, "_dns_")
255279
}
280+
if !helper.IsNetworkEventsEnabled(&spec.Agent.EBPF) {
281+
list = removeMetricsByPattern(list, "_network_policy_")
282+
}
256283
return list
257284
}
258285

pkg/metrics/predefined_metrics_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,17 @@ func TestIncludeExclude(t *testing.T) {
1919
"node_rtt_seconds",
2020
"node_drop_bytes_total",
2121
"node_dns_latency_seconds",
22+
"node_network_policy_events_total",
2223
"namespace_ingress_bytes_total",
2324
"namespace_rtt_seconds",
2425
"namespace_drop_bytes_total",
2526
"namespace_dns_latency_seconds",
27+
"namespace_network_policy_events_total",
2628
"workload_ingress_bytes_total",
2729
"workload_rtt_seconds",
2830
"workload_drop_bytes_total",
2931
"workload_dns_latency_seconds",
32+
"workload_network_policy_events_total",
3033
}, *res)
3134

3235
// IgnoreTags set, Include list set => keep include list

0 commit comments

Comments
 (0)