Skip to content

Commit 996c3fe

Browse files
jshr-w and matmerr authored
ci: move hubble connectivity tests to nightly pipeline (#2310)
* [test] add hubble system test to CI (#2265) * cilium configmap * update hubble configs and add metrics test * update pipeline yaml * separate cilium+hubble config * ci: move hubble connectivity test to nightly pipeline * fix: move to correct file * style: indentation change * ci: update configmap to enable Hubble * fix: move hubble test * fix: move connectivity test before delete * fix: add daemonset namespace * fix: update command for configmap replace * test no restart after replace * fix: try apply instead of replace * fix: add back restart ds * add longer timeout after ds restart * adjust timeout setup * extend timeout, add logging * add logging, change cm command * update hubble configmap * clean up sleep statements * remove hubble connectivity test from PR pipeline * use kubernetes utils * fix style Signed-off-by: jshr-w <[email protected]> * update ds restart wait * enable Hubble on nightly, disable on PR * tag networkobservability test * fix test call * enable Hubble after Cilium is ready * change location of hubble enable --------- Signed-off-by: jshr-w <[email protected]> Co-authored-by: Mathew Merrick <[email protected]>
1 parent e6bc6da commit 996c3fe

File tree

7 files changed

+272
-2
lines changed

7 files changed

+272
-2
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,6 @@ go.work*
3535

3636
# scale-test
3737
test/scale/generated/*
38+
39+
# test env file
40+
*.env

.pipelines/cni/cilium/nightly-release-test.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ stages:
8888
name: "cilium_nightly"
8989
testDropgz: ""
9090
clusterName: ciliumnightly-$(commitID)
91+
testHubble: true
9192
- job: logs
9293
displayName: "Failure Logs"
9394
dependsOn:

.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ parameters:
22
name: ""
33
testDropgz: ""
44
clusterName: ""
5+
testHubble: false
56

67
steps:
78
- bash: |
@@ -153,6 +154,21 @@ steps:
153154
name: "ciliumConnectivityTests"
154155
displayName: "Run Cilium Connectivity Tests"
155156
157+
- ${{ if eq( parameters['testHubble'], true) }}:
158+
- script: |
159+
echo "enable Hubble metrics server"
160+
kubectl apply -f test/integration/manifests/cilium/hubble/hubble-peer-svc.yaml
161+
kubectl apply -f test/integration/manifests/cilium/cilium-config-hubble.yaml
162+
kubectl rollout restart ds cilium -n kube-system
163+
echo "wait <3 minutes for pods to be ready after restart"
164+
kubectl rollout status ds cilium -n kube-system --timeout=3m
165+
kubectl get pods -Aowide
166+
echo "verify Hubble metrics endpoint is usable"
167+
go test ./test/integration/networkobservability -count=1 -v -tags=networkobservability
168+
retryCountOnTaskFailure: 3
169+
name: "HubbleConnectivityTests"
170+
displayName: "Run Hubble Connectivity Tests"
171+
156172
- script: |
157173
echo "validate pod IP assignment and check systemd-networkd restart"
158174
kubectl get pod -owide -A

hack/toolbox/server/Dockerfile.heavy

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ ADD ./ /
33
WORKDIR /
44
RUN CGO_ENABLED=0 GOOS=linux go build -o server .
55

6-
FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04
6+
FROM mcr.microsoft.com/mirror/docker/library/ubuntu:22.04
77
RUN apt-get update
88
RUN apt-get install -y \
99
axel \
@@ -21,12 +21,12 @@ RUN apt-get install -y \
2121
net-tools \
2222
netcat \
2323
nmap \
24-
python \
2524
python3 \
2625
ssh \
2726
sudo \
2827
tcpdump \
2928
traceroute \
29+
unzip \
3030
vim \
3131
wget
3232

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
apiVersion: v1
2+
data:
3+
agent-not-ready-taint-key: node.cilium.io/agent-not-ready
4+
arping-refresh-period: 30s
5+
auto-direct-node-routes: "false"
6+
bpf-lb-external-clusterip: "false"
7+
bpf-lb-map-max: "65536"
8+
bpf-lb-mode: snat
9+
bpf-map-dynamic-size-ratio: "0.0025"
10+
bpf-policy-map-max: "16384"
11+
bpf-root: /sys/fs/bpf
12+
cgroup-root: /run/cilium/cgroupv2
13+
cilium-endpoint-gc-interval: 5m0s
14+
cluster-id: "0"
15+
cluster-name: default
16+
debug: "false"
17+
disable-cnp-status-updates: "true"
18+
disable-endpoint-crd: "false"
19+
enable-auto-protect-node-port-range: "true"
20+
enable-bgp-control-plane: "false"
21+
enable-bpf-clock-probe: "true"
22+
enable-endpoint-health-checking: "false"
23+
enable-endpoint-routes: "true"
24+
enable-health-check-nodeport: "true"
25+
enable-health-checking: "true"
26+
enable-host-legacy-routing: "true"
27+
enable-hubble: "true"
28+
enable-ipv4: "true"
29+
enable-ipv4-masquerade: "false"
30+
enable-ipv6: "false"
31+
enable-ipv6-masquerade: "false"
32+
enable-k8s-terminating-endpoint: "true"
33+
enable-l2-neigh-discovery: "true"
34+
enable-l7-proxy: "false"
35+
enable-local-node-route: "false"
36+
enable-local-redirect-policy: "false"
37+
enable-metrics: "true"
38+
enable-policy: default
39+
enable-remote-node-identity: "true"
40+
enable-session-affinity: "true"
41+
enable-svc-source-range-check: "true"
42+
enable-vtep: "false"
43+
enable-well-known-identities: "false"
44+
enable-xt-socket-fallback: "true"
45+
hubble-metrics: flow:sourceContext=workload-name;destinationContext=workload-name
46+
tcp:sourceContext=workload-name;destinationContext=workload-name
47+
dns:flow:sourceContext=workload-name;destinationContext=workload-name
48+
hubble-metrics-server: :9965
49+
hubble-disable-tls: "false"
50+
hubble-listen-address: ""
51+
hubble-socket-path: /dev/null
52+
hubble-tls-cert-file: /var/lib/cilium/tls/hubble/server.crt
53+
hubble-tls-client-ca-files: /var/lib/cilium/tls/hubble/client-ca.crt
54+
hubble-tls-key-file: /var/lib/cilium/tls/hubble/server.key
55+
identity-allocation-mode: crd
56+
install-iptables-rules: "true"
57+
install-no-conntrack-iptables-rules: "false"
58+
ipam: delegated-plugin
59+
kube-proxy-replacement: strict
60+
kube-proxy-replacement-healthz-bind-address: "0.0.0.0:10256"
61+
local-router-ipv4: 169.254.23.0
62+
metrics: +cilium_bpf_map_pressure
63+
monitor-aggregation: medium
64+
monitor-aggregation-flags: all
65+
monitor-aggregation-interval: 5s
66+
node-port-bind-protection: "true"
67+
nodes-gc-interval: 5m0s
68+
operator-api-serve-addr: 127.0.0.1:9234
69+
operator-prometheus-serve-addr: :9963
70+
preallocate-bpf-maps: "false"
71+
procfs: /host/proc
72+
prometheus-serve-addr: :9962
73+
remove-cilium-node-taints: "true"
74+
set-cilium-is-up-condition: "true"
75+
sidecar-istio-proxy-image: cilium/istio_proxy
76+
synchronize-k8s-nodes: "true"
77+
tofqdns-dns-reject-response-code: refused
78+
tofqdns-enable-dns-compression: "true"
79+
tofqdns-endpoint-max-ip-per-hostname: "50"
80+
tofqdns-idle-connection-grace-period: 0s
81+
tofqdns-max-deferred-connection-deletes: "10000"
82+
tofqdns-min-ttl: "3600"
83+
tofqdns-proxy-response-max-delay: 100ms
84+
tunnel: disabled
85+
unmanaged-pod-watcher-interval: "15"
86+
vtep-cidr: ""
87+
vtep-endpoint: ""
88+
vtep-mac: ""
89+
vtep-mask: ""
90+
kind: ConfigMap
91+
metadata:
92+
annotations:
93+
meta.helm.sh/release-name: cilium
94+
meta.helm.sh/release-namespace: kube-system
95+
labels:
96+
app.kubernetes.io/managed-by: Helm
97+
name: cilium-config
98+
namespace: kube-system
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
apiVersion: v1
2+
kind: Service
3+
metadata:
4+
labels:
5+
k8s-app: cilium
6+
name: hubble-peer
7+
namespace: kube-system
8+
spec:
9+
internalTrafficPolicy: Cluster
10+
ports:
11+
- name: peer-service
12+
port: 443
13+
protocol: TCP
14+
targetPort: 4244
15+
selector:
16+
k8s-app: cilium
17+
sessionAffinity: None
18+
type: ClusterIP
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
//go:build networkobservability
2+
3+
package networkobservability
4+
5+
import (
6+
"context"
7+
"fmt"
8+
"io"
9+
"net/http"
10+
"strings"
11+
"testing"
12+
"time"
13+
14+
k8s "github.com/Azure/azure-container-networking/test/integration"
15+
"github.com/Azure/azure-container-networking/test/internal/kubernetes"
16+
"github.com/Azure/azure-container-networking/test/internal/retry"
17+
)
18+
19+
// Settings shared by the Hubble metrics endpoint test in this file.
const (
20+
// retryAttempts is the attempt count given to defaultRetrier; it also sizes the port-forward timeout.
retryAttempts = 10
21+
// retryDelay is the delay given to defaultRetrier; it also sizes the port-forward timeout.
retryDelay = 5 * time.Second
22+
// promAddress is the URL scraped for metrics; its port matches the local port forwarded by TestEndpoints.
promAddress = "http://localhost:9965/metrics"
23+
// labelSelector is the label selector passed to the port forwarder to pick the target pod.
labelSelector = "k8s-app=cilium"
24+
// namespace is the namespace passed to the port forwarder.
namespace = "kube-system"
25+
)
26+
27+
// Package-level state shared by the Hubble metrics endpoint test in this file.
var (
28+
// defaultRetrier is built from retryAttempts and retryDelay and is used for both the
// port-forward setup and the overall metrics check.
defaultRetrier = retry.Retrier{Attempts: retryAttempts, Delay: retryDelay}
29+
// requiredMetrics lists the metric names that must appear in the scraped output.
requiredMetrics = []string{
30+
"hubble_flows_processed_total",
31+
"hubble_tcp_flags_total",
32+
}
33+
)
34+
35+
func TestEndpoints(t *testing.T) {
36+
config := kubernetes.MustGetRestConfig()
37+
ctx := context.Background()
38+
clusterCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
39+
defer cancel()
40+
pingCheckFn := func() error {
41+
var pf *k8s.PortForwarder
42+
pf, err := k8s.NewPortForwarder(config, t, k8s.PortForwardingOpts{
43+
Namespace: namespace,
44+
LabelSelector: labelSelector,
45+
LocalPort: 9965,
46+
DestPort: 9965,
47+
})
48+
if err != nil {
49+
t.Error(err)
50+
}
51+
pctx := context.Background()
52+
53+
portForwardCtx, cancel := context.WithTimeout(pctx, (retryAttempts+1)*retryDelay)
54+
defer cancel()
55+
56+
portForwardFn := func() error {
57+
t.Logf("attempting port forward to a pod with label %s, in namespace %s...", labelSelector, namespace)
58+
if err = pf.Forward(portForwardCtx); err != nil {
59+
return fmt.Errorf("could not start port forward: %w", err)
60+
}
61+
return nil
62+
}
63+
64+
if err = defaultRetrier.Do(portForwardCtx, portForwardFn); err != nil {
65+
t.Fatalf("could not start port forward within %d: %v", (retryAttempts+1)*retryDelay, err)
66+
}
67+
defer pf.Stop()
68+
69+
// scrape the hubble metrics
70+
metrics, err := getPrometheusMetrics(promAddress)
71+
if err != nil {
72+
return fmt.Errorf("scraping %s, failed with error: %w", promAddress, err)
73+
}
74+
75+
// verify that the response contains the required metrics
76+
for _, reqMetric := range requiredMetrics {
77+
if val, exists := metrics[reqMetric]; !exists {
78+
return fmt.Errorf("scraping %s, did not find metric %s", val, promAddress) //nolint:goerr113,gocritic
79+
}
80+
}
81+
t.Logf("all metrics validated: %+v", requiredMetrics)
82+
return nil
83+
}
84+
85+
if err := defaultRetrier.Do(clusterCtx, pingCheckFn); err != nil {
86+
t.Fatalf("metrics check failed with error: %v", err)
87+
}
88+
}
89+
90+
func getPrometheusMetrics(url string) (map[string]struct{}, error) {
91+
client := http.Client{}
92+
resp, err := client.Get(url) //nolint
93+
if err != nil {
94+
return nil, fmt.Errorf("HTTP request failed: %w", err)
95+
}
96+
defer resp.Body.Close()
97+
98+
if resp.StatusCode != http.StatusOK {
99+
return nil, fmt.Errorf("HTTP request failed with status: %v", resp.Status) //nolint:goerr113,gocritic
100+
}
101+
102+
metricsData, err := io.ReadAll(resp.Body)
103+
if err != nil {
104+
return nil, fmt.Errorf("reading HTTP response body failed: %w", err)
105+
}
106+
107+
metrics := parseMetrics(string(metricsData))
108+
return metrics, nil
109+
}
110+
111+
// parseMetrics extracts the set of metric names from Prometheus text-format
// output.
//
// Sample input lines:
//
//	hubble_tcp_flags_total{destination="",family="IPv4",flag="RST",source="kube-system/metrics-server"} 980
//	hubble_tcp_flags_total{destination="",family="IPv4",flag="SYN",source="kube-system/ama-metrics"} 1777
//
// Only the metric name is kept for the time being; label parsing can be added
// later. Blank lines and comment lines (those starting with '#', such as
// HELP and TYPE) are ignored. The name of a sample ends at the first '{'
// (labels present) or at the first whitespace (no labels).
func parseMetrics(metricsData string) map[string]struct{} {
	metrics := make(map[string]struct{})

	for _, line := range strings.Split(metricsData, "\n") {
		// TrimSpace also removes a trailing '\r' from CRLF-terminated input.
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}

		name := line
		if end := strings.IndexAny(line, "{ \t"); end >= 0 {
			name = line[:end]
		}
		if name == "" {
			// A line starting with '{' carries no metric name.
			continue
		}
		metrics[name] = struct{}{}
	}

	return metrics
}

0 commit comments

Comments
 (0)