Commit 94c6c9b

karina-ranadive and Karina Ranadive authored
test: expand LRP test to include lifecycle events (#4086)
* test: expand LRP test to include lifecycle events
* fix
* adjustments
* fix
* removed redundant basic lrp test
* changed retry to 1 minute

---------

Co-authored-by: Karina Ranadive <[email protected]>
1 parent faa9c44 commit 94c6c9b

File tree

4 files changed: +321 -13 lines changed


test/integration/lrp/lrp_fqdn_test.go

Lines changed: 1 addition & 1 deletion
@@ -102,7 +102,7 @@ func TestLRPFQDN(t *testing.T) {
 	for _, tt := range tests {
 		tt := tt
 		t.Run(tt.name, func(t *testing.T) {
-			testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases)
+			testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases, getPrometheusAddress(initialPrometheusPort))
 		})
 	}
 }
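Every call site in this file now receives the metrics endpoint as an argument rather than reading the removed promAddress constant. As a quick orientation, the new argument resolves like this (values taken from the lrp_test.go diff below, where the helper and port constants are defined):

	addr := getPrometheusAddress(initialPrometheusPort)
	// addr == "http://localhost:9253/metrics"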

test/integration/lrp/lrp_test.go

Lines changed: 301 additions & 11 deletions
@@ -4,6 +4,7 @@ package lrp

 import (
 	"context"
+	"fmt"
 	"os"
 	"strings"
 	"testing"
@@ -13,11 +14,16 @@ import (
 	"github.com/Azure/azure-container-networking/test/integration/prometheus"
 	"github.com/Azure/azure-container-networking/test/internal/kubernetes"
 	"github.com/Azure/azure-container-networking/test/internal/retry"
+	ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
 	ciliumClientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
 	"github.com/pkg/errors"
 	"github.com/stretchr/testify/require"
 	"golang.org/x/exp/rand"
 	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8sclient "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"sigs.k8s.io/yaml"
 )

 const (
@@ -28,11 +34,13 @@ const (
 	dnsService                = "kube-dns"
 	retryAttempts             = 10
 	retryDelay                = 5 * time.Second
-	promAddress               = "http://localhost:9253/metrics"
 	nodeLocalDNSLabelSelector = "k8s-app=node-local-dns"
 	clientLabelSelector       = "lrp-test=true"
 	coreDNSRequestCountTotal  = "coredns_dns_request_count_total"
 	clientContainer           = "no-op"
+	// Port constants for prometheus endpoints
+	initialPrometheusPort   = 9253
+	recreatedPrometheusPort = 9254
 )

 var (
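The pair of port constants exists so the lifecycle test can open a second port forward while the first may still hold its local port; the recreated node-local-dns pod still serves metrics on 9253, only the local end differs. The assumed mapping, matching the forwards set up later in this diff:

	// initial forward:   localhost:9253 -> node-local-dns pod :9253
	// after recreation:  localhost:9254 -> new node-local-dns pod :9253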
@@ -47,6 +55,11 @@ var (
 	clientPath = ciliumManifestsDir + "client-ds.yaml"
 )

+// getPrometheusAddress returns the prometheus metrics URL for the given port
+func getPrometheusAddress(port int) string {
+	return fmt.Sprintf("http://localhost:%d/metrics", port)
+}
+
 func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
 	var cleanUpFns []func()
 	success := false
@@ -132,8 +145,8 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
 	pf, err := k8s.NewPortForwarder(config, k8s.PortForwardingOpts{
 		Namespace: nodeLocalDNSDS.Namespace,
 		PodName:   selectedLocalDNSPod,
-		LocalPort: 9253,
-		DestPort:  9253,
+		LocalPort: initialPrometheusPort,
+		DestPort:  initialPrometheusPort,
 	})
 	require.NoError(t, err)
 	pctx := context.Background()
@@ -154,7 +167,7 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
 }

 func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, clientCmd []string, expectResponse, expectErrMsg string,
-	shouldError, countShouldIncrease bool) {
+	shouldError, countShouldIncrease bool, prometheusAddress string) {

 	config := kubernetes.MustGetRestConfig()
 	cs := kubernetes.MustGetClientset()
@@ -167,9 +180,11 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
 		"zone": ".",
 	}

-	// curl localhost:9253/metrics
-	beforeMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
+	// curl to the specified prometheus address
+	beforeMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
 	require.NoError(t, err)
+	beforeValue := beforeMetric.GetCounter().GetValue()
+	t.Logf("Before DNS request - metric count: %.0f", beforeValue)

 	t.Log("calling command from client")
@@ -187,13 +202,15 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
 	time.Sleep(500 * time.Millisecond)

 	// curl again and see count diff
-	afterMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
+	afterMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
 	require.NoError(t, err)
+	afterValue := afterMetric.GetCounter().GetValue()
+	t.Logf("After DNS request - metric count: %.0f (diff: %.0f)", afterValue, afterValue-beforeValue)

 	if countShouldIncrease {
-		require.Greater(t, afterMetric.GetCounter().GetValue(), beforeMetric.GetCounter().GetValue(), "dns metric count did not increase after command")
+		require.Greater(t, afterValue, beforeValue, "dns metric count did not increase after command - before: %.0f, after: %.0f", beforeValue, afterValue)
 	} else {
-		require.Equal(t, afterMetric.GetCounter().GetValue(), beforeMetric.GetCounter().GetValue(), "dns metric count increased after command")
+		require.Equal(t, afterValue, beforeValue, "dns metric count increased after command - before: %.0f, after: %.0f", beforeValue, afterValue)
 	}
 }
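The assertions above go through the repo-internal prometheus.GetMetric helper. For readers without the repo handy, a rough standalone equivalent of the before/after counter check, using the standard github.com/prometheus/common/expfmt text parser, might look like the following (hypothetical sketch: getCounter, the URL, and the label subset are illustrative, not the repo's actual helper):

	package main

	import (
		"fmt"
		"net/http"

		"github.com/prometheus/common/expfmt"
	)

	// getCounter scrapes a text-format /metrics endpoint and returns the value
	// of the first counter series of `name` whose labels are a superset of `want`.
	func getCounter(url, name string, want map[string]string) (float64, error) {
		resp, err := http.Get(url)
		if err != nil {
			return 0, err
		}
		defer resp.Body.Close()

		var parser expfmt.TextParser
		families, err := parser.TextToMetricFamilies(resp.Body)
		if err != nil {
			return 0, fmt.Errorf("parsing metrics: %w", err)
		}
		fam, ok := families[name]
		if !ok {
			return 0, fmt.Errorf("metric %q not found", name)
		}
	next:
		for _, m := range fam.GetMetric() {
			labels := map[string]string{}
			for _, lp := range m.GetLabel() {
				labels[lp.GetName()] = lp.GetValue()
			}
			for k, v := range want {
				if labels[k] != v {
					continue next // label mismatch; try the next series
				}
			}
			return m.GetCounter().GetValue(), nil
		}
		return 0, fmt.Errorf("no %q series matches %v", name, want)
	}

	func main() {
		// Illustrative values: the URL matches the test's initial port forward,
		// and the label subset matches the "zone" label shown in the diff above.
		const url = "http://localhost:9253/metrics"
		labels := map[string]string{"zone": "."}

		before, err := getCounter(url, "coredns_dns_request_count_total", labels)
		if err != nil {
			panic(err)
		}
		// ... issue the DNS query under test here ...
		after, err := getCounter(url, "coredns_dns_request_count_total", labels)
		if err != nil {
			panic(err)
		}
		fmt.Printf("count diff: %.0f\n", after-before)
	}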
@@ -210,9 +227,282 @@ func TestLRP(t *testing.T) {
 	defer cleanupFn()
 	require.NotNil(t, selectedPod)

+	// Get the kube-dns service IP for DNS requests
+	cs := kubernetes.MustGetClientset()
+	svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
+	require.NoError(t, err)
+	kubeDNS := svc.Spec.ClusterIP
+
+	t.Logf("LRP Test Starting...")
+
+	// Basic LRP test - using initial port from setupLRP
 	testLRPCase(t, ctx, *selectedPod, []string{
-		"nslookup", "google.com", "10.0.0.10",
-	}, "", "", false, true)
+		"nslookup", "google.com", kubeDNS,
+	}, "", "", false, true, getPrometheusAddress(initialPrometheusPort))
+
+	t.Logf("LRP Test Completed")
+
+	t.Logf("LRP Lifecycle Test Starting")
+
+	// Run LRP Lifecycle test
+	testLRPLifecycle(t, ctx, *selectedPod, kubeDNS)
+
+	t.Logf("LRP Lifecycle Test Completed")
+}
+
+// testLRPLifecycle performs testing of Local Redirect Policy functionality
+// including pod restarts, resource recreation, and cilium command validation
+func testLRPLifecycle(t *testing.T, ctx context.Context, clientPod corev1.Pod, kubeDNS string) {
+	config := kubernetes.MustGetRestConfig()
+	cs := kubernetes.MustGetClientset()
+
+	// Step 1: Validate LRP using cilium commands
+	t.Log("Step 1: Validating LRP using cilium commands")
+	validateCiliumLRP(t, ctx, cs, config)
+
+	// Step 2: Restart busybox pods and verify LRP still works
+	t.Log("Step 2: Restarting client pods to test persistence")
+	restartedPod := restartClientPodsAndGetPod(t, ctx, cs, clientPod)
+
+	// Step 3: Verify metrics after restart
+	t.Log("Step 3: Verifying LRP functionality after pod restart")
+	testLRPCase(t, ctx, restartedPod, []string{
+		"nslookup", "google.com", kubeDNS,
+	}, "", "", false, true, getPrometheusAddress(initialPrometheusPort))
+
+	// Step 4: Validate cilium commands still show LRP
+	t.Log("Step 4: Re-validating cilium LRP after restart")
+	validateCiliumLRP(t, ctx, cs, config)
+
+	// Step 5: Delete and recreate resources & restart nodelocaldns daemonset
+	t.Log("Step 5: Testing resource deletion and recreation")
+	recreatedPod := deleteAndRecreateResources(t, ctx, cs, clientPod)
+
+	// Step 6: Re-establish port forward to new node-local-dns pod and validate metrics
+	t.Log("Step 6: Re-establishing port forward to new node-local-dns pod for metrics validation")
+
+	// Get the new node-local-dns pod on the same node as our recreated client pod
+	nodeName := recreatedPod.Spec.NodeName
+	newNodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, nodeLocalDNSLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, newNodeLocalDNSPods.Items, "No node-local-dns pod found on node %s after restart", nodeName)
+
+	newNodeLocalDNSPod := TakeOne(newNodeLocalDNSPods.Items)
+	t.Logf("Setting up port forward to new node-local-dns pod: %s", newNodeLocalDNSPod.Name)
+
+	// Setup new port forward to the new node-local-dns pod
+	newPf, err := k8s.NewPortForwarder(config, k8s.PortForwardingOpts{
+		Namespace: newNodeLocalDNSPod.Namespace,
+		PodName:   newNodeLocalDNSPod.Name,
+		LocalPort: recreatedPrometheusPort, // Use different port to avoid conflicts
+		DestPort:  initialPrometheusPort,
+	})
+	require.NoError(t, err)
+
+	newPortForwardCtx, newCancel := context.WithTimeout(ctx, (retryAttempts+1)*retryDelay)
+	defer newCancel()
+
+	err = defaultRetrier.Do(newPortForwardCtx, func() error {
+		t.Logf("attempting port forward to new node-local-dns pod %s...", newNodeLocalDNSPod.Name)
+		return errors.Wrap(newPf.Forward(newPortForwardCtx), "could not start port forward to new pod")
+	})
+	require.NoError(t, err, "could not start port forward to new node-local-dns pod")
+	defer newPf.Stop()
+
+	t.Log("Port forward to new node-local-dns pod established")
+
+	// Use testLRPCase function with the new prometheus address
+	t.Log("Validating metrics with new node-local-dns pod")
+	testLRPCase(t, ctx, recreatedPod, []string{
+		"nslookup", "github.com", kubeDNS,
+	}, "", "", false, true, getPrometheusAddress(recreatedPrometheusPort))
+
+	t.Logf("SUCCESS: Metrics validation passed - traffic is being redirected to new node-local-dns pod %s", newNodeLocalDNSPod.Name)
+
+	// Step 7: Final cilium validation after node-local-dns restart
+	t.Log("Step 7: Final cilium validation - ensuring LRP is still active after node-local-dns restart")
+	validateCiliumLRP(t, ctx, cs, config)
+}
+
+// validateCiliumLRP checks that LRP is properly configured in cilium
+func validateCiliumLRP(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, config *rest.Config) {
+	ciliumPods, err := cs.CoreV1().Pods(kubeSystemNamespace).List(ctx, metav1.ListOptions{
+		LabelSelector: "k8s-app=cilium",
+	})
+	require.NoError(t, err)
+	require.NotEmpty(t, ciliumPods.Items)
+	ciliumPod := TakeOne(ciliumPods.Items)
+
+	// Get Kubernetes version to determine validation approach
+	serverVersion, err := cs.Discovery().ServerVersion()
+	require.NoError(t, err)
+	t.Logf("Detected Kubernetes version: %s", serverVersion.String())
+
+	// Get kube-dns service IP for validation
+	svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
+	require.NoError(t, err)
+	kubeDNSIP := svc.Spec.ClusterIP
+
+	// IMPORTANT: Get node-local-dns pod IP on the SAME node as the cilium pod we're using
+	selectedNode := ciliumPod.Spec.NodeName
+	t.Logf("Using cilium pod %s on node %s for validation", ciliumPod.Name, selectedNode)
+
+	// Get node-local-dns pod specifically on the same node as our cilium pod
+	nodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, nodeLocalDNSLabelSelector, selectedNode)
+	require.NoError(t, err)
+	require.NotEmpty(t, nodeLocalDNSPods.Items, "No node-local-dns pod found on node %s", selectedNode)
+
+	// Use the first (and should be only) node-local-dns pod on this node
+	nodeLocalDNSPod := nodeLocalDNSPods.Items[0]
+	nodeLocalDNSIP := nodeLocalDNSPod.Status.PodIP
+	require.NotEmpty(t, nodeLocalDNSIP, "node-local-dns pod %s has no IP address", nodeLocalDNSPod.Name)
+
+	t.Logf("Validating LRP: kubeDNS IP=%s, nodeLocalDNS IP=%s (pod: %s), node=%s",
+		kubeDNSIP, nodeLocalDNSIP, nodeLocalDNSPod.Name, selectedNode)
+
+	// Check cilium lrp list
+	lrpListCmd := []string{"cilium", "lrp", "list"}
+	lrpOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", lrpListCmd, config, false)
+	require.NoError(t, err)
+
+	// Validate the LRP output structure more thoroughly
+	lrpOutputStr := string(lrpOutput)
+	require.Contains(t, lrpOutputStr, "nodelocaldns", "LRP not found in cilium lrp list")
+
+	// Parse LRP list output to validate structure
+	lrpLines := strings.Split(lrpOutputStr, "\n")
+	nodelocaldnsFound := false
+
+	for _, line := range lrpLines {
+		line = strings.TrimSpace(line)
+		if strings.Contains(line, "nodelocaldns") && strings.Contains(line, "kube-system") {
+			// Validate that the line contains expected components
+			require.Contains(t, line, "kube-dns", "LRP line should reference kube-dns service")
+			nodelocaldnsFound = true
+			t.Logf("Found nodelocaldns LRP entry: %s", line)
+			break
+		}
+	}
+
+	require.True(t, nodelocaldnsFound, "nodelocaldns LRP entry not found with expected structure in output: %s", lrpOutputStr)
+
+	// Check cilium service list for localredirect
+	serviceListCmd := []string{"cilium", "service", "list"}
+	serviceOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", serviceListCmd, config, false)
+	require.NoError(t, err)
+	require.Contains(t, string(serviceOutput), "LocalRedirect", "LocalRedirect not found in cilium service list")
+
+	// Validate LocalRedirect entries
+	serviceLines := strings.Split(string(serviceOutput), "\n")
+	tcpFound := false
+	udpFound := false
+	legacyFound := false
+
+	for _, line := range serviceLines {
+		if strings.Contains(line, "LocalRedirect") && strings.Contains(line, kubeDNSIP) {
+			// Check if this line contains the expected frontend (kube-dns) and backend (node-local-dns) IPs
+			if strings.Contains(line, nodeLocalDNSIP) {
+				// Check for both modern format (with /TCP or /UDP) and legacy format (without protocol)
+				if strings.Contains(line, "/TCP") {
+					tcpFound = true
+					t.Logf("Found TCP LocalRedirect: %s", strings.TrimSpace(line))
+				} else if strings.Contains(line, "/UDP") {
+					udpFound = true
+					t.Logf("Found UDP LocalRedirect: %s", strings.TrimSpace(line))
+				} else {
+					legacyFound = true
+					t.Logf("Found legacy LocalRedirect: %s", strings.TrimSpace(line))
+				}
+			}
+		}
+	}
+
+	// Validate that we found either legacy format or modern format entries
+	t.Log("Validating LocalRedirect entries - accepting either legacy format or modern TCP/UDP format")
+	require.True(t, legacyFound || (tcpFound && udpFound), "Either legacy LocalRedirect entry OR both TCP and UDP entries must be found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode)
+
+	t.Logf("Cilium LRP List Output:\n%s", string(lrpOutput))
+	t.Logf("Cilium Service List Output:\n%s", string(serviceOutput))
+}
+
+// restartClientPodsAndGetPod restarts the client daemonset and returns a new pod reference
+func restartClientPodsAndGetPod(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
+	// Get the node name for consistent testing
+	nodeName := originalPod.Spec.NodeName
+
+	// Restart the daemonset (assumes it's named "lrp-test" based on the manifest)
+	err := kubernetes.MustRestartDaemonset(ctx, cs, originalPod.Namespace, "lrp-test")
+	require.NoError(t, err)
+
+	// Wait for the daemonset to be ready
+	kubernetes.WaitForPodDaemonset(ctx, cs, originalPod.Namespace, "lrp-test", clientLabelSelector)
+
+	// Get the new pod on the same node
+	clientPods, err := kubernetes.GetPodsByNode(ctx, cs, originalPod.Namespace, clientLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, clientPods.Items)
+
+	return TakeOne(clientPods.Items)
+}
+
+// deleteAndRecreateResources deletes and recreates client pods and LRP, returning new pod
+func deleteAndRecreateResources(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
+	config := kubernetes.MustGetRestConfig()
+	ciliumCS, err := ciliumClientset.NewForConfig(config)
+	require.NoError(t, err)
+
+	nodeName := originalPod.Spec.NodeName
+
+	// Delete client daemonset
+	dsClient := cs.AppsV1().DaemonSets(originalPod.Namespace)
+	clientDS := kubernetes.MustParseDaemonSet(clientPath)
+	kubernetes.MustDeleteDaemonset(ctx, dsClient, clientDS)
+
+	// Delete LRP
+	lrpContent, err := os.ReadFile(lrpPath)
+	require.NoError(t, err)
+	var lrp ciliumv2.CiliumLocalRedirectPolicy
+	err = yaml.Unmarshal(lrpContent, &lrp)
+	require.NoError(t, err)
+
+	lrpClient := ciliumCS.CiliumV2().CiliumLocalRedirectPolicies(lrp.Namespace)
+	kubernetes.MustDeleteCiliumLocalRedirectPolicy(ctx, lrpClient, lrp)
+
+	// Wait for client pods to be deleted
+	t.Log("Waiting for client pods to be deleted...")
+	err = kubernetes.WaitForPodsDelete(ctx, cs, originalPod.Namespace, clientLabelSelector)
+	require.NoError(t, err)
+
+	// Wait for LRP to be deleted by polling
+	t.Log("Waiting for LRP to be deleted...")
+	err = kubernetes.WaitForLRPDelete(ctx, ciliumCS, lrp)
+	require.NoError(t, err)
+
+	// Recreate LRP
+	_, cleanupLRP := kubernetes.MustSetupLRP(ctx, ciliumCS, lrpPath)
+	t.Cleanup(cleanupLRP)
+
+	// Restart node-local-dns pods to pick up new LRP configuration
+	t.Log("Restarting node-local-dns pods after LRP recreation")
+	err = kubernetes.MustRestartDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns")
+	require.NoError(t, err)
+	kubernetes.WaitForPodDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns", nodeLocalDNSLabelSelector)
+
+	// Recreate client daemonset
+	_, cleanupClient := kubernetes.MustSetupDaemonset(ctx, cs, clientPath)
+	t.Cleanup(cleanupClient)
+
+	// Wait for pods to be ready
+	kubernetes.WaitForPodDaemonset(ctx, cs, clientDS.Namespace, clientDS.Name, clientLabelSelector)
+
+	// Get new pod on the same node
+	clientPods, err := kubernetes.GetPodsByNode(ctx, cs, clientDS.Namespace, clientLabelSelector, nodeName)
+	require.NoError(t, err)
+	require.NotEmpty(t, clientPods.Items)
+
+	return TakeOne(clientPods.Items)
 }

 // TakeOne takes one item from the slice randomly; if empty, it returns the empty value for the type
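The diff's trailing context stops at TakeOne's doc comment; its body is unchanged and therefore not shown here. Going only by that comment and the file's existing golang.org/x/exp/rand import, a plausible shape is (an assumed reconstruction, not the verbatim implementation):

	package lrp

	import "golang.org/x/exp/rand"

	// TakeOne returns a random element of items, or the zero value of T if
	// the slice is empty (hypothetical sketch based on the doc comment above).
	func TakeOne[T any](items []T) T {
		var zero T
		if len(items) == 0 {
			return zero
		}
		return items[rand.Intn(len(items))]
	}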
