@@ -4,6 +4,7 @@ package lrp
44
55import (
66 "context"
7+ "fmt"
78 "os"
89 "strings"
910 "testing"
@@ -13,11 +14,16 @@ import (
1314 "github.com/Azure/azure-container-networking/test/integration/prometheus"
1415 "github.com/Azure/azure-container-networking/test/internal/kubernetes"
1516 "github.com/Azure/azure-container-networking/test/internal/retry"
17+ ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
1618 ciliumClientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
1719 "github.com/pkg/errors"
1820 "github.com/stretchr/testify/require"
1921 "golang.org/x/exp/rand"
2022 corev1 "k8s.io/api/core/v1"
23+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
24+ k8sclient "k8s.io/client-go/kubernetes"
25+ "k8s.io/client-go/rest"
26+ "sigs.k8s.io/yaml"
2127)
2228
2329const (
@@ -28,11 +34,13 @@ const (
2834 dnsService = "kube-dns"
2935 retryAttempts = 10
3036 retryDelay = 5 * time .Second
31- promAddress = "http://localhost:9253/metrics"
3237 nodeLocalDNSLabelSelector = "k8s-app=node-local-dns"
3338 clientLabelSelector = "lrp-test=true"
3439 coreDNSRequestCountTotal = "coredns_dns_request_count_total"
3540 clientContainer = "no-op"
41+ // Port constants for prometheus endpoints
42+ initialPrometheusPort = 9253
43+ recreatedPrometheusPort = 9254
3644)
3745
3846var (
4755 clientPath = ciliumManifestsDir + "client-ds.yaml"
4856)
4957
// getPrometheusAddress builds the URL of the prometheus metrics endpoint
// served on localhost at the given port.
func getPrometheusAddress(port int) string {
	const metricsURLFormat = "http://localhost:%d/metrics"

	return fmt.Sprintf(metricsURLFormat, port)
}
62+
5063func setupLRP (t * testing.T , ctx context.Context ) (* corev1.Pod , func ()) {
5164 var cleanUpFns []func ()
5265 success := false
@@ -132,8 +145,8 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
132145 pf , err := k8s .NewPortForwarder (config , k8s.PortForwardingOpts {
133146 Namespace : nodeLocalDNSDS .Namespace ,
134147 PodName : selectedLocalDNSPod ,
135- LocalPort : 9253 ,
136- DestPort : 9253 ,
148+ LocalPort : initialPrometheusPort ,
149+ DestPort : initialPrometheusPort ,
137150 })
138151 require .NoError (t , err )
139152 pctx := context .Background ()
@@ -154,7 +167,7 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
154167}
155168
156169func testLRPCase (t * testing.T , ctx context.Context , clientPod corev1.Pod , clientCmd []string , expectResponse , expectErrMsg string ,
157- shouldError , countShouldIncrease bool ) {
170+ shouldError , countShouldIncrease bool , prometheusAddress string ) {
158171
159172 config := kubernetes .MustGetRestConfig ()
160173 cs := kubernetes .MustGetClientset ()
@@ -167,9 +180,11 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
167180 "zone" : "." ,
168181 }
169182
170- // curl localhost:9253/metrics
171- beforeMetric , err := prometheus .GetMetric (promAddress , coreDNSRequestCountTotal , metricLabels )
183+ // curl to the specified prometheus address
184+ beforeMetric , err := prometheus .GetMetric (prometheusAddress , coreDNSRequestCountTotal , metricLabels )
172185 require .NoError (t , err )
186+ beforeValue := beforeMetric .GetCounter ().GetValue ()
187+ t .Logf ("Before DNS request - metric count: %.0f" , beforeValue )
173188
174189 t .Log ("calling command from client" )
175190
@@ -187,13 +202,15 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
187202 time .Sleep (500 * time .Millisecond )
188203
189204 // curl again and see count diff
190- afterMetric , err := prometheus .GetMetric (promAddress , coreDNSRequestCountTotal , metricLabels )
205+ afterMetric , err := prometheus .GetMetric (prometheusAddress , coreDNSRequestCountTotal , metricLabels )
191206 require .NoError (t , err )
207+ afterValue := afterMetric .GetCounter ().GetValue ()
208+ t .Logf ("After DNS request - metric count: %.0f (diff: %.0f)" , afterValue , afterValue - beforeValue )
192209
193210 if countShouldIncrease {
194- require .Greater (t , afterMetric . GetCounter (). GetValue (), beforeMetric . GetCounter (). GetValue () , "dns metric count did not increase after command" )
211+ require .Greater (t , afterValue , beforeValue , "dns metric count did not increase after command - before: %.0f, after: %.0f" , beforeValue , afterValue )
195212 } else {
196- require .Equal (t , afterMetric . GetCounter (). GetValue (), beforeMetric . GetCounter (). GetValue () , "dns metric count increased after command" )
213+ require .Equal (t , afterValue , beforeValue , "dns metric count increased after command - before: %.0f, after: %.0f" , beforeValue , afterValue )
197214 }
198215}
199216
@@ -210,9 +227,282 @@ func TestLRP(t *testing.T) {
210227 defer cleanupFn ()
211228 require .NotNil (t , selectedPod )
212229
230+ // Get the kube-dns service IP for DNS requests
231+ cs := kubernetes .MustGetClientset ()
232+ svc , err := kubernetes .GetService (ctx , cs , kubeSystemNamespace , dnsService )
233+ require .NoError (t , err )
234+ kubeDNS := svc .Spec .ClusterIP
235+
236+ t .Logf ("LRP Test Starting..." )
237+
238+ // Basic LRP test - using initial port from setupLRP
213239 testLRPCase (t , ctx , * selectedPod , []string {
214- "nslookup" , "google.com" , "10.0.0.10" ,
215- }, "" , "" , false , true )
240+ "nslookup" , "google.com" , kubeDNS ,
241+ }, "" , "" , false , true , getPrometheusAddress (initialPrometheusPort ))
242+
243+ t .Logf ("LRP Test Completed" )
244+
245+ t .Logf ("LRP Lifecycle Test Starting" )
246+
247+ // Run LRP Lifecycle test
248+ testLRPLifecycle (t , ctx , * selectedPod , kubeDNS )
249+
250+ t .Logf ("LRP Lifecycle Test Completed" )
251+ }
252+
253+ // testLRPLifecycle performs testing of Local Redirect Policy functionality
254+ // including pod restarts, resource recreation, and cilium command validation
255+ func testLRPLifecycle (t * testing.T , ctx context.Context , clientPod corev1.Pod , kubeDNS string ) {
256+ config := kubernetes .MustGetRestConfig ()
257+ cs := kubernetes .MustGetClientset ()
258+
259+
260+ // Step 1: Validate LRP using cilium commands
261+ t .Log ("Step 1: Validating LRP using cilium commands" )
262+ validateCiliumLRP (t , ctx , cs , config )
263+
264+ // Step 2: Restart busybox pods and verify LRP still works
265+ t .Log ("Step 2: Restarting client pods to test persistence" )
266+ restartedPod := restartClientPodsAndGetPod (t , ctx , cs , clientPod )
267+
268+ // Step 3: Verify metrics after restart
269+ t .Log ("Step 3: Verifying LRP functionality after pod restart" )
270+ testLRPCase (t , ctx , restartedPod , []string {
271+ "nslookup" , "google.com" , kubeDNS ,
272+ }, "" , "" , false , true , getPrometheusAddress (initialPrometheusPort ))
273+
274+ // Step 4: Validate cilium commands still show LRP
275+ t .Log ("Step 4: Re-validating cilium LRP after restart" )
276+ validateCiliumLRP (t , ctx , cs , config )
277+
278+ // Step 5: Delete and recreate resources & restart nodelocaldns daemonset
279+ t .Log ("Step 5: Testing resource deletion and recreation" )
280+ recreatedPod := deleteAndRecreateResources (t , ctx , cs , clientPod )
281+
282+ // Step 6: Re-establish port forward to new node-local-dns pod and validate metrics
283+ t .Log ("Step 6: Re-establishing port forward to new node-local-dns pod for metrics validation" )
284+
285+ // Get the new node-local-dns pod on the same node as our recreated client pod
286+ nodeName := recreatedPod .Spec .NodeName
287+ newNodeLocalDNSPods , err := kubernetes .GetPodsByNode (ctx , cs , kubeSystemNamespace , nodeLocalDNSLabelSelector , nodeName )
288+ require .NoError (t , err )
289+ require .NotEmpty (t , newNodeLocalDNSPods .Items , "No node-local-dns pod found on node %s after restart" , nodeName )
290+
291+ newNodeLocalDNSPod := TakeOne (newNodeLocalDNSPods .Items )
292+ t .Logf ("Setting up port forward to new node-local-dns pod: %s" , newNodeLocalDNSPod .Name )
293+
294+ // Setup new port forward to the new node-local-dns pod
295+ newPf , err := k8s .NewPortForwarder (config , k8s.PortForwardingOpts {
296+ Namespace : newNodeLocalDNSPod .Namespace ,
297+ PodName : newNodeLocalDNSPod .Name ,
298+ LocalPort : recreatedPrometheusPort , // Use different port to avoid conflicts
299+ DestPort : initialPrometheusPort ,
300+ })
301+ require .NoError (t , err )
302+
303+ newPortForwardCtx , newCancel := context .WithTimeout (ctx , (retryAttempts + 1 )* retryDelay )
304+ defer newCancel ()
305+
306+ err = defaultRetrier .Do (newPortForwardCtx , func () error {
307+ t .Logf ("attempting port forward to new node-local-dns pod %s..." , newNodeLocalDNSPod .Name )
308+ return errors .Wrap (newPf .Forward (newPortForwardCtx ), "could not start port forward to new pod" )
309+ })
310+ require .NoError (t , err , "could not start port forward to new node-local-dns pod" )
311+ defer newPf .Stop ()
312+
313+ t .Log ("Port forward to new node-local-dns pod established" )
314+
315+ // Use testLRPCase function with the new prometheus address
316+ t .Log ("Validating metrics with new node-local-dns pod" )
317+ testLRPCase (t , ctx , recreatedPod , []string {
318+ "nslookup" , "github.com" , kubeDNS ,
319+ }, "" , "" , false , true , getPrometheusAddress (recreatedPrometheusPort ))
320+
321+ t .Logf ("SUCCESS: Metrics validation passed - traffic is being redirected to new node-local-dns pod %s" , newNodeLocalDNSPod .Name )
322+
323+ // Step 7: Final cilium validation after node-local-dns restart
324+ t .Log ("Step 7: Final cilium validation - ensuring LRP is still active after node-local-dns restart" )
325+ validateCiliumLRP (t , ctx , cs , config )
326+
327+ }
328+
329+ // validateCiliumLRP checks that LRP is properly configured in cilium
330+ func validateCiliumLRP (t * testing.T , ctx context.Context , cs * k8sclient.Clientset , config * rest.Config ) {
331+ ciliumPods , err := cs .CoreV1 ().Pods (kubeSystemNamespace ).List (ctx , metav1.ListOptions {
332+ LabelSelector : "k8s-app=cilium" ,
333+ })
334+ require .NoError (t , err )
335+ require .NotEmpty (t , ciliumPods .Items )
336+ ciliumPod := TakeOne (ciliumPods .Items )
337+
338+ // Get Kubernetes version to determine validation approach
339+ serverVersion , err := cs .Discovery ().ServerVersion ()
340+ require .NoError (t , err )
341+ t .Logf ("Detected Kubernetes version: %s" , serverVersion .String ())
342+
343+ // Get kube-dns service IP for validation
344+ svc , err := kubernetes .GetService (ctx , cs , kubeSystemNamespace , dnsService )
345+ require .NoError (t , err )
346+ kubeDNSIP := svc .Spec .ClusterIP
347+
348+ // IMPORTANT: Get node-local-dns pod IP on the SAME node as the cilium pod we're using
349+ selectedNode := ciliumPod .Spec .NodeName
350+ t .Logf ("Using cilium pod %s on node %s for validation" , ciliumPod .Name , selectedNode )
351+
352+ // Get node-local-dns pod specifically on the same node as our cilium pod
353+ nodeLocalDNSPods , err := kubernetes .GetPodsByNode (ctx , cs , kubeSystemNamespace , nodeLocalDNSLabelSelector , selectedNode )
354+ require .NoError (t , err )
355+ require .NotEmpty (t , nodeLocalDNSPods .Items , "No node-local-dns pod found on node %s" , selectedNode )
356+
357+ // Use the first (and should be only) node-local-dns pod on this node
358+ nodeLocalDNSPod := nodeLocalDNSPods .Items [0 ]
359+ nodeLocalDNSIP := nodeLocalDNSPod .Status .PodIP
360+ require .NotEmpty (t , nodeLocalDNSIP , "node-local-dns pod %s has no IP address" , nodeLocalDNSPod .Name )
361+
362+ t .Logf ("Validating LRP: kubeDNS IP=%s, nodeLocalDNS IP=%s (pod: %s), node=%s" ,
363+ kubeDNSIP , nodeLocalDNSIP , nodeLocalDNSPod .Name , selectedNode )
364+
365+ // Check cilium lrp list
366+ lrpListCmd := []string {"cilium" , "lrp" , "list" }
367+ lrpOutput , _ , err := kubernetes .ExecCmdOnPod (ctx , cs , ciliumPod .Namespace , ciliumPod .Name , "cilium-agent" , lrpListCmd , config , false )
368+ require .NoError (t , err )
369+
370+ // Validate the LRP output structure more thoroughly
371+ lrpOutputStr := string (lrpOutput )
372+ require .Contains (t , lrpOutputStr , "nodelocaldns" , "LRP not found in cilium lrp list" )
373+
374+ // Parse LRP list output to validate structure
375+ lrpLines := strings .Split (lrpOutputStr , "\n " )
376+ nodelocaldnsFound := false
377+
378+ for _ , line := range lrpLines {
379+ line = strings .TrimSpace (line )
380+ if strings .Contains (line , "nodelocaldns" ) && strings .Contains (line , "kube-system" ) {
381+ // Validate that the line contains expected components
382+ require .Contains (t , line , "kube-dns" , "LRP line should reference kube-dns service" )
383+ nodelocaldnsFound = true
384+ t .Logf ("Found nodelocaldns LRP entry: %s" , line )
385+ break
386+ }
387+ }
388+
389+ require .True (t , nodelocaldnsFound , "nodelocaldns LRP entry not found with expected structure in output: %s" , lrpOutputStr )
390+
391+ // Check cilium service list for localredirect
392+ serviceListCmd := []string {"cilium" , "service" , "list" }
393+ serviceOutput , _ , err := kubernetes .ExecCmdOnPod (ctx , cs , ciliumPod .Namespace , ciliumPod .Name , "cilium-agent" , serviceListCmd , config , false )
394+ require .NoError (t , err )
395+ require .Contains (t , string (serviceOutput ), "LocalRedirect" , "LocalRedirect not found in cilium service list" )
396+
397+ // Validate LocalRedirect entries
398+ serviceLines := strings .Split (string (serviceOutput ), "\n " )
399+ tcpFound := false
400+ udpFound := false
401+ legacyFound := false
402+
403+ for _ , line := range serviceLines {
404+ if strings .Contains (line , "LocalRedirect" ) && strings .Contains (line , kubeDNSIP ) {
405+ // Check if this line contains the expected frontend (kube-dns) and backend (node-local-dns) IPs
406+ if strings .Contains (line , nodeLocalDNSIP ) {
407+ // Check for both modern format (with /TCP or /UDP) and legacy format (without protocol)
408+ if strings .Contains (line , "/TCP" ) {
409+ tcpFound = true
410+ t .Logf ("Found TCP LocalRedirect: %s" , strings .TrimSpace (line ))
411+ } else if strings .Contains (line , "/UDP" ) {
412+ udpFound = true
413+ t .Logf ("Found UDP LocalRedirect: %s" , strings .TrimSpace (line ))
414+ } else {
415+ legacyFound = true
416+ t .Logf ("Found legacy LocalRedirect: %s" , strings .TrimSpace (line ))
417+ }
418+ }
419+ }
420+ }
421+
422+ // Validate that we found either legacy format or modern format entries
423+ t .Log ("Validating LocalRedirect entries - accepting either legacy format or modern TCP/UDP format" )
424+ require .True (t , legacyFound || (tcpFound && udpFound ), "Either legacy LocalRedirect entry OR both TCP and UDP entries must be found with frontend IP %s and backend IP %s on node %s" , kubeDNSIP , nodeLocalDNSIP , selectedNode )
425+
426+ t .Logf ("Cilium LRP List Output:\n %s" , string (lrpOutput ))
427+ t .Logf ("Cilium Service List Output:\n %s" , string (serviceOutput ))
428+ }
429+
430+ // restartClientPodsAndGetPod restarts the client daemonset and returns a new pod reference
431+ func restartClientPodsAndGetPod (t * testing.T , ctx context.Context , cs * k8sclient.Clientset , originalPod corev1.Pod ) corev1.Pod {
432+ // Get the node name for consistent testing
433+ nodeName := originalPod .Spec .NodeName
434+
435+ // Restart the daemonset (assumes it's named "lrp-test" based on the manifest)
436+ err := kubernetes .MustRestartDaemonset (ctx , cs , originalPod .Namespace , "lrp-test" )
437+ require .NoError (t , err )
438+
439+ // Wait for the daemonset to be ready
440+ kubernetes .WaitForPodDaemonset (ctx , cs , originalPod .Namespace , "lrp-test" , clientLabelSelector )
441+
442+ // Get the new pod on the same node
443+ clientPods , err := kubernetes .GetPodsByNode (ctx , cs , originalPod .Namespace , clientLabelSelector , nodeName )
444+ require .NoError (t , err )
445+ require .NotEmpty (t , clientPods .Items )
446+
447+ return TakeOne (clientPods .Items )
448+ }
449+
450+ // deleteAndRecreateResources deletes and recreates client pods and LRP, returning new pod
451+ func deleteAndRecreateResources (t * testing.T , ctx context.Context , cs * k8sclient.Clientset , originalPod corev1.Pod ) corev1.Pod {
452+ config := kubernetes .MustGetRestConfig ()
453+ ciliumCS , err := ciliumClientset .NewForConfig (config )
454+ require .NoError (t , err )
455+
456+ nodeName := originalPod .Spec .NodeName
457+
458+ // Delete client daemonset
459+ dsClient := cs .AppsV1 ().DaemonSets (originalPod .Namespace )
460+ clientDS := kubernetes .MustParseDaemonSet (clientPath )
461+ kubernetes .MustDeleteDaemonset (ctx , dsClient , clientDS )
462+
463+ // Delete LRP
464+ lrpContent , err := os .ReadFile (lrpPath )
465+ require .NoError (t , err )
466+ var lrp ciliumv2.CiliumLocalRedirectPolicy
467+ err = yaml .Unmarshal (lrpContent , & lrp )
468+ require .NoError (t , err )
469+
470+ lrpClient := ciliumCS .CiliumV2 ().CiliumLocalRedirectPolicies (lrp .Namespace )
471+ kubernetes .MustDeleteCiliumLocalRedirectPolicy (ctx , lrpClient , lrp )
472+
473+ // Wait for client pods to be deleted
474+ t .Log ("Waiting for client pods to be deleted..." )
475+ err = kubernetes .WaitForPodsDelete (ctx , cs , originalPod .Namespace , clientLabelSelector )
476+ require .NoError (t , err )
477+
478+ // Wait for LRP to be deleted by polling
479+ t .Log ("Waiting for LRP to be deleted..." )
480+ err = kubernetes .WaitForLRPDelete (ctx , ciliumCS , lrp )
481+ require .NoError (t , err )
482+
483+ // Recreate LRP
484+ _ , cleanupLRP := kubernetes .MustSetupLRP (ctx , ciliumCS , lrpPath )
485+ t .Cleanup (cleanupLRP )
486+
487+ // Restart node-local-dns pods to pick up new LRP configuration
488+ t .Log ("Restarting node-local-dns pods after LRP recreation" )
489+ err = kubernetes .MustRestartDaemonset (ctx , cs , kubeSystemNamespace , "node-local-dns" )
490+ require .NoError (t , err )
491+ kubernetes .WaitForPodDaemonset (ctx , cs , kubeSystemNamespace , "node-local-dns" , nodeLocalDNSLabelSelector )
492+
493+ // Recreate client daemonset
494+ _ , cleanupClient := kubernetes .MustSetupDaemonset (ctx , cs , clientPath )
495+ t .Cleanup (cleanupClient )
496+
497+ // Wait for pods to be ready
498+ kubernetes .WaitForPodDaemonset (ctx , cs , clientDS .Namespace , clientDS .Name , clientLabelSelector )
499+
500+ // Get new pod on the same node
501+ clientPods , err := kubernetes .GetPodsByNode (ctx , cs , clientDS .Namespace , clientLabelSelector , nodeName )
502+ require .NoError (t , err )
503+ require .NotEmpty (t , clientPods .Items )
504+
505+ return TakeOne (clientPods .Items )
216506}
217507
218508// TakeOne takes one item from the slice randomly; if empty, it returns the empty value for the type
0 commit comments