Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion test/integration/lrp/lrp_fqdn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ func TestLRPFQDN(t *testing.T) {
for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) {
testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases)
testLRPCase(t, ctx, *selectedPod, tt.command, tt.expectedMsgContains, tt.expectedErrMsgContains, tt.shouldError, tt.countIncreases, getPrometheusAddress(initialPrometheusPort))
})
}
}
312 changes: 301 additions & 11 deletions test/integration/lrp/lrp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package lrp

import (
"context"
"fmt"
"os"
"strings"
"testing"
Expand All @@ -13,11 +14,16 @@ import (
"github.com/Azure/azure-container-networking/test/integration/prometheus"
"github.com/Azure/azure-container-networking/test/internal/kubernetes"
"github.com/Azure/azure-container-networking/test/internal/retry"
ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
ciliumClientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
"github.com/pkg/errors"
"github.com/stretchr/testify/require"
"golang.org/x/exp/rand"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
k8sclient "k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"sigs.k8s.io/yaml"
)

const (
Expand All @@ -28,11 +34,13 @@ const (
dnsService = "kube-dns"
retryAttempts = 10
retryDelay = 5 * time.Second
promAddress = "http://localhost:9253/metrics"
nodeLocalDNSLabelSelector = "k8s-app=node-local-dns"
clientLabelSelector = "lrp-test=true"
coreDNSRequestCountTotal = "coredns_dns_request_count_total"
clientContainer = "no-op"
// Port constants for prometheus endpoints
initialPrometheusPort = 9253
recreatedPrometheusPort = 9254
)

var (
Expand All @@ -47,6 +55,11 @@ var (
clientPath = ciliumManifestsDir + "client-ds.yaml"
)

// getPrometheusAddress builds the URL of the prometheus metrics endpoint
// exposed on localhost at the given forwarded port.
func getPrometheusAddress(port int) string {
	const metricsURLFormat = "http://localhost:%d/metrics"
	return fmt.Sprintf(metricsURLFormat, port)
}

func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
var cleanUpFns []func()
success := false
Expand Down Expand Up @@ -132,8 +145,8 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
pf, err := k8s.NewPortForwarder(config, k8s.PortForwardingOpts{
Namespace: nodeLocalDNSDS.Namespace,
PodName: selectedLocalDNSPod,
LocalPort: 9253,
DestPort: 9253,
LocalPort: initialPrometheusPort,
DestPort: initialPrometheusPort,
})
require.NoError(t, err)
pctx := context.Background()
Expand All @@ -154,7 +167,7 @@ func setupLRP(t *testing.T, ctx context.Context) (*corev1.Pod, func()) {
}

func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, clientCmd []string, expectResponse, expectErrMsg string,
shouldError, countShouldIncrease bool) {
shouldError, countShouldIncrease bool, prometheusAddress string) {

config := kubernetes.MustGetRestConfig()
cs := kubernetes.MustGetClientset()
Expand All @@ -167,9 +180,11 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
"zone": ".",
}

// curl localhost:9253/metrics
beforeMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
// curl to the specified prometheus address
beforeMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
require.NoError(t, err)
beforeValue := beforeMetric.GetCounter().GetValue()
t.Logf("Before DNS request - metric count: %.0f", beforeValue)

t.Log("calling command from client")

Expand All @@ -187,13 +202,15 @@ func testLRPCase(t *testing.T, ctx context.Context, clientPod corev1.Pod, client
time.Sleep(500 * time.Millisecond)

// curl again and see count diff
afterMetric, err := prometheus.GetMetric(promAddress, coreDNSRequestCountTotal, metricLabels)
afterMetric, err := prometheus.GetMetric(prometheusAddress, coreDNSRequestCountTotal, metricLabels)
require.NoError(t, err)
afterValue := afterMetric.GetCounter().GetValue()
t.Logf("After DNS request - metric count: %.0f (diff: %.0f)", afterValue, afterValue-beforeValue)

if countShouldIncrease {
require.Greater(t, afterMetric.GetCounter().GetValue(), beforeMetric.GetCounter().GetValue(), "dns metric count did not increase after command")
require.Greater(t, afterValue, beforeValue, "dns metric count did not increase after command - before: %.0f, after: %.0f", beforeValue, afterValue)
} else {
require.Equal(t, afterMetric.GetCounter().GetValue(), beforeMetric.GetCounter().GetValue(), "dns metric count increased after command")
require.Equal(t, afterValue, beforeValue, "dns metric count increased after command - before: %.0f, after: %.0f", beforeValue, afterValue)
}
}

Expand All @@ -210,9 +227,282 @@ func TestLRP(t *testing.T) {
defer cleanupFn()
require.NotNil(t, selectedPod)

// Get the kube-dns service IP for DNS requests
cs := kubernetes.MustGetClientset()
svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
require.NoError(t, err)
kubeDNS := svc.Spec.ClusterIP

t.Logf("LRP Test Starting...")

// Basic LRP test - using initial port from setupLRP
testLRPCase(t, ctx, *selectedPod, []string{
"nslookup", "google.com", "10.0.0.10",
}, "", "", false, true)
"nslookup", "google.com", kubeDNS,
}, "", "", false, true, getPrometheusAddress(initialPrometheusPort))

t.Logf("LRP Test Completed")

t.Logf("LRP Lifecycle Test Starting")

// Run LRP Lifecycle test
testLRPLifecycle(t, ctx, *selectedPod, kubeDNS)

t.Logf("LRP Lifecycle Test Completed")
}

// testLRPLifecycle performs testing of Local Redirect Policy functionality
// including pod restarts, resource recreation, and cilium command validation.
//
// clientPod is the DNS client pod selected by setupLRP; kubeDNS is the
// kube-dns service ClusterIP used as the nslookup target. The test assumes
// setupLRP already established a port forward on initialPrometheusPort to the
// node-local-dns pod colocated with clientPod.
//
// Fix: removed gofmt violations present in the original (doubled blank line
// after the clientset initialization and a stray blank line before the
// closing brace). Logic is unchanged.
func testLRPLifecycle(t *testing.T, ctx context.Context, clientPod corev1.Pod, kubeDNS string) {
	config := kubernetes.MustGetRestConfig()
	cs := kubernetes.MustGetClientset()

	// Step 1: Validate LRP using cilium commands
	t.Log("Step 1: Validating LRP using cilium commands")
	validateCiliumLRP(t, ctx, cs, config)

	// Step 2: Restart busybox pods and verify LRP still works
	t.Log("Step 2: Restarting client pods to test persistence")
	restartedPod := restartClientPodsAndGetPod(t, ctx, cs, clientPod)

	// Step 3: Verify metrics after restart. The original port forward from
	// setupLRP is still in place, so the initial prometheus port is used.
	t.Log("Step 3: Verifying LRP functionality after pod restart")
	testLRPCase(t, ctx, restartedPod, []string{
		"nslookup", "google.com", kubeDNS,
	}, "", "", false, true, getPrometheusAddress(initialPrometheusPort))

	// Step 4: Validate cilium commands still show LRP
	t.Log("Step 4: Re-validating cilium LRP after restart")
	validateCiliumLRP(t, ctx, cs, config)

	// Step 5: Delete and recreate resources & restart nodelocaldns daemonset
	t.Log("Step 5: Testing resource deletion and recreation")
	recreatedPod := deleteAndRecreateResources(t, ctx, cs, clientPod)

	// Step 6: Re-establish port forward to new node-local-dns pod and validate metrics
	t.Log("Step 6: Re-establishing port forward to new node-local-dns pod for metrics validation")

	// Get the new node-local-dns pod on the same node as our recreated client pod
	nodeName := recreatedPod.Spec.NodeName
	newNodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, nodeLocalDNSLabelSelector, nodeName)
	require.NoError(t, err)
	require.NotEmpty(t, newNodeLocalDNSPods.Items, "No node-local-dns pod found on node %s after restart", nodeName)

	newNodeLocalDNSPod := TakeOne(newNodeLocalDNSPods.Items)
	t.Logf("Setting up port forward to new node-local-dns pod: %s", newNodeLocalDNSPod.Name)

	// Setup new port forward to the new node-local-dns pod
	newPf, err := k8s.NewPortForwarder(config, k8s.PortForwardingOpts{
		Namespace: newNodeLocalDNSPod.Namespace,
		PodName:   newNodeLocalDNSPod.Name,
		LocalPort: recreatedPrometheusPort, // Use different port to avoid conflicts
		DestPort:  initialPrometheusPort,
	})
	require.NoError(t, err)

	newPortForwardCtx, newCancel := context.WithTimeout(ctx, (retryAttempts+1)*retryDelay)
	defer newCancel()

	err = defaultRetrier.Do(newPortForwardCtx, func() error {
		t.Logf("attempting port forward to new node-local-dns pod %s...", newNodeLocalDNSPod.Name)
		return errors.Wrap(newPf.Forward(newPortForwardCtx), "could not start port forward to new pod")
	})
	require.NoError(t, err, "could not start port forward to new node-local-dns pod")
	defer newPf.Stop()

	t.Log("Port forward to new node-local-dns pod established")

	// Use testLRPCase function with the new prometheus address
	t.Log("Validating metrics with new node-local-dns pod")
	testLRPCase(t, ctx, recreatedPod, []string{
		"nslookup", "github.com", kubeDNS,
	}, "", "", false, true, getPrometheusAddress(recreatedPrometheusPort))

	t.Logf("SUCCESS: Metrics validation passed - traffic is being redirected to new node-local-dns pod %s", newNodeLocalDNSPod.Name)

	// Step 7: Final cilium validation after node-local-dns restart
	t.Log("Step 7: Final cilium validation - ensuring LRP is still active after node-local-dns restart")
	validateCiliumLRP(t, ctx, cs, config)
}

// validateCiliumLRP checks that LRP is properly configured in cilium by
// executing `cilium lrp list` and `cilium service list` inside a cilium-agent
// pod and asserting that the nodelocaldns redirect policy and its
// LocalRedirect service entries (kube-dns frontend -> node-local-dns backend)
// are present on a single node.
//
// Fix: the exec outputs were converted with string(...) repeatedly (three
// times for serviceOutput, twice for lrpOutput) even though lrpOutputStr
// already existed; the conversions are now done once and reused.
func validateCiliumLRP(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, config *rest.Config) {
	ciliumPods, err := cs.CoreV1().Pods(kubeSystemNamespace).List(ctx, metav1.ListOptions{
		LabelSelector: "k8s-app=cilium",
	})
	require.NoError(t, err)
	require.NotEmpty(t, ciliumPods.Items)
	ciliumPod := TakeOne(ciliumPods.Items)

	// Get Kubernetes version to determine validation approach
	serverVersion, err := cs.Discovery().ServerVersion()
	require.NoError(t, err)
	t.Logf("Detected Kubernetes version: %s", serverVersion.String())

	// Get kube-dns service IP for validation
	svc, err := kubernetes.GetService(ctx, cs, kubeSystemNamespace, dnsService)
	require.NoError(t, err)
	kubeDNSIP := svc.Spec.ClusterIP

	// IMPORTANT: Get node-local-dns pod IP on the SAME node as the cilium pod we're using
	selectedNode := ciliumPod.Spec.NodeName
	t.Logf("Using cilium pod %s on node %s for validation", ciliumPod.Name, selectedNode)

	// Get node-local-dns pod specifically on the same node as our cilium pod
	nodeLocalDNSPods, err := kubernetes.GetPodsByNode(ctx, cs, kubeSystemNamespace, nodeLocalDNSLabelSelector, selectedNode)
	require.NoError(t, err)
	require.NotEmpty(t, nodeLocalDNSPods.Items, "No node-local-dns pod found on node %s", selectedNode)

	// Use the first (and should be only) node-local-dns pod on this node
	nodeLocalDNSPod := nodeLocalDNSPods.Items[0]
	nodeLocalDNSIP := nodeLocalDNSPod.Status.PodIP
	require.NotEmpty(t, nodeLocalDNSIP, "node-local-dns pod %s has no IP address", nodeLocalDNSPod.Name)

	t.Logf("Validating LRP: kubeDNS IP=%s, nodeLocalDNS IP=%s (pod: %s), node=%s",
		kubeDNSIP, nodeLocalDNSIP, nodeLocalDNSPod.Name, selectedNode)

	// Check cilium lrp list
	lrpListCmd := []string{"cilium", "lrp", "list"}
	lrpOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", lrpListCmd, config, false)
	require.NoError(t, err)

	// Validate the LRP output structure more thoroughly
	lrpOutputStr := string(lrpOutput)
	require.Contains(t, lrpOutputStr, "nodelocaldns", "LRP not found in cilium lrp list")

	// Parse LRP list output to validate structure
	lrpLines := strings.Split(lrpOutputStr, "\n")
	nodelocaldnsFound := false

	for _, line := range lrpLines {
		line = strings.TrimSpace(line)
		if strings.Contains(line, "nodelocaldns") && strings.Contains(line, "kube-system") {
			// Validate that the line contains expected components
			require.Contains(t, line, "kube-dns", "LRP line should reference kube-dns service")
			nodelocaldnsFound = true
			t.Logf("Found nodelocaldns LRP entry: %s", line)
			break
		}
	}

	require.True(t, nodelocaldnsFound, "nodelocaldns LRP entry not found with expected structure in output: %s", lrpOutputStr)

	// Check cilium service list for localredirect
	serviceListCmd := []string{"cilium", "service", "list"}
	serviceOutput, _, err := kubernetes.ExecCmdOnPod(ctx, cs, ciliumPod.Namespace, ciliumPod.Name, "cilium-agent", serviceListCmd, config, false)
	require.NoError(t, err)
	serviceOutputStr := string(serviceOutput)
	require.Contains(t, serviceOutputStr, "LocalRedirect", "LocalRedirect not found in cilium service list")

	// Validate LocalRedirect entries
	serviceLines := strings.Split(serviceOutputStr, "\n")
	tcpFound := false
	udpFound := false
	legacyFound := false

	for _, line := range serviceLines {
		if strings.Contains(line, "LocalRedirect") && strings.Contains(line, kubeDNSIP) {
			// Check if this line contains the expected frontend (kube-dns) and backend (node-local-dns) IPs
			if strings.Contains(line, nodeLocalDNSIP) {
				// Check for both modern format (with /TCP or /UDP) and legacy format (without protocol)
				if strings.Contains(line, "/TCP") {
					tcpFound = true
					t.Logf("Found TCP LocalRedirect: %s", strings.TrimSpace(line))
				} else if strings.Contains(line, "/UDP") {
					udpFound = true
					t.Logf("Found UDP LocalRedirect: %s", strings.TrimSpace(line))
				} else {
					legacyFound = true
					t.Logf("Found legacy LocalRedirect: %s", strings.TrimSpace(line))
				}
			}
		}
	}

	// Validate that we found either legacy format or modern format entries
	t.Log("Validating LocalRedirect entries - accepting either legacy format or modern TCP/UDP format")
	require.True(t, legacyFound || (tcpFound && udpFound), "Either legacy LocalRedirect entry OR both TCP and UDP entries must be found with frontend IP %s and backend IP %s on node %s", kubeDNSIP, nodeLocalDNSIP, selectedNode)

	t.Logf("Cilium LRP List Output:\n%s", lrpOutputStr)
	t.Logf("Cilium Service List Output:\n%s", serviceOutputStr)
}

// restartClientPodsAndGetPod restarts the client daemonset and returns a new pod reference.
// The replacement pod is selected from the same node the original pod ran on,
// so subsequent metric checks keep hitting the same node-local-dns instance.
func restartClientPodsAndGetPod(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
	// Pin the node so before/after comparisons stay on the same host.
	node := originalPod.Spec.NodeName
	ns := originalPod.Namespace

	// Roll the daemonset (assumed to be named "lrp-test" per the manifest),
	// then block until its pods report ready again.
	require.NoError(t, kubernetes.MustRestartDaemonset(ctx, cs, ns, "lrp-test"))
	kubernetes.WaitForPodDaemonset(ctx, cs, ns, "lrp-test", clientLabelSelector)

	// Pick the freshly-created client pod scheduled on the pinned node.
	pods, err := kubernetes.GetPodsByNode(ctx, cs, ns, clientLabelSelector, node)
	require.NoError(t, err)
	require.NotEmpty(t, pods.Items)

	return TakeOne(pods.Items)
}

// deleteAndRecreateResources deletes and recreates client pods and LRP, returning new pod.
//
// The sequence is order-sensitive:
//  1. delete the client daemonset and the CiliumLocalRedirectPolicy,
//  2. wait for both deletions to complete,
//  3. recreate the LRP, restart node-local-dns so it picks up the new policy,
//  4. recreate the client daemonset and wait for readiness.
//
// The returned pod is taken from the same node as originalPod so that callers
// can keep validating against the node-local-dns instance on that node.
// Recreated resources are registered with t.Cleanup for teardown.
func deleteAndRecreateResources(t *testing.T, ctx context.Context, cs *k8sclient.Clientset, originalPod corev1.Pod) corev1.Pod {
	config := kubernetes.MustGetRestConfig()
	ciliumCS, err := ciliumClientset.NewForConfig(config)
	require.NoError(t, err)

	// Pin the node so the replacement pod is selected from the same host.
	nodeName := originalPod.Spec.NodeName

	// Delete client daemonset
	dsClient := cs.AppsV1().DaemonSets(originalPod.Namespace)
	clientDS := kubernetes.MustParseDaemonSet(clientPath)
	kubernetes.MustDeleteDaemonset(ctx, dsClient, clientDS)

	// Delete LRP — the policy object is re-read from the manifest on disk so
	// the delete targets the same name/namespace that was applied in setup.
	lrpContent, err := os.ReadFile(lrpPath)
	require.NoError(t, err)
	var lrp ciliumv2.CiliumLocalRedirectPolicy
	err = yaml.Unmarshal(lrpContent, &lrp)
	require.NoError(t, err)

	lrpClient := ciliumCS.CiliumV2().CiliumLocalRedirectPolicies(lrp.Namespace)
	kubernetes.MustDeleteCiliumLocalRedirectPolicy(ctx, lrpClient, lrp)

	// Wait for client pods to be deleted
	t.Log("Waiting for client pods to be deleted...")
	err = kubernetes.WaitForPodsDelete(ctx, cs, originalPod.Namespace, clientLabelSelector)
	require.NoError(t, err)

	// Wait for LRP to be deleted by polling
	t.Log("Waiting for LRP to be deleted...")
	err = kubernetes.WaitForLRPDelete(ctx, ciliumCS, lrp)
	require.NoError(t, err)

	// Recreate LRP
	_, cleanupLRP := kubernetes.MustSetupLRP(ctx, ciliumCS, lrpPath)
	t.Cleanup(cleanupLRP)

	// Restart node-local-dns pods to pick up new LRP configuration
	t.Log("Restarting node-local-dns pods after LRP recreation")
	err = kubernetes.MustRestartDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns")
	require.NoError(t, err)
	kubernetes.WaitForPodDaemonset(ctx, cs, kubeSystemNamespace, "node-local-dns", nodeLocalDNSLabelSelector)

	// Recreate client daemonset
	_, cleanupClient := kubernetes.MustSetupDaemonset(ctx, cs, clientPath)
	t.Cleanup(cleanupClient)

	// Wait for pods to be ready
	kubernetes.WaitForPodDaemonset(ctx, cs, clientDS.Namespace, clientDS.Name, clientLabelSelector)

	// Get new pod on the same node
	clientPods, err := kubernetes.GetPodsByNode(ctx, cs, clientDS.Namespace, clientLabelSelector, nodeName)
	require.NoError(t, err)
	require.NotEmpty(t, clientPods.Items)

	return TakeOne(clientPods.Items)
}

// TakeOne takes one item from the slice randomly; if empty, it returns the empty value for the type
Expand Down
Loading
Loading