From b07b6977d6892f7a3032ea995a974fc17241b527 Mon Sep 17 00:00:00 2001 From: sivakami Date: Sat, 22 Nov 2025 15:52:19 -0800 Subject: [PATCH 1/7] Add SwiftV2 long-running pipeline with scheduled tests - Implemented scheduled pipeline running every 1 hour with persistent infrastructure - Split test execution into 2 jobs: Create (with 20min wait) and Delete - Added 8 test scenarios across 2 AKS clusters, 4 VNets, different subnets - Implemented two-phase deletion strategy to prevent PNI ReservationInUse errors - Added context timeouts on kubectl commands with force delete fallbacks - Resource naming uses RG name as BUILD_ID for uniqueness across parallel setups - Added SkipAutoDeleteTill tags to prevent automatic resource cleanup - Conditional setup stages controlled by runSetupStages parameter - Auto-generate RG name from location or allow custom names for parallel setups - Added comprehensive README with setup instructions and troubleshooting - Node selection by agentpool labels with usage tracking to prevent conflicts - Kubernetes naming compliance (RFC 1123) for all resources --- .pipelines/swiftv2-long-running/README.md | 314 ++++++++++ .pipelines/swiftv2-long-running/pipeline.yaml | 37 +- .../scripts/create_aks.sh | 129 ++-- .../scripts/create_storage.sh | 1 + .../scripts/create_vnets.sh | 159 +++-- .../long-running-pipeline-template.yaml | 162 ++++- go.mod | 18 +- go.sum | 34 +- hack/aks/Makefile | 15 +- .../swiftv2/long-running-cluster/pod.yaml | 33 + .../long-running-cluster/podnetwork.yaml | 15 + .../podnetworkinstance.yaml | 13 + .../integration/swiftv2/helpers/az_helpers.go | 286 +++++++++ .../swiftv2/longRunningCluster/datapath.go | 582 ++++++++++++++++++ .../datapath_create_test.go | 115 ++++ .../datapath_delete_test.go | 115 ++++ .../longRunningCluster/datapath_test.go | 129 ++++ 17 files changed, 2020 insertions(+), 137 deletions(-) create mode 100644 .pipelines/swiftv2-long-running/README.md create mode 100644 test/integration/manifests/swiftv2/long-running-cluster/pod.yaml create mode 100644 test/integration/manifests/swiftv2/long-running-cluster/podnetwork.yaml create mode 100644 test/integration/manifests/swiftv2/long-running-cluster/podnetworkinstance.yaml create mode 100644 test/integration/swiftv2/helpers/az_helpers.go create mode 100644 test/integration/swiftv2/longRunningCluster/datapath.go create mode 100644 test/integration/swiftv2/longRunningCluster/datapath_create_test.go create mode 100644 test/integration/swiftv2/longRunningCluster/datapath_delete_test.go create mode 100644 test/integration/swiftv2/longRunningCluster/datapath_test.go diff --git a/.pipelines/swiftv2-long-running/README.md b/.pipelines/swiftv2-long-running/README.md new file mode 100644 index 0000000000..fcb265dbbb --- /dev/null +++ b/.pipelines/swiftv2-long-running/README.md @@ -0,0 +1,314 @@ +# SwiftV2 Long-Running Pipeline + +This pipeline tests SwiftV2 pod networking in a persistent environment with scheduled test runs. + +## Architecture Overview + +**Infrastructure (Persistent)**: +- **2 AKS Clusters**: aks-1, aks-2 (4 nodes each: 2 low-NIC default pool, 2 high-NIC nplinux pool) +- **4 VNets**: cx_vnet_a1, cx_vnet_a2, cx_vnet_a3 (Customer 1 with PE to storage), cx_vnet_b1 (Customer 2) +- **VNet Peerings**: two of the three vnets of customer 1 are peered. +- **Storage Account**: With private endpoint from cx_vnet_a1 +- **NSGs**: Restricting traffic between subnets (s1, s2) in vnet cx_vnet_a1. 
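Once a setup exists, the topology above can be spot-checked from the CLI. A minimal sketch, assuming the default resource group name used elsewhere in this README (the NSG names come from `create_nsg.sh` and are not listed here):

```bash
RG="sv2-long-run-centraluseuap"   # assumed default setup RG

# Peerings: two of the three customer-1 VNets should be peered
az network vnet peering list -g "$RG" --vnet-name cx_vnet_a1 -o table

# NSGs restricting s1 <-> s2 traffic inside cx_vnet_a1
az network nsg list -g "$RG" --query "[].name" -o tsv
```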
+ +**Test Scenarios (8 total)**: +- Multiple pods across 2 clusters, 4 VNets, different subnets (s1, s2), and node types (low-NIC, high-NIC) +- Each test run: Create all resources → Wait 20 minutes → Delete all resources +- Tests run automatically every 1 hour via scheduled trigger + +## Pipeline Modes + +### Mode 1: Scheduled Test Runs (Default) +**Trigger**: Automated cron schedule every 1 hour +**Purpose**: Continuous validation of long-running infrastructure +**Setup Stages**: Disabled +**Test Duration**: ~30-40 minutes per run +**Resource Group**: Static (default: `sv2-long-run-`, e.g., `sv2-long-run-centraluseuap`) + +```yaml +# Runs automatically every 1 hour +# No manual/external triggers allowed +``` + +### Mode 2: Initial Setup or Rebuild +**Trigger**: Manual run with parameter change +**Purpose**: Create new infrastructure or rebuild existing +**Setup Stages**: Enabled via `runSetupStages: true` +**Resource Group**: Auto-generated or custom + +**To create new infrastructure**: +1. Go to Pipeline → Run pipeline +2. Set `runSetupStages` = `true` +3. **Optional**: Leave `resourceGroupName` empty to auto-generate `sv2-long-run-` + - Or provide custom name for parallel setups (e.g., `sv2-long-run-eastus-dev`) +4. Optionally adjust `location`, `vmSkuDefault`, `vmSkuHighNIC` +5. Run pipeline + +## Pipeline Parameters + +Parameters are organized by usage: + +### Common Parameters (Always Relevant) +| Parameter | Default | Description | +|-----------|---------|-------------| +| `location` | `centraluseuap` | Azure region for resources. Auto-generates RG name: `sv2-long-run-`. | +| `runSetupStages` | `false` | Set to `true` to create new infrastructure. `false` for scheduled test runs. | +| `subscriptionId` | `37deca37-...` | Azure subscription ID. | +| `serviceConnection` | `Azure Container Networking...` | Azure DevOps service connection. | + +### Setup-Only Parameters (Only Used When runSetupStages=true) + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `resourceGroupName` | `""` (empty) | **Leave empty** to auto-generate `sv2-long-run-`. Provide custom name only for parallel setups (e.g., `sv2-long-run-eastus-dev`). | +| `vmSkuDefault` | `Standard_D4s_v3` | VM SKU for low-NIC node pool (1 NIC). | +| `vmSkuHighNIC` | `Standard_D16s_v3` | VM SKU for high-NIC node pool (7 NICs). | + +**Note**: Setup-only parameters are ignored when `runSetupStages=false` (scheduled runs). + +## How It Works + +### Scheduled Test Flow +Every 1 hour, the pipeline: +1. Skips setup stages (infrastructure already exists) +2. **Job 1 - Create and Wait**: Creates 8 test scenarios (PodNetwork, PNI, Pods), then waits 20 minutes +3. **Job 2 - Delete Resources**: Deletes all test resources (Phase 1: Pods, Phase 2: PNI/PN/Namespaces) +4. Reports results + +### Setup Flow (When runSetupStages = true) +1. Create resource group with `SkipAutoDeleteTill=2032-12-31` tag +2. Create 2 AKS clusters with 2 node pools each (tagged for persistence) +3. Create 4 customer VNets with subnets and delegations (tagged for persistence) +4. Create VNet peerings +5. Create storage accounts with persistence tags +6. Create NSGs for subnet isolation +7. Run initial test (create → wait → delete) + +**All infrastructure resources are tagged with `SkipAutoDeleteTill=2032-12-31`** to prevent automatic cleanup by Azure subscription policies. 
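A quick way to confirm the persistence tag landed on everything in the setup RG (RG name assumed from the defaults above):

```bash
az resource list -g "sv2-long-run-centraluseuap" \
  --query "[].{name:name, type:type, skip:tags.SkipAutoDeleteTill}" -o table
```

Any resource with an empty `skip` column can be re-tagged with `az resource update` as shown under Resource Tags below.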
+ +## Resource Naming + +All test resources use the pattern: `-static-setup--` + +**Examples**: +- PodNetwork: `pn-static-setup-a1-s1` +- PodNetworkInstance: `pni-static-setup-a1-s1` +- Pod: `pod-c1-aks1-a1s1-low` +- Namespace: `pn-static-setup-a1-s1` + +VNet names are simplified: +- `cx_vnet_a1` → `a1` +- `cx_vnet_b1` → `b1` + +## Switching to a New Setup + +**Scenario**: You created a new setup in RG `sv2-long-run-eastus` and want scheduled runs to use it. + +**Steps**: +1. Go to Pipeline → Edit +2. Update location parameter default value: + ```yaml + - name: location + default: "centraluseuap" # Change this + ``` +3. Save and commit +4. RG name will automatically become `sv2-long-run-centraluseuap` + +Alternatively, manually trigger with the new location or override `resourceGroupName` directly. + +## Creating Multiple Test Setups + +**Use Case**: You want to create a new test environment without affecting the existing one (e.g., for testing different configurations, regions, or versions). + +**Steps**: +1. Go to Pipeline → Run pipeline +2. Set `runSetupStages` = `true` +3. **Set `resourceGroupName`** to a unique value: + - For different region: `sv2-long-run-eastus` + - For parallel test: `sv2-long-run-centraluseuap-dev` + - For experimental: `sv2-long-run-centraluseuap-v2` + - Or leave empty to use auto-generated `sv2-long-run-` +4. Optionally adjust `location`, `vmSkuDefault`, `vmSkuHighNIC` +5. Run pipeline + +**After setup completes**: +- The new infrastructure will be tagged with `SkipAutoDeleteTill=2032-12-31` +- Resources are isolated by the unique resource group name +- To run tests against the new setup, the scheduled pipeline would need to be updated with the new RG name + +**Example Scenarios**: +| Scenario | Resource Group Name | Purpose | +|----------|-------------------|---------| +| Default production | `sv2-long-run-centraluseuap` | Daily scheduled tests | +| East US environment | `sv2-long-run-eastus` | Regional testing | +| Test new features | `sv2-long-run-centraluseuap-dev` | Development/testing | +| Version upgrade | `sv2-long-run-centraluseuap-v2` | Parallel environment for upgrades | + +## Resource Naming + +The pipeline uses the **resource group name as the BUILD_ID** to ensure unique resource names per test setup. This allows multiple parallel test environments without naming collisions. + +**Generated Resource Names**: +``` +BUILD_ID = + +PodNetwork: pn--- +PodNetworkInstance: pni--- +Namespace: pn--- +Pod: pod- +``` + +**Example for `resourceGroupName=sv2-long-run-centraluseuap`**: +``` +pn-sv2-long-run-centraluseuap-b1-s1 (PodNetwork for cx_vnet_b1, subnet s1) +pni-sv2-long-run-centraluseuap-b1-s1 (PodNetworkInstance) +pn-sv2-long-run-centraluseuap-a1-s1 (PodNetwork for cx_vnet_a1, subnet s1) +pni-sv2-long-run-centraluseuap-a1-s2 (PodNetworkInstance for cx_vnet_a1, subnet s2) +``` + +**Example for different setup `resourceGroupName=sv2-long-run-eastus`**: +``` +pn-sv2-long-run-eastus-b1-s1 (Different from centraluseuap setup) +pni-sv2-long-run-eastus-b1-s1 +pn-sv2-long-run-eastus-a1-s1 +``` + +This ensures **no collision** between different test setups running in parallel. + +## Deletion Strategy +### Phase 1: Delete All Pods +Deletes all pods across all scenarios first. This ensures IP reservations are released. + +``` +Deleting pod pod-c2-aks2-b1s1-low... +Deleting pod pod-c2-aks2-b1s1-high... +... +``` + +### Phase 2: Delete Shared Resources +Groups resources by vnet/subnet/cluster and deletes PNI/PN/Namespace once per group. 
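A bash sketch of this ordering, for illustration only; the real orchestration lives in the Go tests under `test/integration/swiftv2/longRunningCluster`. The group keys below are a hypothetical subset, and kubeconfig flags are omitted:

```bash
# Phase 1: delete every pod in every scenario namespace first,
# so IP reservations are released before any PNI is removed
for ns in pn-static-setup-b1-s1 pn-static-setup-a1-s1; do
  kubectl -n "$ns" delete pods --all --wait=true
done

# Phase 2: delete the shared PNI/PN/namespace once per vnet/subnet group
for group in b1-s1 a1-s1; do
  kubectl -n "pn-static-setup-$group" delete podnetworkinstance "pni-static-setup-$group"
  kubectl delete podnetwork "pn-static-setup-$group"
  kubectl delete namespace "pn-static-setup-$group"
done
```

The Phase 2 log output then looks like: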
+ +``` +Deleting PodNetworkInstance pni-static-setup-b1-s1... +Deleting PodNetwork pn-static-setup-b1-s1... +Deleting namespace pn-static-setup-b1-s1... +``` + +**Why**: Multiple pods can share the same PNI. Deleting PNI while pods exist causes "ReservationInUse" errors. + +## Troubleshooting + +### Tests are running on wrong cluster +- Check `resourceGroupName` parameter points to correct RG +- Verify RG contains aks-1 and aks-2 clusters +- Check kubeconfig retrieval in logs + +### Setup stages not running +- Verify `runSetupStages` parameter is set to `true` +- Check condition: `condition: eq(${{ parameters.runSetupStages }}, true)` + +### Schedule not triggering +- Verify cron expression: `"0 */1 * * *"` (every 1 hour) +- Check branch in schedule matches your working branch +- Ensure `always: true` is set (runs even without code changes) + +### PNI stuck with "ReservationInUse" +- Check if pods were deleted first (Phase 1 logs) +- Manual fix: Delete pod → Wait 10s → Patch PNI to remove finalizers + +### Pipeline timeout after 6 hours +- This is expected behavior (timeoutInMinutes: 360) +- Tests should complete in ~30-40 minutes +- If tests hang, check deletion logs for stuck resources + +## Manual Testing + +Run locally against existing infrastructure: + +```bash +export RG="sv2-long-run-centraluseuap" # Match your resource group +export BUILD_ID="$RG" # Use same RG name as BUILD_ID for unique resource names + +cd test/integration/swiftv2/longRunningCluster +ginkgo -v -trace --timeout=6h . +``` + +## Node Pool Configuration + +- **Low-NIC nodes** (`Standard_D4s_v3`): 1 NIC, label `agentpool!=nplinux` + - Can only run 1 pod at a time + +- **High-NIC nodes** (`Standard_D16s_v3`): 7 NICs, label `agentpool=nplinux` + - Currently limited to 1 pod per node in test logic + +## Schedule Modification + +To change test frequency, edit the cron schedule: + +```yaml +schedules: + - cron: "0 */1 * * *" # Every 1 hour (current) + # Examples: + # - cron: "0 */2 * * *" # Every 2 hours + # - cron: "0 */6 * * *" # Every 6 hours + # - cron: "0 0,8,16 * * *" # At 12am, 8am, 4pm + # - cron: "0 0 * * *" # Daily at midnight +``` + +## File Structure + +``` +.pipelines/swiftv2-long-running/ +├── pipeline.yaml # Main pipeline with schedule +├── README.md # This file +├── template/ +│ └── long-running-pipeline-template.yaml # Stage definitions (2 jobs) +└── scripts/ + ├── create_aks.sh # AKS cluster creation + ├── create_vnets.sh # VNet and subnet creation + ├── create_peerings.sh # VNet peering setup + ├── create_storage.sh # Storage account creation + ├── create_nsg.sh # Network security groups + └── create_pe.sh # Private endpoint setup + +test/integration/swiftv2/longRunningCluster/ +├── datapath_test.go # Original combined test (deprecated) +├── datapath_create_test.go # Create test scenarios (Job 1) +├── datapath_delete_test.go # Delete test scenarios (Job 2) +├── datapath.go # Resource orchestration +└── helpers/ + └── az_helpers.go # Azure/kubectl helper functions +``` + +## Best Practices + +1. **Keep infrastructure persistent**: Only recreate when necessary (cluster upgrades, config changes) +2. **Monitor scheduled runs**: Set up alerts for test failures +3. **Resource naming**: BUILD_ID is automatically set to the resource group name, ensuring unique resource names per setup +4. 
**Tag resources appropriately**: All setup resources automatically tagged with `SkipAutoDeleteTill=2032-12-31` + - AKS clusters + - AKS VNets + - Customer VNets (cx_vnet_a1, cx_vnet_a2, cx_vnet_a3, cx_vnet_b1) + - Storage accounts +5. **Avoid resource group collisions**: Always use unique `resourceGroupName` when creating new setups +6. **Document changes**: Update this README when modifying test scenarios or infrastructure + +## Resource Tags + +All infrastructure resources are automatically tagged during creation: + +```bash +SkipAutoDeleteTill=2032-12-31 +``` + +This prevents automatic cleanup by Azure subscription policies that delete resources after a certain period. The tag is applied to: +- Resource group (via create_resource_group job) +- AKS clusters (aks-1, aks-2) +- AKS cluster VNets +- Customer VNets (cx_vnet_a1, cx_vnet_a2, cx_vnet_a3, cx_vnet_b1) +- Storage accounts (sa1xxxx, sa2xxxx) + +To manually update the tag date: +```bash +az resource update --ids --set tags.SkipAutoDeleteTill=2033-12-31 +``` diff --git a/.pipelines/swiftv2-long-running/pipeline.yaml b/.pipelines/swiftv2-long-running/pipeline.yaml index b6d085901d..6856a7ae3e 100644 --- a/.pipelines/swiftv2-long-running/pipeline.yaml +++ b/.pipelines/swiftv2-long-running/pipeline.yaml @@ -1,4 +1,14 @@ trigger: none +pr: none + +# Schedule: Run every 1 hour +schedules: + - cron: "0 */1 * * *" # Every 1 hour at minute 0 + displayName: "Run tests every 1 hour" + branches: + include: + - sv2-long-running-pipeline + always: true # Run even if there are no code changes parameters: - name: subscriptionId @@ -6,31 +16,37 @@ parameters: type: string default: "37deca37-c375-4a14-b90a-043849bd2bf1" + - name: serviceConnection + displayName: "Azure Service Connection" + type: string + default: "Azure Container Networking - Standalone Test Service Connection" + - name: location displayName: "Deployment Region" type: string default: "centraluseuap" + - name: runSetupStages + displayName: "Create New Infrastructure Setup" + type: boolean + default: false + + # Setup-only parameters (only used when runSetupStages=true) - name: resourceGroupName - displayName: "Resource Group Name" + displayName: "Resource Group Name used when runSetupStages is true" type: string - default: "long-run-$(Build.BuildId)" + default: "sv2-long-run-$(Build.BuildId)" - name: vmSkuDefault - displayName: "VM SKU for Default Node Pool" + displayName: "VM SKU for Default Node Pool used when runSetupStages is true" type: string - default: "Standard_D2s_v3" + default: "Standard_D4s_v3" - name: vmSkuHighNIC - displayName: "VM SKU for High NIC Node Pool" + displayName: "VM SKU for additional Node Pool used when runSetupStages is true" type: string default: "Standard_D16s_v3" - - name: serviceConnection - displayName: "Azure Service Connection" - type: string - default: "Azure Container Networking - Standalone Test Service Connection" - extends: template: template/long-running-pipeline-template.yaml parameters: @@ -40,3 +56,4 @@ extends: vmSkuDefault: ${{ parameters.vmSkuDefault }} vmSkuHighNIC: ${{ parameters.vmSkuHighNIC }} serviceConnection: ${{ parameters.serviceConnection }} + runSetupStages: ${{ parameters.runSetupStages }} diff --git a/.pipelines/swiftv2-long-running/scripts/create_aks.sh b/.pipelines/swiftv2-long-running/scripts/create_aks.sh index 4ab38c0f42..421c4dccbb 100644 --- a/.pipelines/swiftv2-long-running/scripts/create_aks.sh +++ b/.pipelines/swiftv2-long-running/scripts/create_aks.sh @@ -7,57 +7,98 @@ RG=$3 VM_SKU_DEFAULT=$4 VM_SKU_HIGHNIC=$5 
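For reference, the pipeline template calls this script with five positional arguments in exactly this order; a sample manual invocation from the repo root (subscription ID elided):

```bash
./.pipelines/swiftv2-long-running/scripts/create_aks.sh \
  "<subscription-id>" centraluseuap sv2-long-run-centraluseuap \
  Standard_D4s_v3 Standard_D16s_v3
```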
-CLUSTER_COUNT=2 -CLUSTER_PREFIX="aks" -DEFAULT_NODE_COUNT=1 -COMMON_TAGS="fastpathenabled=true RGOwner=LongRunningTestPipelines stampcreatorserviceinfo=true" - -wait_for_provisioning() { # Helper for safe retry/wait for provisioning states (basic) - local rg="$1" clusterName="$2" - echo "Waiting for AKS '$clusterName' in RG '$rg' to reach Succeeded/Failed (polling)..." +CLUSTER_COUNT=2 +CLUSTER_PREFIX="aks" + + +stamp_vnet() { + local vnet_id="$1" + + responseFile="response.txt" + modified_vnet="${vnet_id//\//%2F}" + cmd_stamp_curl="'curl -v -X PUT http://localhost:8080/VirtualNetwork/$modified_vnet/stampcreatorservicename'" + cmd_containerapp_exec="az containerapp exec -n subnetdelegator-westus-u3h4j -g subnetdelegator-westus --subscription 9b8218f9-902a-4d20-a65c-e98acec5362f --command $cmd_stamp_curl" + + max_retries=10 + sleep_seconds=15 + retry_count=0 + + while [[ $retry_count -lt $max_retries ]]; do + script --quiet -c "$cmd_containerapp_exec" "$responseFile" + if grep -qF "200 OK" "$responseFile"; then + echo "Subnet Delegator successfully stamped the vnet" + return 0 + else + echo "Subnet Delegator failed to stamp the vnet, attempt $((retry_count+1))" + cat "$responseFile" + retry_count=$((retry_count+1)) + sleep "$sleep_seconds" + fi + done + + echo "Failed to stamp the vnet even after $max_retries attempts" + exit 1 +} + +wait_for_provisioning() { + local rg="$1" clusterName="$2" + echo "Waiting for AKS '$clusterName' in RG '$rg'..." while :; do state=$(az aks show --resource-group "$rg" --name "$clusterName" --query provisioningState -o tsv 2>/dev/null || true) - if [ -z "$state" ]; then - sleep 3 - continue + if [[ "$state" =~ Succeeded ]]; then + echo "Provisioning state: $state" + break fi - case "$state" in - Succeeded|Succeeded*) echo "Provisioning state: $state"; break ;; - Failed|Canceled|Rejected) echo "Provisioning finished with state: $state"; break ;; - *) printf "."; sleep 6 ;; - esac + if [[ "$state" =~ Failed|Canceled ]]; then + echo "Provisioning finished with state: $state" + break + fi + sleep 6 done } +######################################### +# Main script starts here +######################################### + for i in $(seq 1 "$CLUSTER_COUNT"); do - echo "==============================" - echo " Working on cluster set #$i" - echo "==============================" - - CLUSTER_NAME="${CLUSTER_PREFIX}-${i}" - echo "Creating AKS cluster '$CLUSTER_NAME' in RG '$RG'" - - make -C ./hack/aks azcfg AZCLI=az REGION=$LOCATION - - make -C ./hack/aks swiftv2-podsubnet-cluster-up \ - AZCLI=az REGION=$LOCATION \ - SUB=$SUBSCRIPTION_ID \ - GROUP=$RG \ - CLUSTER=$CLUSTER_NAME \ - NODE_COUNT=$DEFAULT_NODE_COUNT \ - VM_SIZE=$VM_SKU_DEFAULT \ - - echo " - waiting for AKS provisioning state..." - wait_for_provisioning "$RG" "$CLUSTER_NAME" - - echo "Adding multi-tenant nodepool ' to '$CLUSTER_NAME'" - make -C ./hack/aks linux-swiftv2-nodepool-up \ - AZCLI=az REGION=$LOCATION \ - GROUP=$RG \ - VM_SIZE=$VM_SKU_HIGHNIC \ - CLUSTER=$CLUSTER_NAME \ - SUB=$SUBSCRIPTION_ID \ + echo "Creating cluster #$i..." 
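    # Per-cluster sequence below: bring the cluster up via the
    # swiftv2-podsubnet-cluster-up make target, tag it for persistence,
    # wait for provisioning, tag and stamp the managed VNet, add the
    # high-NIC nodepool, then write /tmp/<cluster>.kubeconfig for the test jobs.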
+ CLUSTER_NAME="${CLUSTER_PREFIX}-${i}" + + make -C ./hack/aks azcfg AZCLI=az REGION=$LOCATION + + # Create cluster with SkipAutoDeleteTill tag for persistent infrastructure + make -C ./hack/aks swiftv2-podsubnet-cluster-up \ + AZCLI=az REGION=$LOCATION \ + SUB=$SUBSCRIPTION_ID \ + GROUP=$RG \ + CLUSTER=$CLUSTER_NAME \ + VM_SIZE=$VM_SKU_DEFAULT + + # Add SkipAutoDeleteTill tag to cluster (2032-12-31 for long-term persistence) + az aks update -g "$RG" -n "$CLUSTER_NAME" --tags SkipAutoDeleteTill=2032-12-31 || echo "Warning: Failed to add tag to cluster" + + wait_for_provisioning "$RG" "$CLUSTER_NAME" + + vnet_id=$(az network vnet show -g "$RG" --name "$CLUSTER_NAME" --query id -o tsv) + echo "Found VNET: $vnet_id" + + # Add SkipAutoDeleteTill tag to AKS VNet + az network vnet update --ids "$vnet_id" --set tags.SkipAutoDeleteTill=2032-12-31 || echo "Warning: Failed to add tag to vnet" + + stamp_vnet "$vnet_id" + + make -C ./hack/aks linux-swiftv2-nodepool-up \ + AZCLI=az REGION=$LOCATION \ + GROUP=$RG \ + VM_SIZE=$VM_SKU_HIGHNIC \ + CLUSTER=$CLUSTER_NAME \ + SUB=$SUBSCRIPTION_ID + + az aks get-credentials -g "$RG" -n "$CLUSTER_NAME" --admin --overwrite-existing \ + --file "/tmp/${CLUSTER_NAME}.kubeconfig" done -echo "All done. Created $CLUSTER_COUNT cluster set(s)." + +echo "All clusters complete." diff --git a/.pipelines/swiftv2-long-running/scripts/create_storage.sh b/.pipelines/swiftv2-long-running/scripts/create_storage.sh index caefc69294..056f9a0857 100644 --- a/.pipelines/swiftv2-long-running/scripts/create_storage.sh +++ b/.pipelines/swiftv2-long-running/scripts/create_storage.sh @@ -26,6 +26,7 @@ for SA in "$SA1" "$SA2"; do --allow-shared-key-access false \ --https-only true \ --min-tls-version TLS1_2 \ + --tags SkipAutoDeleteTill=2032-12-31 \ --query "name" -o tsv \ && echo "Storage account $SA created successfully." # Verify creation success diff --git a/.pipelines/swiftv2-long-running/scripts/create_vnets.sh b/.pipelines/swiftv2-long-running/scripts/create_vnets.sh index eb894d06ff..b937f4d8a1 100644 --- a/.pipelines/swiftv2-long-running/scripts/create_vnets.sh +++ b/.pipelines/swiftv2-long-running/scripts/create_vnets.sh @@ -2,35 +2,30 @@ set -e trap 'echo "[ERROR] Failed while creating VNets or subnets. Check Azure CLI logs above." >&2' ERR -SUBSCRIPTION_ID=$1 +SUB_ID=$1 LOCATION=$2 RG=$3 +BUILD_ID=$4 -az account set --subscription "$SUBSCRIPTION_ID" - -# VNets and subnets -VNET_A1="cx_vnet_a1" -VNET_A2="cx_vnet_a2" -VNET_A3="cx_vnet_a3" -VNET_B1="cx_vnet_b1" - -A1_S1="10.10.1.0/24" -A1_S2="10.10.2.0/24" -A1_PE="10.10.100.0/24" - -A2_MAIN="10.11.1.0/24" - -A3_MAIN="10.12.1.0/24" - -B1_MAIN="10.20.1.0/24" +# --- VNet definitions --- +# Create customer vnets for two customers A and B. +VNAMES=( "cx_vnet_a1" "cx_vnet_a2" "cx_vnet_a3" "cx_vnet_b1" ) +VCIDRS=( "10.10.0.0/16" "10.11.0.0/16" "10.12.0.0/16" "10.13.0.0/16" ) +NODE_SUBNETS=( "10.10.0.0/24" "10.11.0.0/24" "10.12.0.0/24" "10.13.0.0/24" ) +EXTRA_SUBNETS_LIST=( "s1 s2 pe" "s1" "s1" "s1" ) +EXTRA_CIDRS_LIST=( "10.10.1.0/24,10.10.2.0/24,10.10.3.0/24" \ + "10.11.1.0/24" \ + "10.12.1.0/24" \ + "10.13.1.0/24" ) +az account set --subscription "$SUB_ID" # ------------------------------- # Verification functions # ------------------------------- verify_vnet() { - local rg="$1"; local vnet="$2" + local vnet="$1" echo "==> Verifying VNet: $vnet" - if az network vnet show -g "$rg" -n "$vnet" &>/dev/null; then + if az network vnet show -g "$RG" -n "$vnet" &>/dev/null; then echo "[OK] Verified VNet $vnet exists." 
else echo "[ERROR] VNet $vnet not found!" >&2 @@ -39,9 +34,9 @@ verify_vnet() { } verify_subnet() { - local rg="$1"; local vnet="$2"; local subnet="$3" + local vnet="$1"; local subnet="$2" echo "==> Verifying subnet: $subnet in $vnet" - if az network vnet subnet show -g "$rg" --vnet-name "$vnet" -n "$subnet" &>/dev/null; then + if az network vnet subnet show -g "$RG" --vnet-name "$vnet" -n "$subnet" &>/dev/null; then echo "[OK] Verified subnet $subnet exists in $vnet." else echo "[ERROR] Subnet $subnet not found in $vnet!" >&2 @@ -50,35 +45,99 @@ verify_subnet() { } # ------------------------------- -# Create VNets and Subnets -# ------------------------------- -# A1 -az network vnet create -g "$RG" -n "$VNET_A1" --address-prefix 10.10.0.0/16 --subnet-name s1 --subnet-prefix "$A1_S1" -l "$LOCATION" --output none \ - && echo "Created $VNET_A1 with subnet s1" -az network vnet subnet create -g "$RG" --vnet-name "$VNET_A1" -n s2 --address-prefix "$A1_S2" --output none \ - && echo "Created $VNET_A1 with subnet s2" -az network vnet subnet create -g "$RG" --vnet-name "$VNET_A1" -n pe --address-prefix "$A1_PE" --output none \ - && echo "Created $VNET_A1 with subnet pe" -# Verify A1 -verify_vnet "$RG" "$VNET_A1" -for sn in s1 s2 pe; do verify_subnet "$RG" "$VNET_A1" "$sn"; done +create_vnet_subets() { + local vnet="$1" + local vnet_cidr="$2" + local node_subnet_cidr="$3" + local extra_subnets="$4" + local extra_cidrs="$5" -# A2 -az network vnet create -g "$RG" -n "$VNET_A2" --address-prefix 10.11.0.0/16 --subnet-name s1 --subnet-prefix "$A2_MAIN" -l "$LOCATION" --output none \ - && echo "Created $VNET_A2 with subnet s1" -verify_vnet "$RG" "$VNET_A2" -verify_subnet "$RG" "$VNET_A2" "s1" + echo "==> Creating VNet: $vnet with CIDR: $vnet_cidr" + az network vnet create -g "$RG" -l "$LOCATION" --name "$vnet" --address-prefixes "$vnet_cidr" \ + --tags SkipAutoDeleteTill=2032-12-31 -o none + + IFS=' ' read -r -a extra_subnet_array <<< "$extra_subnets" + IFS=',' read -r -a extra_cidr_array <<< "$extra_cidrs" + + for i in "${!extra_subnet_array[@]}"; do + subnet_name="${extra_subnet_array[$i]}" + subnet_cidr="${extra_cidr_array[$i]}" + echo "==> Creating extra subnet: $subnet_name with CIDR: $subnet_cidr" + + # Only delegate pod subnets (not private endpoint subnets) + if [[ "$subnet_name" != "pe" ]]; then + az network vnet subnet create -g "$RG" \ + --vnet-name "$vnet" --name "$subnet_name" \ + --delegations Microsoft.SubnetDelegator/msfttestclients \ + --address-prefixes "$subnet_cidr" -o none + else + az network vnet subnet create -g "$RG" \ + --vnet-name "$vnet" --name "$subnet_name" \ + --address-prefixes "$subnet_cidr" -o none + fi + done +} + +delegate_subnet() { + local vnet="$1" + local subnet="$2" + local max_attempts=7 + local attempt=1 + + echo "==> Delegating subnet: $subnet in VNet: $vnet to Subnet Delegator" + subnet_id=$(az network vnet subnet show -g "$RG" --vnet-name "$vnet" -n "$subnet" --query id -o tsv) + modified_custsubnet="${subnet_id//\//%2F}" + + responseFile="delegate_response.txt" + cmd_delegator_curl="'curl -X PUT http://localhost:8080/DelegatedSubnet/$modified_custsubnet'" + cmd_containerapp_exec="az containerapp exec -n subnetdelegator-westus-u3h4j -g subnetdelegator-westus --subscription 9b8218f9-902a-4d20-a65c-e98acec5362f --command $cmd_delegator_curl" + + while [ $attempt -le $max_attempts ]; do + echo "Attempt $attempt of $max_attempts..." 
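        # Context: the delegator container app targeted above is an
        # environment-specific test service; the PUT registers the
        # URL-encoded subnet ID with it (same pattern as stamp_vnet
        # in create_aks.sh).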
+ + # Use script command to provide PTY for az containerapp exec + script --quiet -c "$cmd_containerapp_exec" "$responseFile" + + if grep -qF "success" "$responseFile"; then + echo "Subnet Delegator successfully registered the subnet" + rm -f "$responseFile" + return 0 + else + echo "Subnet Delegator failed to register the subnet (attempt $attempt)" + cat "$responseFile" + + if [ $attempt -lt $max_attempts ]; then + echo "Retrying in 5 seconds..." + sleep 5 + fi + fi + + ((attempt++)) + done + + echo "[ERROR] Failed to delegate subnet after $max_attempts attempts" + rm -f "$responseFile" + exit 1 +} -# A3 -az network vnet create -g "$RG" -n "$VNET_A3" --address-prefix 10.12.0.0/16 --subnet-name s1 --subnet-prefix "$A3_MAIN" -l "$LOCATION" --output none \ - && echo "Created $VNET_A3 with subnet s1" -verify_vnet "$RG" "$VNET_A3" -verify_subnet "$RG" "$VNET_A3" "s1" +# --- Loop over VNets --- +for i in "${!VNAMES[@]}"; do + VNET=${VNAMES[$i]} + VNET_CIDR=${VCIDRS[$i]} + NODE_SUBNET_CIDR=${NODE_SUBNETS[$i]} + EXTRA_SUBNETS=${EXTRA_SUBNETS_LIST[$i]} + EXTRA_SUBNET_CIDRS=${EXTRA_CIDRS_LIST[$i]} -# B1 -az network vnet create -g "$RG" -n "$VNET_B1" --address-prefix 10.20.0.0/16 --subnet-name s1 --subnet-prefix "$B1_MAIN" -l "$LOCATION" --output none \ - && echo "Created $VNET_B1 with subnet s1" -verify_vnet "$RG" "$VNET_B1" -verify_subnet "$RG" "$VNET_B1" "s1" + # Create VNet + subnets + create_vnet_subets "$VNET" "$VNET_CIDR" "$NODE_SUBNET_CIDR" "$EXTRA_SUBNETS" "$EXTRA_SUBNET_CIDRS" + verify_vnet "$VNET" + # Loop over extra subnets to verify and delegate the pod subnets. + for PODSUBNET in $EXTRA_SUBNETS; do + verify_subnet "$VNET" "$PODSUBNET" + if [[ "$PODSUBNET" != "pe" ]]; then + delegate_subnet "$VNET" "$PODSUBNET" + fi + done +done -echo " All VNets and subnets created and verified successfully." +echo "All VNets and subnets created and verified successfully." 
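After the script completes, the delegation on each pod subnet can be verified directly; a quick check using names from the definitions above:

```bash
az network vnet subnet show -g "$RG" --vnet-name cx_vnet_a1 -n s1 \
  --query "delegations[].serviceName" -o tsv
# expected: Microsoft.SubnetDelegator/msfttestclients
```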
\ No newline at end of file diff --git a/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml b/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml index cc6016f17a..a7a34d0891 100644 --- a/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml +++ b/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml @@ -11,10 +11,24 @@ parameters: type: string - name: serviceConnection type: string + - name: runSetupStages + type: boolean + default: false + +variables: + - name: rgName + ${{ if eq(parameters.runSetupStages, true) }}: + value: ${{ parameters.resourceGroupName }} + ${{ else }}: + value: sv2-long-run-${{ parameters.location }} stages: + # ================================================================= + # Stage 1: AKS Cluster and Networking Setup (Conditional) + # ================================================================= - stage: AKSClusterAndNetworking displayName: "Stage: AKS Cluster and Networking Setup" + condition: eq(${{ parameters.runSetupStages }}, true) jobs: # ------------------------------------------------------------ # Job 1: Create Resource Group @@ -32,10 +46,13 @@ stages: scriptType: bash scriptLocation: inlineScript inlineScript: | - echo "==> Creating resource group ${{ parameters.resourceGroupName }} in ${{ parameters.location }}" + echo "Org: $SYSTEM_COLLECTIONURI" + echo "Project: $SYSTEM_TEAMPROJECT" + echo "==> Creating resource group $(rgName) in ${{ parameters.location }}" az group create \ - --name "${{ parameters.resourceGroupName }}" \ + --name "$(rgName)" \ --location "${{ parameters.location }}" \ + --tags SkipAutoDeleteTill=2032-12-31 \ --subscription "${{ parameters.subscriptionId }}" echo "Resource group created successfully." 
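Note how `rgName` resolves (see the variables block at the top of this template): with `runSetupStages=true` it takes the `resourceGroupName` parameter, otherwise it is derived as `sv2-long-run-<location>` so scheduled runs always target the persistent setup. The resolved group and its persistence tag can be confirmed with (RG name assumed):

```bash
az group show -n sv2-long-run-centraluseuap \
  --query "{location:location, tags:tags}" -o json
```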
@@ -59,7 +76,7 @@ stages: arguments: > ${{ parameters.subscriptionId }} ${{ parameters.location }} - ${{ parameters.resourceGroupName }} + $(rgName) ${{ parameters.vmSkuDefault }} ${{ parameters.vmSkuHighNIC }} @@ -67,6 +84,7 @@ stages: # Job 3: Networking & Storage # ------------------------------------------------------------ - job: NetworkingAndStorage + timeoutInMinutes: 0 displayName: "Networking and Storage Setup" dependsOn: CreateResourceGroup pool: @@ -85,7 +103,8 @@ stages: arguments: > ${{ parameters.subscriptionId }} ${{ parameters.location }} - ${{ parameters.resourceGroupName }} + $(rgName) + $(Build.BuildId) # Task 2: Create Peerings - task: AzureCLI@2 @@ -96,7 +115,7 @@ stages: scriptLocation: scriptPath scriptPath: ".pipelines/swiftv2-long-running/scripts/create_peerings.sh" arguments: > - ${{ parameters.resourceGroupName }} + $(rgName) # Task 3: Create Storage Accounts - task: AzureCLI@2 @@ -110,7 +129,7 @@ stages: arguments: > ${{ parameters.subscriptionId }} ${{ parameters.location }} - ${{ parameters.resourceGroupName }} + $(rgName) # Task 4: Create NSG - task: AzureCLI@2 @@ -122,7 +141,7 @@ stages: scriptPath: ".pipelines/swiftv2-long-running/scripts/create_nsg.sh" arguments: > ${{ parameters.subscriptionId }} - ${{ parameters.resourceGroupName }} + $(rgName) ${{ parameters.location }} # Task 5: Create Private Endpoint @@ -136,5 +155,132 @@ stages: arguments: > ${{ parameters.subscriptionId }} ${{ parameters.location }} - ${{ parameters.resourceGroupName }} + $(rgName) $(CreateStorageAccounts.StorageAccount1) + # ================================================================= + # Stage 2: Datapath Tests + # ================================================================= + - stage: DataPathTests + displayName: "Stage: Swiftv2 Data Path Tests" + dependsOn: AKSClusterAndNetworking + condition: or(eq(${{ parameters.runSetupStages }}, false), succeeded()) + jobs: + # ------------------------------------------------------------ + # Job 1: Create Test Resources and Wait + # ------------------------------------------------------------ + - job: CreateTestResources + displayName: "Create Resources and Wait 20 Minutes" + timeoutInMinutes: 90 + pool: + vmImage: ubuntu-latest + steps: + - checkout: self + + - task: GoTool@0 + displayName: "Use Go 1.22.5" + inputs: + version: "1.22.5" + + - task: AzureCLI@2 + displayName: "Create Test Resources" + inputs: + azureSubscription: ${{ parameters.serviceConnection }} + scriptType: bash + scriptLocation: inlineScript + inlineScript: | + echo "==> Installing Ginkgo CLI" + go install github.com/onsi/ginkgo/v2/ginkgo@latest + + echo "==> Adding Go bin to PATH" + export PATH=$PATH:$(go env GOPATH)/bin + + echo "==> Downloading Go dependencies" + go mod download + + echo "==> Setting up kubeconfig for cluster aks-1" + az aks get-credentials \ + --resource-group $(rgName) \ + --name aks-1 \ + --file /tmp/aks-1.kubeconfig \ + --overwrite-existing \ + --admin + + echo "==> Setting up kubeconfig for cluster aks-2" + az aks get-credentials \ + --resource-group $(rgName) \ + --name aks-2 \ + --file /tmp/aks-2.kubeconfig \ + --overwrite-existing \ + --admin + + echo "==> Verifying cluster aks-1 connectivity" + kubectl --kubeconfig /tmp/aks-1.kubeconfig get nodes + + echo "==> Verifying cluster aks-2 connectivity" + kubectl --kubeconfig /tmp/aks-2.kubeconfig get nodes + + echo "==> Creating test resources (8 scenarios)" + export RG="$(rgName)" + export BUILD_ID="$(rgName)" + ginkgo -v -trace --timeout=1h 
./test/integration/swiftv2/longRunningCluster --focus="Datapath Create" + + - script: | + echo "Waiting 20 minutes for pods to run..." + sleep 1200 + echo "Wait period complete" + displayName: "Wait 20 minutes" + + # ------------------------------------------------------------ + # Job 2: Delete Test Resources + # ------------------------------------------------------------ + - job: DeleteTestResources + displayName: "Delete PodNetwork, PNI, and Pods" + dependsOn: CreateTestResources + timeoutInMinutes: 60 + pool: + vmImage: ubuntu-latest + steps: + - checkout: self + + - task: GoTool@0 + displayName: "Use Go 1.22.5" + inputs: + version: "1.22.5" + + - task: AzureCLI@2 + displayName: "Delete Test Resources" + inputs: + azureSubscription: ${{ parameters.serviceConnection }} + scriptType: bash + scriptLocation: inlineScript + inlineScript: | + echo "==> Installing Ginkgo CLI" + go install github.com/onsi/ginkgo/v2/ginkgo@latest + + echo "==> Adding Go bin to PATH" + export PATH=$PATH:$(go env GOPATH)/bin + + echo "==> Downloading Go dependencies" + go mod download + + echo "==> Setting up kubeconfig for cluster aks-1" + az aks get-credentials \ + --resource-group $(rgName) \ + --name aks-1 \ + --file /tmp/aks-1.kubeconfig \ + --overwrite-existing \ + --admin + + echo "==> Setting up kubeconfig for cluster aks-2" + az aks get-credentials \ + --resource-group $(rgName) \ + --name aks-2 \ + --file /tmp/aks-2.kubeconfig \ + --overwrite-existing \ + --admin + + echo "==> Deleting test resources (8 scenarios)" + export RG="$(rgName)" + export BUILD_ID="$(rgName)" + ginkgo -v -trace --timeout=1h ./test/integration/swiftv2/longRunningCluster --focus="Datapath Delete" + \ No newline at end of file diff --git a/go.mod b/go.mod index bf07d7f6ac..8096f632b3 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/Azure/azure-container-networking -go 1.24.1 +go 1.24.0 + +toolchain go1.24.10 require ( github.com/Azure/azure-sdk-for-go/sdk/azcore v1.19.1 @@ -68,7 +70,6 @@ require ( github.com/gofrs/uuid v4.4.0+incompatible // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/hpcloud/tail v1.0.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -104,12 +105,9 @@ require ( golang.org/x/term v0.36.0 // indirect golang.org/x/text v0.30.0 // indirect golang.org/x/time v0.14.0 - golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect @@ -125,6 +123,7 @@ require ( github.com/cilium/cilium v1.15.16 github.com/cilium/ebpf v0.19.0 github.com/jsternberg/zap-logfmt v1.3.0 + github.com/onsi/ginkgo/v2 v2.23.4 golang.org/x/sync v0.17.0 gotest.tools/v3 v3.5.2 k8s.io/kubectl v0.34.1 @@ -147,9 +146,11 @@ require ( github.com/go-openapi/spec v0.20.11 // indirect github.com/go-openapi/strfmt v0.21.9 // indirect github.com/go-openapi/validate v0.22.3 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/google/btree v1.1.3 // indirect 
github.com/google/gopacket v1.1.19 // indirect + github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5 // indirect github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/kr/pretty v0.3.1 // indirect @@ -174,10 +175,12 @@ require ( go.opentelemetry.io/otel/sdk v1.38.0 // indirect go.opentelemetry.io/otel/sdk/metric v1.38.0 // indirect go.opentelemetry.io/otel/trace v1.38.0 // indirect + go.uber.org/automaxprocs v1.6.0 // indirect go.uber.org/dig v1.17.1 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect go4.org/netipx v0.0.0-20231129151722-fdeea329fbba // indirect + golang.org/x/tools v0.37.0 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect @@ -193,11 +196,6 @@ require ( k8s.io/kubelet v0.34.1 ) -replace ( - github.com/onsi/ginkgo => github.com/onsi/ginkgo v1.12.0 - github.com/onsi/gomega => github.com/onsi/gomega v1.10.0 -) - retract ( v1.16.17 // contains only retractions, new version to retract 1.15.22. v1.16.16 // contains only retractions, has to be newer than 1.16.15. diff --git a/go.sum b/go.sum index dbcced8ba9..c1ac6b2891 100644 --- a/go.sum +++ b/go.sum @@ -114,6 +114,7 @@ github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/X github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= @@ -160,6 +161,7 @@ github.com/go-openapi/validate v0.22.3 h1:KxG9mu5HBRYbecRb37KRCihvGGtND2aXziBAv0 github.com/go-openapi/validate v0.22.3/go.mod h1:kVxh31KbfsxU8ZyoHaDbLBWU5CnMdqBUEtadQ2G4d5M= github.com/go-quicktest/qt v1.101.1-0.20240301121107-c6c8733fa1e6 h1:teYtXy9B7y5lHTp8V9KPxpYRAVA7dozigQcMiBust1s= github.com/go-quicktest/qt v1.101.1-0.20240301121107-c6c8733fa1e6/go.mod h1:p4lGIVX+8Wa6ZPNDvqcxq36XpUDLh42FLetFU7odllI= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= @@ -186,6 +188,7 @@ github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrU github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= 
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= @@ -224,7 +227,6 @@ github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKe github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= -github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= @@ -297,16 +299,24 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRW github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY= github.com/nxadm/tail v1.4.11/go.mod h1:OTaG3NK980DZzxbRq6lEuzgU+mug70nY11sMd4JXXHc= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= -github.com/onsi/ginkgo v1.12.0 h1:Iw5WCbBcaAAd0fpRb1c9r5YCylv4XDoCSigm1zLevwU= -github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= +github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= -github.com/onsi/gomega v1.10.0 h1:Gwkk+PTu/nfOwNMtUB/mRUv0X7ewW5dO4AERT1ThVKo= -github.com/onsi/gomega v1.10.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= +github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y= +github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b h1:FfH+VrHHk6Lxt9HdVS0PXzSXFyS2NbZKXv33FYPol0A= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b/go.mod h1:AC62GU6hc0BrNm+9RK9VSiwa/EUe1bkIeFORAMcHvJU= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= @@ -325,6 +335,8 @@ github.com/pmezard/go-difflib 
v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= +github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= +github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -367,6 +379,7 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -465,6 +478,7 @@ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= @@ -489,11 +503,15 @@ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -532,6 +550,7 @@ golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBn golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= @@ -541,8 +560,6 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= @@ -582,7 +599,6 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= -gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= @@ -590,8 +606,10 @@ gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 
v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/hack/aks/Makefile b/hack/aks/Makefile index 5e1c8f3f9b..3b31345ec5 100644 --- a/hack/aks/Makefile +++ b/hack/aks/Makefile @@ -29,6 +29,7 @@ PUBLIC_IPv6 ?= $(PUBLIC_IP_ID)/$(IP_PREFIX)-$(CLUSTER)-v6 KUBE_PROXY_JSON_PATH ?= ./kube-proxy.json LTS ?= false + # overrideable variables SUB ?= $(AZURE_SUBSCRIPTION) CLUSTER ?= $(USER)-$(REGION) @@ -280,22 +281,22 @@ swiftv2-dummy-cluster-up: rg-up ipv4 swift-net-up ## Bring up a SWIFT AzCNI clus --network-plugin azure \ --vnet-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/nodenet \ --pod-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/podnet \ + --tags stampcreatorserviceinfo=true \ --load-balancer-outbound-ips $(PUBLIC_IPv4) \ --no-ssh-key \ --yes @$(MAKE) set-kubeconf swiftv2-podsubnet-cluster-up: ipv4 swift-net-up ## Bring up a SWIFTv2 PodSubnet cluster - $(COMMON_AKS_FIELDS) + $(COMMON_AKS_FIELDS) \ --network-plugin azure \ - --nodepool-name nodepool1 \ - --load-balancer-outbound-ips $(PUBLIC_IPv4) \ + --node-vm-size $(VM_SIZE) \ --vnet-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/nodenet \ --pod-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/podnet \ - --service-cidr "10.0.0.0/16" \ - --dns-service-ip "10.0.0.10" \ - --tags fastpathenabled=true RGOwner=LongRunningTestPipelines stampcreatorserviceinfo=true \ + --nodepool-tags fastpathenabled=true aks-nic-enable-multi-tenancy=true \ + --tags stampcreatorserviceinfo=true \ --aks-custom-headers AKSHTTPCustomFeatures=Microsoft.ContainerService/NetworkingMultiTenancyPreview \ + --load-balancer-outbound-ips $(PUBLIC_IPv4) \ --yes @$(MAKE) set-kubeconf @@ -446,7 +447,7 @@ linux-swiftv2-nodepool-up: ## Add linux node pool to swiftv2 cluster --os-type Linux \ --max-pods 250 \ --subscription $(SUB) \ - --tags fastpathenabled=true,aks-nic-enable-multi-tenancy=true \ + --tags fastpathenabled=true aks-nic-enable-multi-tenancy=true stampcreatorserviceinfo=true\ --aks-custom-headers AKSHTTPCustomFeatures=Microsoft.ContainerService/NetworkingMultiTenancyPreview \ --pod-subnet-id /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/podnet diff --git a/test/integration/manifests/swiftv2/long-running-cluster/pod.yaml b/test/integration/manifests/swiftv2/long-running-cluster/pod.yaml new file mode 100644 index 0000000000..e2c33f2d8f --- /dev/null +++ b/test/integration/manifests/swiftv2/long-running-cluster/pod.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +kind: Pod +metadata: + name: {{ .PodName }} + namespace: {{ .Namespace }} + labels: + kubernetes.azure.com/pod-network-instance: {{ .PNIName }} + kubernetes.azure.com/pod-network: {{ .PNName }} +spec: + nodeSelector: + kubernetes.io/hostname: {{ .NodeName }} + containers: + - name: net-debugger + image: {{ .Image }} + command: ["/bin/sh", "-c"] + args: + - | + 
echo "Pod Network Diagnostics started on $(hostname)"; + while true; do + ip addr show + ip route show + sleep 60 + done + resources: + limits: + cpu: 300m + memory: 600Mi + requests: + cpu: 300m + memory: 600Mi + securityContext: + privileged: true + restartPolicy: Always diff --git a/test/integration/manifests/swiftv2/long-running-cluster/podnetwork.yaml b/test/integration/manifests/swiftv2/long-running-cluster/podnetwork.yaml new file mode 100644 index 0000000000..25a7491d90 --- /dev/null +++ b/test/integration/manifests/swiftv2/long-running-cluster/podnetwork.yaml @@ -0,0 +1,15 @@ +apiVersion: multitenancy.acn.azure.com/v1alpha1 +kind: PodNetwork +metadata: + name: {{ .PNName }} +{{- if .SubnetToken }} + labels: + kubernetes.azure.com/override-subnet-token: "{{ .SubnetToken }}" +{{- end }} +spec: + networkID: "{{ .VnetGUID }}" +{{- if not .SubnetToken }} + subnetGUID: "{{ .SubnetGUID }}" +{{- end }} + subnetResourceID: "{{ .SubnetARMID }}" + deviceType: acn.azure.com/vnet-nic diff --git a/test/integration/manifests/swiftv2/long-running-cluster/podnetworkinstance.yaml b/test/integration/manifests/swiftv2/long-running-cluster/podnetworkinstance.yaml new file mode 100644 index 0000000000..4d1f8ca384 --- /dev/null +++ b/test/integration/manifests/swiftv2/long-running-cluster/podnetworkinstance.yaml @@ -0,0 +1,13 @@ +apiVersion: multitenancy.acn.azure.com/v1alpha1 +kind: PodNetworkInstance +metadata: + name: {{ .PNIName }} + namespace: {{ .Namespace }} +spec: + podNetworkConfigs: + - podNetwork: {{ .PNName }} + {{- if eq .Type "explicit" }} + podIPReservationSize: {{ .Reservations }} + {{- else }} + podIPReservationSize: 1 + {{- end }} diff --git a/test/integration/swiftv2/helpers/az_helpers.go b/test/integration/swiftv2/helpers/az_helpers.go new file mode 100644 index 0000000000..148a8b34d9 --- /dev/null +++ b/test/integration/swiftv2/helpers/az_helpers.go @@ -0,0 +1,286 @@ +package helpers + +import ( + "context" + "fmt" + "os/exec" + "strings" + "time" +) + +func runAzCommand(cmd string, args ...string) (string, error) { + out, err := exec.Command(cmd, args...).CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to run %s %v: %w\nOutput: %s", cmd, args, err, string(out)) + } + return strings.TrimSpace(string(out)), nil +} + +func GetVnetGUID(rg, vnet string) (string, error) { + return runAzCommand("az", "network", "vnet", "show", "--resource-group", rg, "--name", vnet, "--query", "resourceGuid", "-o", "tsv") +} + +func GetSubnetARMID(rg, vnet, subnet string) (string, error) { + return runAzCommand("az", "network", "vnet", "subnet", "show", "--resource-group", rg, "--vnet-name", vnet, "--name", subnet, "--query", "id", "-o", "tsv") +} + +func GetSubnetGUID(rg, vnet, subnet string) (string, error) { + subnetID, err := GetSubnetARMID(rg, vnet, subnet) + if err != nil { + return "", err + } + return runAzCommand("az", "resource", "show", "--ids", subnetID, "--api-version", "2023-09-01", "--query", "properties.serviceAssociationLinks[0].properties.subnetId", "-o", "tsv") +} + +func GetSubnetToken(rg, vnet, subnet string) (string, error) { + // Optionally implement if you use subnet token override + return "", nil +} + +// GetClusterNodes returns a slice of node names from a cluster using the given kubeconfig +func GetClusterNodes(kubeconfig string) ([]string, error) { + cmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "get", "nodes", "-o", "name") + out, err := cmd.CombinedOutput() + if err != nil { + return nil, fmt.Errorf("failed to get nodes using kubeconfig %s: 
%w\nOutput: %s", kubeconfig, err, string(out)) + } + + lines := strings.Split(strings.TrimSpace(string(out)), "\n") + nodes := make([]string, 0, len(lines)) + + for _, line := range lines { + // kubectl returns "node/", we strip the prefix + if strings.HasPrefix(line, "node/") { + nodes = append(nodes, strings.TrimPrefix(line, "node/")) + } + } + return nodes, nil +} + +// EnsureNamespaceExists checks if a namespace exists and creates it if it doesn't +func EnsureNamespaceExists(kubeconfig, namespace string) error { + cmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "get", "namespace", namespace) + err := cmd.Run() + + if err == nil { + return nil // Namespace exists + } + + // Namespace doesn't exist, create it + cmd = exec.Command("kubectl", "--kubeconfig", kubeconfig, "create", "namespace", namespace) + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to create namespace %s: %s\n%s", namespace, err, string(out)) + } + + return nil +} + +// DeletePod deletes a pod in the specified namespace and waits for it to be fully removed +func DeletePod(kubeconfig, namespace, podName string) error { + fmt.Printf("Deleting pod %s in namespace %s...\n", podName, namespace) + + // Initiate pod deletion with context timeout + ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second) + defer cancel() + + cmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig, "delete", "pod", podName, "-n", namespace, "--ignore-not-found=true") + out, err := cmd.CombinedOutput() + if err != nil { + if ctx.Err() == context.DeadlineExceeded { + fmt.Printf("kubectl delete pod command timed out after 90s, attempting force delete...\n") + } else { + return fmt.Errorf("failed to delete pod %s in namespace %s: %s\n%s", podName, namespace, err, string(out)) + } + } + + // Wait for pod to be completely gone (critical for IP release) + fmt.Printf("Waiting for pod %s to be fully removed...\n", podName) + for attempt := 1; attempt <= 30; attempt++ { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + checkCmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig, "get", "pod", podName, "-n", namespace, "--ignore-not-found=true", "-o", "name") + checkOut, _ := checkCmd.CombinedOutput() + cancel() + + if strings.TrimSpace(string(checkOut)) == "" { + fmt.Printf("Pod %s fully removed after %d seconds\n", podName, attempt*2) + // Extra wait to ensure IP reservation is released in DNC + time.Sleep(5 * time.Second) + return nil + } + + if attempt%5 == 0 { + fmt.Printf("Pod %s still terminating (attempt %d/30)...\n", podName, attempt) + } + time.Sleep(2 * time.Second) + } + + // If pod still exists after 60 seconds, force delete + fmt.Printf("Pod %s still exists after 60s, attempting force delete...\n", podName) + ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + forceCmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig, "delete", "pod", podName, "-n", namespace, "--grace-period=0", "--force", "--ignore-not-found=true") + forceOut, forceErr := forceCmd.CombinedOutput() + if forceErr != nil { + fmt.Printf("Warning: Force delete failed: %s\n%s\n", forceErr, string(forceOut)) + } + + // Wait a bit more for force delete to complete + time.Sleep(10 * time.Second) + fmt.Printf("Pod %s deletion completed (may have required force)\n", podName) + return nil +} + +// DeletePodNetworkInstance deletes a PodNetworkInstance and waits for it to be removed +func 
DeletePodNetworkInstance(kubeconfig, namespace, pniName string) error { + fmt.Printf("Deleting PodNetworkInstance %s in namespace %s...\n", pniName, namespace) + + // Initiate PNI deletion + cmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "delete", "podnetworkinstance", pniName, "-n", namespace, "--ignore-not-found=true") + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to delete PodNetworkInstance %s: %s\n%s", pniName, err, string(out)) + } + + // Wait for PNI to be completely gone (it may take time for DNC to release reservations) + fmt.Printf("Waiting for PodNetworkInstance %s to be fully removed...\n", pniName) + for attempt := 1; attempt <= 60; attempt++ { + checkCmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "get", "podnetworkinstance", pniName, "-n", namespace, "--ignore-not-found=true", "-o", "name") + checkOut, _ := checkCmd.CombinedOutput() + + if strings.TrimSpace(string(checkOut)) == "" { + fmt.Printf("PodNetworkInstance %s fully removed after %d seconds\n", pniName, attempt*2) + return nil + } + + if attempt%10 == 0 { + // Check for ReservationInUse errors + descCmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "describe", "podnetworkinstance", pniName, "-n", namespace) + descOut, _ := descCmd.CombinedOutput() + descStr := string(descOut) + + if strings.Contains(descStr, "ReservationInUse") { + fmt.Printf("PNI %s still has active reservations (attempt %d/60). Waiting for DNC to release...\n", pniName, attempt) + } else { + fmt.Printf("PNI %s still terminating (attempt %d/60)...\n", pniName, attempt) + } + } + time.Sleep(2 * time.Second) + } + + // If PNI still exists after 120 seconds, try to remove finalizers + fmt.Printf("PNI %s still exists after 120s, attempting to remove finalizers...\n", pniName) + patchCmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "patch", "podnetworkinstance", pniName, "-n", namespace, "-p", `{"metadata":{"finalizers":[]}}`, "--type=merge") + patchOut, patchErr := patchCmd.CombinedOutput() + if patchErr != nil { + fmt.Printf("Warning: Failed to remove finalizers: %s\n%s\n", patchErr, string(patchOut)) + } else { + fmt.Printf("Finalizers removed, waiting for deletion...\n") + time.Sleep(5 * time.Second) + } + + fmt.Printf("PodNetworkInstance %s deletion completed\n", pniName) + return nil +} + +// DeletePodNetwork deletes a PodNetwork and waits for it to be removed +func DeletePodNetwork(kubeconfig, pnName string) error { + fmt.Printf("Deleting PodNetwork %s...\n", pnName) + + cmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "delete", "podnetwork", pnName, "--ignore-not-found=true") + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to delete PodNetwork %s: %s\n%s", pnName, err, string(out)) + } + + // Wait for PN to be completely gone + fmt.Printf("Waiting for PodNetwork %s to be fully removed...\n", pnName) + for attempt := 1; attempt <= 30; attempt++ { + checkCmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "get", "podnetwork", pnName, "--ignore-not-found=true", "-o", "name") + checkOut, _ := checkCmd.CombinedOutput() + + if strings.TrimSpace(string(checkOut)) == "" { + fmt.Printf("PodNetwork %s fully removed after %d seconds\n", pnName, attempt*2) + return nil + } + + if attempt%10 == 0 { + fmt.Printf("PodNetwork %s still terminating (attempt %d/30)...\n", pnName, attempt) + } + time.Sleep(2 * time.Second) + } + + // Try to remove finalizers if still stuck + fmt.Printf("PodNetwork %s still exists, attempting to remove 
finalizers...\n", pnName) + patchCmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "patch", "podnetwork", pnName, "-p", `{"metadata":{"finalizers":[]}}`, "--type=merge") + patchOut, patchErr := patchCmd.CombinedOutput() + if patchErr != nil { + fmt.Printf("Warning: Failed to remove finalizers: %s\n%s\n", patchErr, string(patchOut)) + } + + time.Sleep(5 * time.Second) + fmt.Printf("PodNetwork %s deletion completed\n", pnName) + return nil +} + +// DeleteNamespace deletes a namespace and waits for it to be removed +func DeleteNamespace(kubeconfig, namespace string) error { + fmt.Printf("Deleting namespace %s...\n", namespace) + + cmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "delete", "namespace", namespace, "--ignore-not-found=true") + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to delete namespace %s: %s\n%s", namespace, err, string(out)) + } + + // Wait for namespace to be completely gone + fmt.Printf("Waiting for namespace %s to be fully removed...\n", namespace) + for attempt := 1; attempt <= 60; attempt++ { + checkCmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "get", "namespace", namespace, "--ignore-not-found=true", "-o", "name") + checkOut, _ := checkCmd.CombinedOutput() + + if strings.TrimSpace(string(checkOut)) == "" { + fmt.Printf("Namespace %s fully removed after %d seconds\n", namespace, attempt*2) + return nil + } + + if attempt%15 == 0 { + fmt.Printf("Namespace %s still terminating (attempt %d/60)...\n", namespace, attempt) + } + time.Sleep(2 * time.Second) + } + + // Try to remove finalizers if still stuck + fmt.Printf("Namespace %s still exists, attempting to remove finalizers...\n", namespace) + patchCmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "patch", "namespace", namespace, "-p", `{"metadata":{"finalizers":[]}}`, "--type=merge") + patchOut, patchErr := patchCmd.CombinedOutput() + if patchErr != nil { + fmt.Printf("Warning: Failed to remove finalizers: %s\n%s\n", patchErr, string(patchOut)) + } + + time.Sleep(5 * time.Second) + fmt.Printf("Namespace %s deletion completed\n", namespace) + return nil +} + +// WaitForPodRunning waits for a pod to reach Running state with retries +func WaitForPodRunning(kubeconfig, namespace, podName string, maxRetries, sleepSeconds int) error { + for attempt := 1; attempt <= maxRetries; attempt++ { + cmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "get", "pod", podName, "-n", namespace, "-o", "jsonpath={.status.phase}") + out, err := cmd.CombinedOutput() + + if err == nil && strings.TrimSpace(string(out)) == "Running" { + fmt.Printf("Pod %s is now Running\n", podName) + return nil + } + + if attempt < maxRetries { + fmt.Printf("Pod %s not running yet (attempt %d/%d), status: %s. 
Waiting %d seconds...\n", + podName, attempt, maxRetries, strings.TrimSpace(string(out)), sleepSeconds) + time.Sleep(time.Duration(sleepSeconds) * time.Second) + } + } + + return fmt.Errorf("pod %s did not reach Running state after %d attempts", podName, maxRetries) +} diff --git a/test/integration/swiftv2/longRunningCluster/datapath.go b/test/integration/swiftv2/longRunningCluster/datapath.go new file mode 100644 index 0000000000..46ccd1d983 --- /dev/null +++ b/test/integration/swiftv2/longRunningCluster/datapath.go @@ -0,0 +1,582 @@ +package longRunningCluster + +import ( + "bytes" + "fmt" + "os/exec" + "strings" + "text/template" + + "github.com/Azure/azure-container-networking/test/integration/swiftv2/helpers" +) + +func applyTemplate(templatePath string, data interface{}, kubeconfig string) error { + tmpl, err := template.ParseFiles(templatePath) + if err != nil { + return err + } + + var buf bytes.Buffer + if err := tmpl.Execute(&buf, data); err != nil { + return err + } + + cmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "apply", "-f", "-") + cmd.Stdin = &buf + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("kubectl apply failed: %s\n%s", err, string(out)) + } + + fmt.Println(string(out)) + return nil +} + +// ------------------------- +// PodNetwork +// ------------------------- +type PodNetworkData struct { + PNName string + VnetGUID string + SubnetGUID string + SubnetARMID string + SubnetToken string +} + +func CreatePodNetwork(kubeconfig string, data PodNetworkData, templatePath string) error { + return applyTemplate(templatePath, data, kubeconfig) +} + +// ------------------------- +// PodNetworkInstance +// ------------------------- +type PNIData struct { + PNIName string + PNName string + Namespace string + Type string + Reservations int +} + +func CreatePodNetworkInstance(kubeconfig string, data PNIData, templatePath string) error { + return applyTemplate(templatePath, data, kubeconfig) +} + +// ------------------------- +// Pod +// ------------------------- +type PodData struct { + PodName string + NodeName string + OS string + PNName string + PNIName string + Namespace string + Image string +} + +func CreatePod(kubeconfig string, data PodData, templatePath string) error { + return applyTemplate(templatePath, data, kubeconfig) +} + +// ------------------------- +// High-level orchestration +// ------------------------- + +// TestResources holds all the configuration needed for creating test resources +type TestResources struct { + Kubeconfig string + PNName string + PNIName string + VnetGUID string + SubnetGUID string + SubnetARMID string + SubnetToken string + PodNetworkTemplate string + PNITemplate string + PodTemplate string + PodImage string +} + +// PodScenario defines a single pod creation scenario +type PodScenario struct { + Name string // Descriptive name for the scenario + Cluster string // "aks-1" or "aks-2" + VnetName string // e.g., "cx_vnet_a1", "cx_vnet_b1" + SubnetName string // e.g., "s1", "s2" + NodeSelector string // "low-nic" or "high-nic" + PodNameSuffix string // Unique suffix for pod name +} + +// TestScenarios holds all pod scenarios to test +type TestScenarios struct { + ResourceGroup string + BuildID string + PodImage string + Scenarios []PodScenario + VnetSubnetCache map[string]VnetSubnetInfo // Cache for vnet/subnet info + UsedNodes map[string]bool // Tracks which nodes are already used (one pod per node for low-NIC) +} + +// VnetSubnetInfo holds network information for a vnet/subnet combination +type 
VnetSubnetInfo struct { + VnetGUID string + SubnetGUID string + SubnetARMID string + SubnetToken string +} + +// NodePoolInfo holds information about nodes in different pools +type NodePoolInfo struct { + LowNicNodes []string + HighNicNodes []string +} + +// GetNodesByNicCount categorizes nodes by NIC count based on node pool labels +func GetNodesByNicCount(kubeconfig string) (NodePoolInfo, error) { + nodeInfo := NodePoolInfo{ + LowNicNodes: []string{}, + HighNicNodes: []string{}, + } + + // Get nodes from default node pool (low-NIC nodes) + cmd := exec.Command("kubectl", "--kubeconfig", kubeconfig, "get", "nodes", + "-l", "agentpool!=nplinux", "-o", "name") + out, err := cmd.CombinedOutput() + if err != nil { + return NodePoolInfo{}, fmt.Errorf("failed to get default pool nodes: %w\nOutput: %s", err, string(out)) + } + + lines := strings.Split(strings.TrimSpace(string(out)), "\n") + for _, line := range lines { + if strings.HasPrefix(line, "node/") { + nodeInfo.LowNicNodes = append(nodeInfo.LowNicNodes, strings.TrimPrefix(line, "node/")) + } + } + + // Get nodes from nplinux node pool (high-NIC nodes) + cmd = exec.Command("kubectl", "--kubeconfig", kubeconfig, "get", "nodes", + "-l", "agentpool=nplinux", "-o", "name") + out, err = cmd.CombinedOutput() + if err != nil { + return NodePoolInfo{}, fmt.Errorf("failed to get nplinux pool nodes: %w\nOutput: %s", err, string(out)) + } + + lines = strings.Split(strings.TrimSpace(string(out)), "\n") + for _, line := range lines { + if line != "" && strings.HasPrefix(line, "node/") { + nodeInfo.HighNicNodes = append(nodeInfo.HighNicNodes, strings.TrimPrefix(line, "node/")) + } + } + + fmt.Printf("Found %d low-NIC nodes (default pool) and %d high-NIC nodes (nplinux pool)\n", + len(nodeInfo.LowNicNodes), len(nodeInfo.HighNicNodes)) + + return nodeInfo, nil +} + +// CreatePodNetworkResource creates a PodNetwork +func CreatePodNetworkResource(resources TestResources) error { + err := CreatePodNetwork(resources.Kubeconfig, PodNetworkData{ + PNName: resources.PNName, + VnetGUID: resources.VnetGUID, + SubnetGUID: resources.SubnetGUID, + SubnetARMID: resources.SubnetARMID, + SubnetToken: resources.SubnetToken, + }, resources.PodNetworkTemplate) + if err != nil { + return fmt.Errorf("failed to create PodNetwork: %w", err) + } + return nil +} + +// CreateNamespaceResource creates a namespace +func CreateNamespaceResource(kubeconfig, namespace string) error { + err := helpers.EnsureNamespaceExists(kubeconfig, namespace) + if err != nil { + return fmt.Errorf("failed to create namespace: %w", err) + } + return nil +} + +// CreatePodNetworkInstanceResource creates a PodNetworkInstance +func CreatePodNetworkInstanceResource(resources TestResources) error { + err := CreatePodNetworkInstance(resources.Kubeconfig, PNIData{ + PNIName: resources.PNIName, + PNName: resources.PNName, + Namespace: resources.PNName, + Type: "explicit", + Reservations: 2, + }, resources.PNITemplate) + if err != nil { + return fmt.Errorf("failed to create PodNetworkInstance: %w", err) + } + return nil +} + +// CreatePodResource creates a single pod on a specified node and waits for it to be running +func CreatePodResource(resources TestResources, podName, nodeName string) error { + err := CreatePod(resources.Kubeconfig, PodData{ + PodName: podName, + NodeName: nodeName, + OS: "linux", + PNName: resources.PNName, + PNIName: resources.PNIName, + Namespace: resources.PNName, + Image: resources.PodImage, + }, resources.PodTemplate) + if err != nil { + return fmt.Errorf("failed to create pod %s: 
%w", podName, err) + } + + // Wait for pod to be running with retries + err = helpers.WaitForPodRunning(resources.Kubeconfig, resources.PNName, podName, 10, 30) + if err != nil { + return fmt.Errorf("pod %s did not reach running state: %w", podName, err) + } + + return nil +} + +// GetOrFetchVnetSubnetInfo retrieves cached network info or fetches it from Azure +func GetOrFetchVnetSubnetInfo(rg, vnetName, subnetName string, cache map[string]VnetSubnetInfo) (VnetSubnetInfo, error) { + key := fmt.Sprintf("%s/%s", vnetName, subnetName) + + if info, exists := cache[key]; exists { + return info, nil + } + + // Fetch from Azure + vnetGUID, err := helpers.GetVnetGUID(rg, vnetName) + if err != nil { + return VnetSubnetInfo{}, fmt.Errorf("failed to get VNet GUID: %w", err) + } + + subnetGUID, err := helpers.GetSubnetGUID(rg, vnetName, subnetName) + if err != nil { + return VnetSubnetInfo{}, fmt.Errorf("failed to get Subnet GUID: %w", err) + } + + subnetARMID, err := helpers.GetSubnetARMID(rg, vnetName, subnetName) + if err != nil { + return VnetSubnetInfo{}, fmt.Errorf("failed to get Subnet ARM ID: %w", err) + } + + subnetToken, err := helpers.GetSubnetToken(rg, vnetName, subnetName) + if err != nil { + return VnetSubnetInfo{}, fmt.Errorf("failed to get Subnet Token: %w", err) + } + + info := VnetSubnetInfo{ + VnetGUID: vnetGUID, + SubnetGUID: subnetGUID, + SubnetARMID: subnetARMID, + SubnetToken: subnetToken, + } + + cache[key] = info + return info, nil +} + +// CreateScenarioResources creates all resources for a specific pod scenario +func CreateScenarioResources(scenario PodScenario, testScenarios TestScenarios) error { + // Get kubeconfig for the cluster + kubeconfig := fmt.Sprintf("/tmp/%s.kubeconfig", scenario.Cluster) + + // Get network info + netInfo, err := GetOrFetchVnetSubnetInfo(testScenarios.ResourceGroup, scenario.VnetName, scenario.SubnetName, testScenarios.VnetSubnetCache) + if err != nil { + return fmt.Errorf("failed to get network info for %s/%s: %w", scenario.VnetName, scenario.SubnetName, err) + } + + // Create unique names for this scenario (simplify vnet name and make K8s compatible) + // Remove "cx_vnet_" prefix and replace underscores with hyphens + vnetShort := strings.TrimPrefix(scenario.VnetName, "cx_vnet_") + vnetShort = strings.ReplaceAll(vnetShort, "_", "-") + subnetNameSafe := strings.ReplaceAll(scenario.SubnetName, "_", "-") + pnName := fmt.Sprintf("pn-%s-%s-%s", testScenarios.BuildID, vnetShort, subnetNameSafe) + pniName := fmt.Sprintf("pni-%s-%s-%s", testScenarios.BuildID, vnetShort, subnetNameSafe) + + resources := TestResources{ + Kubeconfig: kubeconfig, + PNName: pnName, + PNIName: pniName, + VnetGUID: netInfo.VnetGUID, + SubnetGUID: netInfo.SubnetGUID, + SubnetARMID: netInfo.SubnetARMID, + SubnetToken: netInfo.SubnetToken, + PodNetworkTemplate: "../../manifests/swiftv2/long-running-cluster/podnetwork.yaml", + PNITemplate: "../../manifests/swiftv2/long-running-cluster/podnetworkinstance.yaml", + PodTemplate: "../../manifests/swiftv2/long-running-cluster/pod.yaml", + PodImage: testScenarios.PodImage, + } + + // Step 1: Create PodNetwork + err = CreatePodNetworkResource(resources) + if err != nil { + return fmt.Errorf("scenario %s: %w", scenario.Name, err) + } + + // Step 2: Create namespace + err = CreateNamespaceResource(resources.Kubeconfig, resources.PNName) + if err != nil { + return fmt.Errorf("scenario %s: %w", scenario.Name, err) + } + + // Step 3: Create PodNetworkInstance + err = CreatePodNetworkInstanceResource(resources) + if err != nil { + return 
fmt.Errorf("scenario %s: %w", scenario.Name, err) + } + + // Step 4: Get nodes by NIC count + nodeInfo, err := GetNodesByNicCount(kubeconfig) + if err != nil { + return fmt.Errorf("scenario %s: failed to get nodes: %w", scenario.Name, err) + } + + // Step 5: Select appropriate node based on scenario + var targetNode string + + // Initialize used nodes tracker if not exists + if testScenarios.UsedNodes == nil { + testScenarios.UsedNodes = make(map[string]bool) + } + + if scenario.NodeSelector == "low-nic" { + if len(nodeInfo.LowNicNodes) == 0 { + return fmt.Errorf("scenario %s: no low-NIC nodes available", scenario.Name) + } + // Find first unused node in the pool (low-NIC nodes can only handle one pod) + targetNode = "" + for _, node := range nodeInfo.LowNicNodes { + if !testScenarios.UsedNodes[node] { + targetNode = node + testScenarios.UsedNodes[node] = true + break + } + } + if targetNode == "" { + return fmt.Errorf("scenario %s: all low-NIC nodes already in use", scenario.Name) + } + } else { // "high-nic" + if len(nodeInfo.HighNicNodes) == 0 { + return fmt.Errorf("scenario %s: no high-NIC nodes available", scenario.Name) + } + // Find first unused node in the pool + targetNode = "" + for _, node := range nodeInfo.HighNicNodes { + if !testScenarios.UsedNodes[node] { + targetNode = node + testScenarios.UsedNodes[node] = true + break + } + } + if targetNode == "" { + return fmt.Errorf("scenario %s: all high-NIC nodes already in use", scenario.Name) + } + } + + // Step 6: Create pod + podName := fmt.Sprintf("pod-%s", scenario.PodNameSuffix) + err = CreatePodResource(resources, podName, targetNode) + if err != nil { + return fmt.Errorf("scenario %s: %w", scenario.Name, err) + } + + fmt.Printf("Successfully created scenario: %s (pod: %s on node: %s)\n", scenario.Name, podName, targetNode) + return nil +} + +// DeleteScenarioResources deletes all resources for a specific pod scenario +func DeleteScenarioResources(scenario PodScenario, buildID string) error { + kubeconfig := fmt.Sprintf("/tmp/%s.kubeconfig", scenario.Cluster) + + // Create same names as creation (simplify vnet name and make K8s compatible) + // Remove "cx_vnet_" prefix and replace underscores with hyphens + vnetShort := strings.TrimPrefix(scenario.VnetName, "cx_vnet_") + vnetShort = strings.ReplaceAll(vnetShort, "_", "-") + subnetNameSafe := strings.ReplaceAll(scenario.SubnetName, "_", "-") + pnName := fmt.Sprintf("pn-%s-%s-%s", buildID, vnetShort, subnetNameSafe) + pniName := fmt.Sprintf("pni-%s-%s-%s", buildID, vnetShort, subnetNameSafe) + podName := fmt.Sprintf("pod-%s", scenario.PodNameSuffix) + + // Delete pod + err := helpers.DeletePod(kubeconfig, pnName, podName) + if err != nil { + return fmt.Errorf("scenario %s: failed to delete pod: %w", scenario.Name, err) + } + + // Delete PodNetworkInstance + err = helpers.DeletePodNetworkInstance(kubeconfig, pnName, pniName) + if err != nil { + return fmt.Errorf("scenario %s: failed to delete PNI: %w", scenario.Name, err) + } + + // Delete PodNetwork + err = helpers.DeletePodNetwork(kubeconfig, pnName) + if err != nil { + return fmt.Errorf("scenario %s: failed to delete PN: %w", scenario.Name, err) + } + + // Delete namespace + err = helpers.DeleteNamespace(kubeconfig, pnName) + if err != nil { + return fmt.Errorf("scenario %s: failed to delete namespace: %w", scenario.Name, err) + } + + fmt.Printf("Successfully deleted scenario: %s\n", scenario.Name) + return nil +} + +// CreateAllScenarios creates resources for all test scenarios +func CreateAllScenarios(testScenarios 
TestScenarios) error { + for _, scenario := range testScenarios.Scenarios { + fmt.Printf("\n=== Creating scenario: %s ===\n", scenario.Name) + err := CreateScenarioResources(scenario, testScenarios) + if err != nil { + return err + } + } + return nil +} + +// DeleteAllScenarios deletes resources for all test scenarios +// Strategy: Delete all pods first, then delete shared PNI/PN/Namespace resources +func DeleteAllScenarios(testScenarios TestScenarios) error { + // Phase 1: Delete all pods first + fmt.Printf("\n=== Phase 1: Deleting all pods ===\n") + for _, scenario := range testScenarios.Scenarios { + kubeconfig := fmt.Sprintf("/tmp/%s.kubeconfig", scenario.Cluster) + vnetShort := strings.TrimPrefix(scenario.VnetName, "cx_vnet_") + vnetShort = strings.ReplaceAll(vnetShort, "_", "-") + subnetNameSafe := strings.ReplaceAll(scenario.SubnetName, "_", "-") + pnName := fmt.Sprintf("pn-%s-%s-%s", testScenarios.BuildID, vnetShort, subnetNameSafe) + podName := fmt.Sprintf("pod-%s", scenario.PodNameSuffix) + + fmt.Printf("Deleting pod for scenario: %s\n", scenario.Name) + err := helpers.DeletePod(kubeconfig, pnName, podName) + if err != nil { + fmt.Printf("Warning: Failed to delete pod for scenario %s: %v\n", scenario.Name, err) + } + } + + // Phase 2: Delete shared PNI/PN/Namespace resources (grouped by vnet/subnet/cluster) + fmt.Printf("\n=== Phase 2: Deleting shared PNI/PN/Namespace resources ===\n") + resourceGroups := make(map[string]bool) + + for _, scenario := range testScenarios.Scenarios { + kubeconfig := fmt.Sprintf("/tmp/%s.kubeconfig", scenario.Cluster) + vnetShort := strings.TrimPrefix(scenario.VnetName, "cx_vnet_") + vnetShort = strings.ReplaceAll(vnetShort, "_", "-") + subnetNameSafe := strings.ReplaceAll(scenario.SubnetName, "_", "-") + pnName := fmt.Sprintf("pn-%s-%s-%s", testScenarios.BuildID, vnetShort, subnetNameSafe) + pniName := fmt.Sprintf("pni-%s-%s-%s", testScenarios.BuildID, vnetShort, subnetNameSafe) + + // Create unique key for this vnet/subnet/cluster combination + resourceKey := fmt.Sprintf("%s:%s", scenario.Cluster, pnName) + + // Skip if we already deleted resources for this combination + if resourceGroups[resourceKey] { + continue + } + resourceGroups[resourceKey] = true + + fmt.Printf("\nDeleting shared resources for %s/%s on %s\n", scenario.VnetName, scenario.SubnetName, scenario.Cluster) + + // Delete PodNetworkInstance + err := helpers.DeletePodNetworkInstance(kubeconfig, pnName, pniName) + if err != nil { + fmt.Printf("Warning: Failed to delete PNI %s: %v\n", pniName, err) + } + + // Delete PodNetwork + err = helpers.DeletePodNetwork(kubeconfig, pnName) + if err != nil { + fmt.Printf("Warning: Failed to delete PN %s: %v\n", pnName, err) + } + + // Delete namespace + err = helpers.DeleteNamespace(kubeconfig, pnName) + if err != nil { + fmt.Printf("Warning: Failed to delete namespace %s: %v\n", pnName, err) + } + } + + fmt.Printf("\n=== All scenarios deleted ===\n") + return nil +} + +// Legacy function kept for backward compatibility +// CreateTestResources creates PodNetwork, PodNetworkInstance, namespace, and Pods +func CreateTestResources(resources TestResources) error { + // Step 1: Create PodNetwork + err := CreatePodNetworkResource(resources) + if err != nil { + return err + } + + // Step 2: Create namespace + err = CreateNamespaceResource(resources.Kubeconfig, resources.PNName) + if err != nil { + return err + } + + // Step 3: Create PodNetworkInstance + err = CreatePodNetworkInstanceResource(resources) + if err != nil { + return err + } + + // Step 4: 
Get cluster nodes + nodes, err := helpers.GetClusterNodes(resources.Kubeconfig) + if err != nil { + return fmt.Errorf("failed to get cluster nodes: %w", err) + } + if len(nodes) < 2 { + return fmt.Errorf("need at least 2 nodes, found %d", len(nodes)) + } + + // Step 5: Create pods on first two nodes (one at a time) + for i, node := range nodes[:2] { + podName := fmt.Sprintf("pod-c2-%d", i) + err = CreatePodResource(resources, podName, node) + if err != nil { + return err + } + } + + return nil +} + +// DeleteTestResources deletes all test resources in reverse order +func DeleteTestResources(kubeconfig, pnName, pniName string) error { + // Delete pods (first two nodes only, matching creation) + for i := 0; i < 2; i++ { + podName := fmt.Sprintf("pod-c2-%d", i) + err := helpers.DeletePod(kubeconfig, pnName, podName) + if err != nil { + return fmt.Errorf("failed to delete pod %s: %w", podName, err) + } + } + + // Delete PodNetworkInstance + err := helpers.DeletePodNetworkInstance(kubeconfig, pnName, pniName) + if err != nil { + return fmt.Errorf("failed to delete PodNetworkInstance: %w", err) + } + + // Delete PodNetwork + err = helpers.DeletePodNetwork(kubeconfig, pnName) + if err != nil { + return fmt.Errorf("failed to delete PodNetwork: %w", err) + } + + // Delete namespace + err = helpers.DeleteNamespace(kubeconfig, pnName) + if err != nil { + return fmt.Errorf("failed to delete namespace: %w", err) + } + + return nil +} diff --git a/test/integration/swiftv2/longRunningCluster/datapath_create_test.go b/test/integration/swiftv2/longRunningCluster/datapath_create_test.go new file mode 100644 index 0000000000..65b05fffa6 --- /dev/null +++ b/test/integration/swiftv2/longRunningCluster/datapath_create_test.go @@ -0,0 +1,115 @@ +package longRunningCluster + +import ( + "fmt" + "os" + "testing" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" +) + +func TestDatapathCreate(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + suiteConfig, reporterConfig := ginkgo.GinkgoConfiguration() + suiteConfig.Timeout = 0 + ginkgo.RunSpecs(t, "Datapath Create Suite", suiteConfig, reporterConfig) +} + +var _ = ginkgo.Describe("Datapath Create Tests", func() { + rg := os.Getenv("RG") + buildId := os.Getenv("BUILD_ID") + + if rg == "" || buildId == "" { + ginkgo.Fail(fmt.Sprintf("Missing required environment variables: RG='%s', BUILD_ID='%s'", rg, buildId)) + } + + ginkgo.It("creates PodNetwork, PodNetworkInstance, and Pods", ginkgo.NodeTimeout(0), func() { + // Define all test scenarios + scenarios := []PodScenario{ + // Customer 2 scenarios on aks-2 with cx_vnet_b1 + { + Name: "Customer2-AKS2-VnetB1-S1-LowNic", + Cluster: "aks-2", + VnetName: "cx_vnet_b1", + SubnetName: "s1", + NodeSelector: "low-nic", + PodNameSuffix: "c2-aks2-b1s1-low", + }, + { + Name: "Customer2-AKS2-VnetB1-S1-HighNic", + Cluster: "aks-2", + VnetName: "cx_vnet_b1", + SubnetName: "s1", + NodeSelector: "high-nic", + PodNameSuffix: "c2-aks2-b1s1-high", + }, + // Customer 1 scenarios + { + Name: "Customer1-AKS1-VnetA1-S1-LowNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a1", + SubnetName: "s1", + NodeSelector: "low-nic", + PodNameSuffix: "c1-aks1-a1s1-low", + }, + { + Name: "Customer1-AKS1-VnetA1-S2-LowNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a1", + SubnetName: "s2", + NodeSelector: "low-nic", + PodNameSuffix: "c1-aks1-a1s2-low", + }, + { + Name: "Customer1-AKS1-VnetA1-S2-HighNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a1", + SubnetName: "s2", + NodeSelector: "high-nic", + PodNameSuffix: "c1-aks1-a1s2-high", + }, + { + 
Name: "Customer1-AKS1-VnetA2-S1-HighNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a2", + SubnetName: "s1", + NodeSelector: "high-nic", + PodNameSuffix: "c1-aks1-a2s1-high", + }, + { + Name: "Customer1-AKS2-VnetA2-S1-LowNic", + Cluster: "aks-2", + VnetName: "cx_vnet_a2", + SubnetName: "s1", + NodeSelector: "low-nic", + PodNameSuffix: "c1-aks2-a2s1-low", + }, + { + Name: "Customer1-AKS2-VnetA3-S1-HighNic", + Cluster: "aks-2", + VnetName: "cx_vnet_a3", + SubnetName: "s1", + NodeSelector: "high-nic", + PodNameSuffix: "c1-aks2-a3s1-high", + }, + } + + // Initialize test scenarios with cache + testScenarios := TestScenarios{ + ResourceGroup: rg, + BuildID: buildId, + PodImage: "weibeld/ubuntu-networking", + Scenarios: scenarios, + VnetSubnetCache: make(map[string]VnetSubnetInfo), + UsedNodes: make(map[string]bool), + } + + // Create all scenario resources + ginkgo.By(fmt.Sprintf("Creating all test scenarios (%d scenarios)", len(scenarios))) + err := CreateAllScenarios(testScenarios) + gomega.Expect(err).To(gomega.BeNil(), "Failed to create test scenarios") + + ginkgo.By("Successfully created all test scenarios") + }) +}) diff --git a/test/integration/swiftv2/longRunningCluster/datapath_delete_test.go b/test/integration/swiftv2/longRunningCluster/datapath_delete_test.go new file mode 100644 index 0000000000..af9209ea52 --- /dev/null +++ b/test/integration/swiftv2/longRunningCluster/datapath_delete_test.go @@ -0,0 +1,115 @@ +package longRunningCluster + +import ( + "fmt" + "os" + "testing" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" +) + +func TestDatapathDelete(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + suiteConfig, reporterConfig := ginkgo.GinkgoConfiguration() + suiteConfig.Timeout = 0 + ginkgo.RunSpecs(t, "Datapath Delete Suite", suiteConfig, reporterConfig) +} + +var _ = ginkgo.Describe("Datapath Delete Tests", func() { + rg := os.Getenv("RG") + buildId := os.Getenv("BUILD_ID") + + if rg == "" || buildId == "" { + ginkgo.Fail(fmt.Sprintf("Missing required environment variables: RG='%s', BUILD_ID='%s'", rg, buildId)) + } + + ginkgo.It("deletes PodNetwork, PodNetworkInstance, and Pods", ginkgo.NodeTimeout(0), func() { + // Define all test scenarios (same as create) + scenarios := []PodScenario{ + // Customer 2 scenarios on aks-2 with cx_vnet_b1 + { + Name: "Customer2-AKS2-VnetB1-S1-LowNic", + Cluster: "aks-2", + VnetName: "cx_vnet_b1", + SubnetName: "s1", + NodeSelector: "low-nic", + PodNameSuffix: "c2-aks2-b1s1-low", + }, + { + Name: "Customer2-AKS2-VnetB1-S1-HighNic", + Cluster: "aks-2", + VnetName: "cx_vnet_b1", + SubnetName: "s1", + NodeSelector: "high-nic", + PodNameSuffix: "c2-aks2-b1s1-high", + }, + // Customer 1 scenarios + { + Name: "Customer1-AKS1-VnetA1-S1-LowNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a1", + SubnetName: "s1", + NodeSelector: "low-nic", + PodNameSuffix: "c1-aks1-a1s1-low", + }, + { + Name: "Customer1-AKS1-VnetA1-S2-LowNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a1", + SubnetName: "s2", + NodeSelector: "low-nic", + PodNameSuffix: "c1-aks1-a1s2-low", + }, + { + Name: "Customer1-AKS1-VnetA1-S2-HighNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a1", + SubnetName: "s2", + NodeSelector: "high-nic", + PodNameSuffix: "c1-aks1-a1s2-high", + }, + { + Name: "Customer1-AKS1-VnetA2-S1-HighNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a2", + SubnetName: "s1", + NodeSelector: "high-nic", + PodNameSuffix: "c1-aks1-a2s1-high", + }, + { + Name: "Customer1-AKS2-VnetA2-S1-LowNic", + Cluster: "aks-2", + VnetName: "cx_vnet_a2", + SubnetName: "s1", + 
NodeSelector: "low-nic", + PodNameSuffix: "c1-aks2-a2s1-low", + }, + { + Name: "Customer1-AKS2-VnetA3-S1-HighNic", + Cluster: "aks-2", + VnetName: "cx_vnet_a3", + SubnetName: "s1", + NodeSelector: "high-nic", + PodNameSuffix: "c1-aks2-a3s1-high", + }, + } + + // Initialize test scenarios with cache + testScenarios := TestScenarios{ + ResourceGroup: rg, + BuildID: buildId, + PodImage: "weibeld/ubuntu-networking", + Scenarios: scenarios, + VnetSubnetCache: make(map[string]VnetSubnetInfo), + UsedNodes: make(map[string]bool), + } + + // Delete all scenario resources + ginkgo.By("Deleting all test scenarios") + err := DeleteAllScenarios(testScenarios) + gomega.Expect(err).To(gomega.BeNil(), "Failed to delete test scenarios") + + ginkgo.By("Successfully deleted all test scenarios") + }) +}) diff --git a/test/integration/swiftv2/longRunningCluster/datapath_test.go b/test/integration/swiftv2/longRunningCluster/datapath_test.go new file mode 100644 index 0000000000..3c771c97d4 --- /dev/null +++ b/test/integration/swiftv2/longRunningCluster/datapath_test.go @@ -0,0 +1,129 @@ +package longRunningCluster + +import ( + "fmt" + "os" + "testing" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" +) + +func TestDatapath(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + // Set suite timeout to 0 (unlimited) for long-running tests + suiteConfig, reporterConfig := ginkgo.GinkgoConfiguration() + suiteConfig.Timeout = 0 + ginkgo.RunSpecs(t, "Datapath Suite", suiteConfig, reporterConfig) +} + +var _ = ginkgo.Describe("Datapath Tests", func() { + rg := os.Getenv("RG") + buildId := os.Getenv("BUILD_ID") + + if rg == "" || buildId == "" { + ginkgo.Fail(fmt.Sprintf("Missing required environment variables: RG='%s', BUILD_ID='%s'", rg, buildId)) + } + + ginkgo.It("creates and deletes PodNetwork, PodNetworkInstance, and Pods", ginkgo.NodeTimeout(0), func() { + // Define all test scenarios + scenarios := []PodScenario{ + // Customer 2 scenarios on aks-2 with cx_vnet_b1 + { + Name: "Customer2-AKS2-VnetB1-S1-LowNic", + Cluster: "aks-2", + VnetName: "cx_vnet_b1", + SubnetName: "s1", + NodeSelector: "low-nic", + PodNameSuffix: "c2-aks2-b1s1-low", + }, + { + Name: "Customer2-AKS2-VnetB1-S1-HighNic", + Cluster: "aks-2", + VnetName: "cx_vnet_b1", + SubnetName: "s1", + NodeSelector: "high-nic", + PodNameSuffix: "c2-aks2-b1s1-high", + }, + // Customer 1 scenarios + { + Name: "Customer1-AKS1-VnetA1-S1-LowNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a1", + SubnetName: "s1", + NodeSelector: "low-nic", + PodNameSuffix: "c1-aks1-a1s1-low", + }, + { + Name: "Customer1-AKS1-VnetA1-S2-LowNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a1", + SubnetName: "s2", + NodeSelector: "low-nic", + PodNameSuffix: "c1-aks1-a1s2-low", + }, + { + Name: "Customer1-AKS1-VnetA1-S2-HighNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a1", + SubnetName: "s2", + NodeSelector: "high-nic", + PodNameSuffix: "c1-aks1-a1s2-high", + }, + { + Name: "Customer1-AKS1-VnetA2-S1-HighNic", + Cluster: "aks-1", + VnetName: "cx_vnet_a2", + SubnetName: "s1", + NodeSelector: "high-nic", + PodNameSuffix: "c1-aks1-a2s1-high", + }, + { + Name: "Customer1-AKS2-VnetA2-S1-LowNic", + Cluster: "aks-2", + VnetName: "cx_vnet_a2", + SubnetName: "s1", + NodeSelector: "low-nic", + PodNameSuffix: "c1-aks2-a2s1-low", + }, + { + Name: "Customer1-AKS2-VnetA3-S1-HighNic", + Cluster: "aks-2", + VnetName: "cx_vnet_a3", + SubnetName: "s1", + NodeSelector: "high-nic", + PodNameSuffix: "c1-aks2-a3s1-high", + }, + } + + // Initialize test scenarios with cache + 
testScenarios := TestScenarios{ + ResourceGroup: rg, + BuildID: buildId, + PodImage: "weibeld/ubuntu-networking", + Scenarios: scenarios, + VnetSubnetCache: make(map[string]VnetSubnetInfo), + UsedNodes: make(map[string]bool), + } + + // Single iteration per pipeline run + ginkgo.By(fmt.Sprintf("Starting test run at %s", time.Now().Format(time.RFC3339))) + + // Create all scenario resources + ginkgo.By(fmt.Sprintf("Creating all test scenarios (%d scenarios)", len(scenarios))) + err := CreateAllScenarios(testScenarios) + gomega.Expect(err).To(gomega.BeNil(), "Failed to create test scenarios") + + // Wait for 20 minutes + ginkgo.By("Waiting for 20 minutes before deletion") + time.Sleep(20 * time.Minute) + + // Delete all scenario resources + ginkgo.By("Deleting all test scenarios") + err = DeleteAllScenarios(testScenarios) + gomega.Expect(err).To(gomega.BeNil(), "Failed to delete test scenarios") + + ginkgo.By(fmt.Sprintf("Completed test run at %s", time.Now().Format(time.RFC3339))) + }) +}) From 84ef13123c783bd1a70e689a9e793cbe4b9f41a5 Mon Sep 17 00:00:00 2001 From: sivakami Date: Sat, 22 Nov 2025 16:52:26 -0800 Subject: [PATCH 2/7] fix ginkgo flag. --- .../template/long-running-pipeline-template.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml b/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml index a7a34d0891..dea155460e 100644 --- a/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml +++ b/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml @@ -222,7 +222,7 @@ stages: echo "==> Creating test resources (8 scenarios)" export RG="$(rgName)" export BUILD_ID="$(rgName)" - ginkgo -v -trace --timeout=1h ./test/integration/swiftv2/longRunningCluster --focus="Datapath Create" + ginkgo -v -trace --timeout=1h --focus="Datapath Create" ./test/integration/swiftv2/longRunningCluster - script: | echo "Waiting 20 minutes for pods to run..." @@ -282,5 +282,5 @@ stages: echo "==> Deleting test resources (8 scenarios)" export RG="$(rgName)" export BUILD_ID="$(rgName)" - ginkgo -v -trace --timeout=1h ./test/integration/swiftv2/longRunningCluster --focus="Datapath Delete" + ginkgo -v -trace --timeout=1h --focus="Datapath Delete" ./test/integration/swiftv2/longRunningCluster \ No newline at end of file From 6e1c1d410b21012edae9ebdc983cdf171d565c80 Mon Sep 17 00:00:00 2001 From: sivakami Date: Sat, 22 Nov 2025 18:28:23 -0800 Subject: [PATCH 3/7] Delete old test file. 
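The combined create → wait 20 minutes → delete suite in datapath_test.go is superseded by the split datapath_create_test.go and datapath_delete_test.go suites, which run in separate pipeline jobs. The two jobs share no state beyond the RG/BUILD_ID environment variables, so both suites must derive identical resource names from BUILD_ID. A minimal sketch of that shared derivation, following the naming logic in datapath.go (the helper name here is illustrative):

```go
package longRunningCluster

import (
	"fmt"
	"strings"
)

// namesFor mirrors the naming in CreateScenarioResources and
// DeleteScenarioResources so the delete job can locate exactly what the
// create job built. Hypothetical helper, shown for illustration only.
func namesFor(buildID, vnetName, subnetName string) (pnName, pniName string) {
	// "cx_vnet_a1" -> "a1"; underscores are invalid in Kubernetes names.
	vnetShort := strings.ReplaceAll(strings.TrimPrefix(vnetName, "cx_vnet_"), "_", "-")
	subnetSafe := strings.ReplaceAll(subnetName, "_", "-")
	pnName = fmt.Sprintf("pn-%s-%s-%s", buildID, vnetShort, subnetSafe)
	pniName = fmt.Sprintf("pni-%s-%s-%s", buildID, vnetShort, subnetSafe)
	return pnName, pniName
}
```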
--- .../longRunningCluster/datapath_test.go | 129 ------------------ 1 file changed, 129 deletions(-) delete mode 100644 test/integration/swiftv2/longRunningCluster/datapath_test.go diff --git a/test/integration/swiftv2/longRunningCluster/datapath_test.go b/test/integration/swiftv2/longRunningCluster/datapath_test.go deleted file mode 100644 index 3c771c97d4..0000000000 --- a/test/integration/swiftv2/longRunningCluster/datapath_test.go +++ /dev/null @@ -1,129 +0,0 @@ -package longRunningCluster - -import ( - "fmt" - "os" - "testing" - "time" - - "github.com/onsi/ginkgo/v2" - "github.com/onsi/gomega" -) - -func TestDatapath(t *testing.T) { - gomega.RegisterFailHandler(ginkgo.Fail) - // Set suite timeout to 0 (unlimited) for long-running tests - suiteConfig, reporterConfig := ginkgo.GinkgoConfiguration() - suiteConfig.Timeout = 0 - ginkgo.RunSpecs(t, "Datapath Suite", suiteConfig, reporterConfig) -} - -var _ = ginkgo.Describe("Datapath Tests", func() { - rg := os.Getenv("RG") - buildId := os.Getenv("BUILD_ID") - - if rg == "" || buildId == "" { - ginkgo.Fail(fmt.Sprintf("Missing required environment variables: RG='%s', BUILD_ID='%s'", rg, buildId)) - } - - ginkgo.It("creates and deletes PodNetwork, PodNetworkInstance, and Pods", ginkgo.NodeTimeout(0), func() { - // Define all test scenarios - scenarios := []PodScenario{ - // Customer 2 scenarios on aks-2 with cx_vnet_b1 - { - Name: "Customer2-AKS2-VnetB1-S1-LowNic", - Cluster: "aks-2", - VnetName: "cx_vnet_b1", - SubnetName: "s1", - NodeSelector: "low-nic", - PodNameSuffix: "c2-aks2-b1s1-low", - }, - { - Name: "Customer2-AKS2-VnetB1-S1-HighNic", - Cluster: "aks-2", - VnetName: "cx_vnet_b1", - SubnetName: "s1", - NodeSelector: "high-nic", - PodNameSuffix: "c2-aks2-b1s1-high", - }, - // Customer 1 scenarios - { - Name: "Customer1-AKS1-VnetA1-S1-LowNic", - Cluster: "aks-1", - VnetName: "cx_vnet_a1", - SubnetName: "s1", - NodeSelector: "low-nic", - PodNameSuffix: "c1-aks1-a1s1-low", - }, - { - Name: "Customer1-AKS1-VnetA1-S2-LowNic", - Cluster: "aks-1", - VnetName: "cx_vnet_a1", - SubnetName: "s2", - NodeSelector: "low-nic", - PodNameSuffix: "c1-aks1-a1s2-low", - }, - { - Name: "Customer1-AKS1-VnetA1-S2-HighNic", - Cluster: "aks-1", - VnetName: "cx_vnet_a1", - SubnetName: "s2", - NodeSelector: "high-nic", - PodNameSuffix: "c1-aks1-a1s2-high", - }, - { - Name: "Customer1-AKS1-VnetA2-S1-HighNic", - Cluster: "aks-1", - VnetName: "cx_vnet_a2", - SubnetName: "s1", - NodeSelector: "high-nic", - PodNameSuffix: "c1-aks1-a2s1-high", - }, - { - Name: "Customer1-AKS2-VnetA2-S1-LowNic", - Cluster: "aks-2", - VnetName: "cx_vnet_a2", - SubnetName: "s1", - NodeSelector: "low-nic", - PodNameSuffix: "c1-aks2-a2s1-low", - }, - { - Name: "Customer1-AKS2-VnetA3-S1-HighNic", - Cluster: "aks-2", - VnetName: "cx_vnet_a3", - SubnetName: "s1", - NodeSelector: "high-nic", - PodNameSuffix: "c1-aks2-a3s1-high", - }, - } - - // Initialize test scenarios with cache - testScenarios := TestScenarios{ - ResourceGroup: rg, - BuildID: buildId, - PodImage: "weibeld/ubuntu-networking", - Scenarios: scenarios, - VnetSubnetCache: make(map[string]VnetSubnetInfo), - UsedNodes: make(map[string]bool), - } - - // Single iteration per pipeline run - ginkgo.By(fmt.Sprintf("Starting test run at %s", time.Now().Format(time.RFC3339))) - - // Create all scenario resources - ginkgo.By(fmt.Sprintf("Creating all test scenarios (%d scenarios)", len(scenarios))) - err := CreateAllScenarios(testScenarios) - gomega.Expect(err).To(gomega.BeNil(), "Failed to create test scenarios") - - // Wait for 20 
minutes - ginkgo.By("Waiting for 20 minutes before deletion") - time.Sleep(20 * time.Minute) - - // Delete all scenario resources - ginkgo.By("Deleting all test scenarios") - err = DeleteAllScenarios(testScenarios) - gomega.Expect(err).To(gomega.BeNil(), "Failed to delete test scenarios") - - ginkgo.By(fmt.Sprintf("Completed test run at %s", time.Now().Format(time.RFC3339))) - }) -}) From a023ab78c9f40586ff1cafdae39ad4e2dbfa4114 Mon Sep 17 00:00:00 2001 From: sivakami Date: Sat, 22 Nov 2025 19:24:06 -0800 Subject: [PATCH 4/7] Ginkgo run specs only on specified files. --- .../template/long-running-pipeline-template.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml b/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml index dea155460e..61ddf51bc3 100644 --- a/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml +++ b/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml @@ -222,7 +222,8 @@ stages: echo "==> Creating test resources (8 scenarios)" export RG="$(rgName)" export BUILD_ID="$(rgName)" - ginkgo -v -trace --timeout=1h --focus="Datapath Create" ./test/integration/swiftv2/longRunningCluster + cd ./test/integration/swiftv2/longRunningCluster + ginkgo -v -trace --timeout=1h --focus-file=datapath_create_test.go - script: | echo "Waiting 20 minutes for pods to run..." @@ -282,5 +283,6 @@ stages: echo "==> Deleting test resources (8 scenarios)" export RG="$(rgName)" export BUILD_ID="$(rgName)" - ginkgo -v -trace --timeout=1h --focus="Datapath Delete" ./test/integration/swiftv2/longRunningCluster + cd ./test/integration/swiftv2/longRunningCluster + ginkgo -v -trace --timeout=1h --focus-file=datapath_delete_test.go \ No newline at end of file From 4377c7b33142d6bbe707505fd3245d9791e57f46 Mon Sep 17 00:00:00 2001 From: sivakami Date: Sat, 22 Nov 2025 19:25:34 -0800 Subject: [PATCH 5/7] update pipeline params. 
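These display names make explicit that the three inputs are read only when the "Create new Infrastructure Setup" option (runSetupStages) is selected. Note that the resource group name also doubles as BUILD_ID during test runs and is embedded into pn-/pni- resource names, so a custom value should stay a valid DNS-1123 fragment. A hedged sketch of the kind of check a setup script could apply; the helper and the length margin are assumptions, not part of the pipeline:

```go
package helpers

import "regexp"

// dns1123Fragment loosely mirrors the lowercase-alphanumeric-and-hyphen
// charset Kubernetes enforces on resource names (RFC 1123).
var dns1123Fragment = regexp.MustCompile(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`)

// ValidBuildID reports whether an RG-derived BUILD_ID can safely be
// embedded into pn-/pni- names. The 40-character bound is an assumed
// margin under the 63-character Kubernetes name limit, leaving room for
// the pn-/pni- prefix and the vnet/subnet suffixes.
func ValidBuildID(rgName string) bool {
	return len(rgName) <= 40 && dns1123Fragment.MatchString(rgName)
}
```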
---
 .pipelines/swiftv2-long-running/pipeline.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pipelines/swiftv2-long-running/pipeline.yaml b/.pipelines/swiftv2-long-running/pipeline.yaml
index 6856a7ae3e..c7a320cd72 100644
--- a/.pipelines/swiftv2-long-running/pipeline.yaml
+++ b/.pipelines/swiftv2-long-running/pipeline.yaml
@@ -33,17 +33,17 @@ parameters:
 
 # Setup-only parameters (only used when runSetupStages=true)
 - name: resourceGroupName
-  displayName: "Resource Group Name used when runSetupStages is true"
+  displayName: "Resource Group Name used when Create new Infrastructure Setup is selected"
   type: string
   default: "sv2-long-run-$(Build.BuildId)"
 
 - name: vmSkuDefault
-  displayName: "VM SKU for Default Node Pool used when runSetupStages is true"
+  displayName: "VM SKU for Default Node Pool used when Create new Infrastructure Setup is selected"
   type: string
   default: "Standard_D4s_v3"
 
 - name: vmSkuHighNIC
-  displayName: "VM SKU for additional Node Pool used when runSetupStages is true"
+  displayName: "VM SKU for additional Node Pool used when Create new Infrastructure Setup is selected"
   type: string
   default: "Standard_D16s_v3"
 

From 311e38cfa1ff14958a3dabd2470593ace0e79637 Mon Sep 17 00:00:00 2001
From: sivakami
Date: Sat, 22 Nov 2025 19:56:55 -0800
Subject: [PATCH 6/7] Add ginkgo tags

---
 .../template/long-running-pipeline-template.yaml        | 4 ++--
 .../swiftv2/longRunningCluster/datapath_create_test.go  | 3 +++
 .../swiftv2/longRunningCluster/datapath_delete_test.go  | 3 +++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml b/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml
index 61ddf51bc3..7234938605 100644
--- a/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml
+++ b/.pipelines/swiftv2-long-running/template/long-running-pipeline-template.yaml
@@ -223,7 +223,7 @@ stages:
         export RG="$(rgName)"
         export BUILD_ID="$(rgName)"
         cd ./test/integration/swiftv2/longRunningCluster
-        ginkgo -v -trace --timeout=1h --focus-file=datapath_create_test.go
+        ginkgo -v -trace --timeout=1h --tags=create_test
 
   - script: |
       echo "Waiting 20 minutes for pods to run..."
@@ -284,5 +284,5 @@ stages:
         export RG="$(rgName)"
         export BUILD_ID="$(rgName)"
         cd ./test/integration/swiftv2/longRunningCluster
-        ginkgo -v -trace --timeout=1h --focus-file=datapath_delete_test.go
+        ginkgo -v -trace --timeout=1h --tags=delete_test
\ No newline at end of file
diff --git a/test/integration/swiftv2/longRunningCluster/datapath_create_test.go b/test/integration/swiftv2/longRunningCluster/datapath_create_test.go
index 65b05fffa6..e87f7c8f74 100644
--- a/test/integration/swiftv2/longRunningCluster/datapath_create_test.go
+++ b/test/integration/swiftv2/longRunningCluster/datapath_create_test.go
@@ -1,3 +1,6 @@
+//go:build create_test
+// +build create_test
+
 package longRunningCluster
 
 import (
diff --git a/test/integration/swiftv2/longRunningCluster/datapath_delete_test.go b/test/integration/swiftv2/longRunningCluster/datapath_delete_test.go
index af9209ea52..aed552dafd 100644
--- a/test/integration/swiftv2/longRunningCluster/datapath_delete_test.go
+++ b/test/integration/swiftv2/longRunningCluster/datapath_delete_test.go
@@ -1,3 +1,6 @@
+//go:build delete_test
+// +build delete_test
+
 package longRunningCluster
 
 import (

From 1a3e324439bd82d2240b61dbc8b5d20002091a8f Mon Sep 17 00:00:00 2001
From: sivakami
Date: Sat, 22 Nov 2025 23:44:42 -0800
Subject: [PATCH 7/7] remove scheduled run.
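With the cron trigger commented out, runs are launched manually from the pipeline UI; the test suites themselves are unchanged. A run can still be reproduced outside the pipeline by exporting the same variables the template sets and invoking ginkgo with the matching build tag. A sketch under assumed values (the RG name below is illustrative and must point at an existing long-running setup):

```go
package main

import (
	"os"
	"os/exec"
)

// Mirrors the pipeline's create step: BUILD_ID equals the resource group
// name, matching the template's `export RG`/`export BUILD_ID` lines.
func main() {
	cmd := exec.Command("ginkgo", "-v", "-trace", "--timeout=1h", "--tags=create_test")
	cmd.Dir = "./test/integration/swiftv2/longRunningCluster"
	cmd.Env = append(os.Environ(),
		"RG=sv2-long-run-centraluseuap",       // illustrative setup name
		"BUILD_ID=sv2-long-run-centraluseuap", // must match the RG name
	)
	cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr
	if err := cmd.Run(); err != nil {
		os.Exit(1)
	}
}
```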
---
 .pipelines/swiftv2-long-running/pipeline.yaml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.pipelines/swiftv2-long-running/pipeline.yaml b/.pipelines/swiftv2-long-running/pipeline.yaml
index c7a320cd72..b8d90406b2 100644
--- a/.pipelines/swiftv2-long-running/pipeline.yaml
+++ b/.pipelines/swiftv2-long-running/pipeline.yaml
@@ -2,13 +2,13 @@ trigger: none
 pr: none
 
-# Schedule: Run every 1 hour
-schedules:
-  - cron: "0 */1 * * *" # Every 1 hour at minute 0
-    displayName: "Run tests every 1 hour"
-    branches:
-      include:
-        - sv2-long-running-pipeline
-    always: true # Run even if there are no code changes
+# Schedule (currently disabled): uncomment to run tests every 1 hour
+# schedules:
+#   - cron: "0 */1 * * *" # Every 1 hour at minute 0
+#     displayName: "Run tests every 1 hour"
+#     branches:
+#       include:
+#         - sv2-long-running-pipeline
+#     always: true # Run even if there are no code changes
 
 parameters:
 - name: subscriptionId