Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
438 changes: 438 additions & 0 deletions .pipelines/swiftv2-long-running/README.md

Large diffs are not rendered by default.

43 changes: 24 additions & 19 deletions .pipelines/swiftv2-long-running/pipeline.yaml
Original file line number Diff line number Diff line change
@@ -1,42 +1,47 @@
trigger: none
pr: none

# Schedule: Run every 3 hours
schedules:
- cron: "0 */3 * * *" # Every 3 hours at minute 0
displayName: "Run tests every 3 hours"
branches:
include:
- sv2-long-running-pipeline-stage2
always: true # Run even if there are no code changes

parameters:
- name: subscriptionId
displayName: "Azure Subscription ID"
type: string
default: "37deca37-c375-4a14-b90a-043849bd2bf1"

- name: serviceConnection
displayName: "Azure Service Connection"
type: string
default: "Azure Container Networking - Standalone Test Service Connection"

- name: location
displayName: "Deployment Region"
type: string
default: "centraluseuap"

- name: resourceGroupName
displayName: "Resource Group Name"
type: string
default: "long-run-$(Build.BuildId)"

- name: vmSkuDefault
displayName: "VM SKU for Default Node Pool"
type: string
default: "Standard_D2s_v3"

- name: vmSkuHighNIC
displayName: "VM SKU for High NIC Node Pool"
type: string
default: "Standard_D16s_v3"
- name: runSetupStages
displayName: "Create New Infrastructure Setup"
type: boolean
default: false

- name: serviceConnection
displayName: "Azure Service Connection"
# Setup-only parameters (only used when runSetupStages=true)
- name: resourceGroupName
displayName: "Resource Group Name used when Create new Infrastructure Setup is selected"
type: string
default: "Azure Container Networking - Standalone Test Service Connection"
default: "sv2-long-run-$(Build.BuildId)"

extends:
template: template/long-running-pipeline-template.yaml
parameters:
subscriptionId: ${{ parameters.subscriptionId }}
location: ${{ parameters.location }}
resourceGroupName: ${{ parameters.resourceGroupName }}
vmSkuDefault: ${{ parameters.vmSkuDefault }}
vmSkuHighNIC: ${{ parameters.vmSkuHighNIC }}
serviceConnection: ${{ parameters.serviceConnection }}
runSetupStages: ${{ parameters.runSetupStages }}
144 changes: 100 additions & 44 deletions .pipelines/swiftv2-long-running/scripts/create_aks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,57 +7,113 @@ RG=$3
VM_SKU_DEFAULT=$4
VM_SKU_HIGHNIC=$5

CLUSTER_COUNT=2
CLUSTER_PREFIX="aks"
DEFAULT_NODE_COUNT=1
COMMON_TAGS="fastpathenabled=true RGOwner=LongRunningTestPipelines stampcreatorserviceinfo=true"

wait_for_provisioning() { # Helper for safe retry/wait for provisioning states (basic)
local rg="$1" clusterName="$2"
echo "Waiting for AKS '$clusterName' in RG '$rg' to reach Succeeded/Failed (polling)..."
CLUSTER_COUNT=2
CLUSTER_PREFIX="aks"


# Asks the Subnet Delegator service to stamp a VNet, retrying until it succeeds.
# Arguments: $1 - VNet resource ID; every "/" in it is percent-encoded before use
# Outputs:   progress messages on stdout; after a failed attempt the captured
#            response is printed as well
# Returns:   0 as soon as a response containing "200 OK" is seen; otherwise the
#            whole script exits with status 1 once max_retries attempts are used up
# Note: only vnet_id is declared local; every other variable assigned below is global.
stamp_vnet() {
local vnet_id="$1"

# File (relative to the current directory) that receives the captured command output.
responseFile="response.txt"
# Replace each "/" with %2F so the resource ID fits into a single URL path segment.
modified_vnet="${vnet_id//\//%2F}"
# The inner single quotes are part of the string; they keep the curl invocation
# together as one --command value when the text is re-parsed by `script -c` below.
cmd_stamp_curl="'curl -v -X PUT http://localhost:8080/VirtualNetwork/$modified_vnet/stampcreatorservicename'"
# Container app name, resource group and subscription are hard-coded here.
# NOTE(review): presumably the curl runs inside the container app, which is why it targets localhost — confirm.
cmd_containerapp_exec="az containerapp exec -n subnetdelegator-westus-u3h4j -g subnetdelegator-westus --subscription 9b8218f9-902a-4d20-a65c-e98acec5362f --command $cmd_stamp_curl"
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same hardcoded credentials issue. The script contains hardcoded subscription ID 9b8218f9-902a-4d20-a65c-e98acec5362f and references to subnetdelegator-westus-u3h4j container app. Consider parameterizing these values.

Copilot uses AI. Check for mistakes.

# Retry budget: up to 10 attempts with a 15-second pause after each failure.
max_retries=10
sleep_seconds=15
retry_count=0

while [[ $retry_count -lt $max_retries ]]; do
# `script` runs the command and records its terminal output in $responseFile.
# NOTE(review): presumably used because `az containerapp exec` expects a terminal — confirm.
script --quiet -c "$cmd_containerapp_exec" "$responseFile"
# Success is detected purely by the literal text "200 OK" in the captured output
# (curl is run with -v, so the response status line is part of that output).
if grep -qF "200 OK" "$responseFile"; then
echo "Subnet Delegator successfully stamped the vnet"
return 0
else
echo "Subnet Delegator failed to stamp the vnet, attempt $((retry_count+1))"
# Dump the captured response so the failure can be diagnosed from the log.
cat "$responseFile"
retry_count=$((retry_count+1))
sleep "$sleep_seconds"
fi
done

# All attempts failed: abort the entire script, not just this function.
echo "Failed to stamp the vnet even after $max_retries attempts"
exit 1
}

wait_for_provisioning() {
local rg="$1" clusterName="$2"
echo "Waiting for AKS '$clusterName' in RG '$rg'..."
while :; do
state=$(az aks show --resource-group "$rg" --name "$clusterName" --query provisioningState -o tsv 2>/dev/null || true)
if [ -z "$state" ]; then
sleep 3
continue
if [[ "$state" =~ Succeeded ]]; then
echo "Provisioning state: $state"
break
fi
case "$state" in
Succeeded|Succeeded*) echo "Provisioning state: $state"; break ;;
Failed|Canceled|Rejected) echo "Provisioning finished with state: $state"; break ;;
*) printf "."; sleep 6 ;;
esac
if [[ "$state" =~ Failed|Canceled ]]; then
echo "Provisioning finished with state: $state"
break
fi
sleep 6
done
}


#########################################
# Main script starts here
#########################################

for i in $(seq 1 "$CLUSTER_COUNT"); do
echo "=============================="
echo " Working on cluster set #$i"
echo "=============================="

CLUSTER_NAME="${CLUSTER_PREFIX}-${i}"
echo "Creating AKS cluster '$CLUSTER_NAME' in RG '$RG'"

make -C ./hack/aks azcfg AZCLI=az REGION=$LOCATION

make -C ./hack/aks swiftv2-podsubnet-cluster-up \
AZCLI=az REGION=$LOCATION \
SUB=$SUBSCRIPTION_ID \
GROUP=$RG \
CLUSTER=$CLUSTER_NAME \
NODE_COUNT=$DEFAULT_NODE_COUNT \
VM_SIZE=$VM_SKU_DEFAULT \

echo " - waiting for AKS provisioning state..."
wait_for_provisioning "$RG" "$CLUSTER_NAME"

echo "Adding multi-tenant nodepool ' to '$CLUSTER_NAME'"
make -C ./hack/aks linux-swiftv2-nodepool-up \
AZCLI=az REGION=$LOCATION \
GROUP=$RG \
VM_SIZE=$VM_SKU_HIGHNIC \
CLUSTER=$CLUSTER_NAME \
SUB=$SUBSCRIPTION_ID \
echo "Creating cluster #$i..."

CLUSTER_NAME="${CLUSTER_PREFIX}-${i}"

make -C ./hack/aks azcfg AZCLI=az REGION=$LOCATION

# Create cluster with SkipAutoDeleteTill tag for persistent infrastructure
make -C ./hack/aks swiftv2-podsubnet-cluster-up \
AZCLI=az REGION=$LOCATION \
SUB=$SUBSCRIPTION_ID \
GROUP=$RG \
CLUSTER=$CLUSTER_NAME \
VM_SIZE=$VM_SKU_DEFAULT

# Add SkipAutoDeleteTill tag to cluster (2032-12-31 for long-term persistence)
az aks update -g "$RG" -n "$CLUSTER_NAME" --tags SkipAutoDeleteTill=2032-12-31 || echo "Warning: Failed to add tag to cluster"

wait_for_provisioning "$RG" "$CLUSTER_NAME"

vnet_id=$(az network vnet show -g "$RG" --name "$CLUSTER_NAME" --query id -o tsv)
echo "Found VNET: $vnet_id"

# Add SkipAutoDeleteTill tag to AKS VNet
az network vnet update --ids "$vnet_id" --set tags.SkipAutoDeleteTill=2032-12-31 || echo "Warning: Failed to add tag to vnet"

stamp_vnet "$vnet_id"

make -C ./hack/aks linux-swiftv2-nodepool-up \
AZCLI=az REGION=$LOCATION \
GROUP=$RG \
VM_SIZE=$VM_SKU_HIGHNIC \
CLUSTER=$CLUSTER_NAME \
SUB=$SUBSCRIPTION_ID

az aks get-credentials -g "$RG" -n "$CLUSTER_NAME" --admin --overwrite-existing \
--file "/tmp/${CLUSTER_NAME}.kubeconfig"

# Label all nodes with workload-type and nic-capacity labels
echo "==> Labeling all nodes in $CLUSTER_NAME with workload-type=swiftv2-linux"
kubectl --kubeconfig "/tmp/${CLUSTER_NAME}.kubeconfig" label nodes --all workload-type=swiftv2-linux --overwrite
echo "[OK] All nodes labeled with workload-type=swiftv2-linux"

# Label default nodepool (nodepool1) with low-nic capacity
echo "==> Labeling default nodepool (nodepool1) nodes with nic-capacity=low-nic"
kubectl --kubeconfig "/tmp/${CLUSTER_NAME}.kubeconfig" label nodes -l agentpool=nodepool1 nic-capacity=low-nic --overwrite
echo "[OK] Default nodepool nodes labeled with nic-capacity=low-nic"

# Label nplinux nodepool with high-nic capacity
echo "==> Labeling nplinux nodepool nodes with nic-capacity=high-nic"
kubectl --kubeconfig "/tmp/${CLUSTER_NAME}.kubeconfig" label nodes -l agentpool=nplinux nic-capacity=high-nic --overwrite
echo "[OK] nplinux nodepool nodes labeled with nic-capacity=high-nic"
done
echo "All done. Created $CLUSTER_COUNT cluster set(s)."

echo "All clusters complete."
Loading
Loading