4646 name : " $(BUILD_POOL_NAME_DEFAULT)"
4747 strategy :
4848 matrix :
49+ npm_linux_amd64 :
50+ arch : amd64
51+ name : npm
52+ os : linux
4953 npm_windows2022_amd64 :
5054 arch : amd64
5155 name : npm
7478 FQDN : empty
7579 strategy :
7680 matrix :
77- v2-windows :
78- PROFILE : " scale-win"
81+ # v2-linux:
82+ # PROFILE: "sc-lin"
83+ # NUM_NETPOLS: 800
84+ # INITIAL_CONNECTIVITY_TIMEOUT: 60
85+ ws22 :
86+ PROFILE : " sc-ws22"
87+ NUM_NETPOLS : 50
88+ INITIAL_CONNECTIVITY_TIMEOUT : 720
7989 steps :
8090 - checkout : self
8191 - bash : |
@@ -115,44 +125,46 @@ jobs:
115125 az extension add --name aks-preview
116126 az extension update --name aks-preview
117127
118- export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
119-
120- echo "Creating resource group named $CLUSTER_NAME"
121- az group create --name $CLUSTER_NAME -l $(LOCATION) -o table
128+ echo "Creating resource group named $(RESOURCE_GROUP)"
129+ az group create --name $(RESOURCE_GROUP) -l $(LOCATION) -o table
122130
123- echo "Creating resource group named $CLUSTER_NAME"
131+ export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
132+ echo "Creating cluster named $CLUSTER_NAME"
124133 az aks create \
125- --resource-group $CLUSTER_NAME \
134+ --resource-group $(RESOURCE_GROUP) \
126135 --name $CLUSTER_NAME \
127136 --generate-ssh-keys \
128137 --windows-admin-username e2eadmin \
129138 --windows-admin-password alpha@numeric!password2 \
130139 --network-plugin azure \
131140 --vm-set-type VirtualMachineScaleSets \
132141 --node-vm-size Standard_D4s_v3 \
133- --node-count 1
134-
135- # don't schedule anything on the linux system pool
136- echo "Updating $CLUSTER_NAME to not schedule anything on linux pool..."
137- az aks nodepool update \
138- --cluster-name $CLUSTER_NAME \
139- -g $CLUSTER_NAME \
140- -n nodepool1 \
141- --node-taints CriticalAddonsOnly=true:NoSchedule
142-
143- echo "Adding Windows nodepool to $CLUSTER_NAME"
144- az aks nodepool add \
145- --resource-group $CLUSTER_NAME \
146- --cluster-name $CLUSTER_NAME \
147- --name awin22 \
148- --os-type Windows \
149- --os-sku Windows2022 \
150- --node-vm-size Standard_D4s_v3 \
151142 --node-count 1 \
152143 --max-pods 100
153144
145+ if [[ $(PROFILE) == *ws22 ]]; then
146+ # don't schedule anything on the linux system pool
147+ echo "Updating $CLUSTER_NAME to not schedule anything on linux pool..."
148+ az aks nodepool update \
149+ --cluster-name $CLUSTER_NAME \
150+ -g $(RESOURCE_GROUP) \
151+ -n nodepool1 \
152+ --node-taints CriticalAddonsOnly=true:NoSchedule
153+
154+ echo "Adding Windows nodepool to $CLUSTER_NAME"
155+ az aks nodepool add \
156+ --resource-group $(RESOURCE_GROUP) \
157+ --cluster-name $CLUSTER_NAME \
158+ --name awin22 \
159+ --os-type Windows \
160+ --os-sku Windows2022 \
161+ --node-vm-size Standard_D4s_v3 \
162+ --node-count 1 \
163+ --max-pods 100
164+ fi
165+
154166 echo "Getting credentials to $CLUSTER_NAME"
155- az aks get-credentials -g $CLUSTER_NAME -n $CLUSTER_NAME --overwrite-existing --file ./kubeconfig
167+ az aks get-credentials -g $(RESOURCE_GROUP) -n $CLUSTER_NAME --overwrite-existing --file ./kubeconfig
156168 mkdir -p ~/.kube/
157169 cp ./kubeconfig ~/.kube/config
158170
@@ -168,28 +180,42 @@ jobs:
168180 set -e
169181
170182 # deploy azure-npm
171- cp $(Pipeline.Workspace)/s/npm/examples/windows/azure-npm.yaml azure-npm.yaml
172- # set higher memory limit
183+ cp $(Pipeline.Workspace)/s/npm/azure-npm.yaml azure-npm.yaml
173184 sed -i 's/memory: 300Mi/memory: 1000Mi/g' azure-npm.yaml
174185 kubectl apply -f azure-npm.yaml
175186
187+ cp $(Pipeline.Workspace)/s/npm/examples/windows/azure-npm.yaml azure-npm-win.yaml
188+ # set higher memory limit
189+ sed -i 's/memory: 300Mi/memory: 1000Mi/g' azure-npm-win.yaml
190+ kubectl apply -f azure-npm-win.yaml
191+
176192 # swap azure-npm image with one built during run
193+ kubectl set image daemonset/azure-npm -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:linux-amd64-$(TAG)
177194 kubectl set image daemonset/azure-npm-win -n kube-system azure-npm=$IMAGE_REGISTRY/azure-npm:windows-amd64-ltsc2022-$(TAG)
178195
179- sleep 5s
196+ sleep 30s
180197 echo "waiting for NPM to start running..."
181- kubectl wait --for=condition=Ready pod -l k8s-app=azure-npm -n kube-system --timeout=20m
198+ kubectl wait --for=condition=Ready pod -l k8s-app=azure-npm -n kube-system --timeout=15m || {
199+ kubectl describe pod -n kube-system -l k8s-app=azure-npm
200+ echo "##vso[task.logissue type=error]NPM failed to start running"
201+ exit 1
202+ }
182203 echo "sleep 3m to let NPM restart in case of bootup failure due to HNS errors"
183204 sleep 3m
184205
185206 kubectl get po -n kube-system -owide -A
186207
187- echo "labeling Windows nodes for scale test"
188- kubectl get node -o wide | grep "Windows Server 2022 Datacenter" | awk '{print $1}' | xargs -n 1 -I {} kubectl label node {} scale-test=true connectivity-test=true
208+ if [[ $(PROFILE) == *ws22 ]]; then
209+ echo "labeling Windows nodes for scale test"
210+ kubectl get node -o wide | grep "Windows Server 2022 Datacenter" | awk '{print $1}' | xargs -n 1 -I {} kubectl label node {} scale-test=true connectivity-test=true
211+ else
212+ echo "labeling Linux nodes for scale test"
213+ kubectl get node -o wide | grep "Ubuntu" | awk '{print $1}' | xargs -n 1 -I {} kubectl label node {} scale-test=true connectivity-test=true
214+ fi
189215
190216 export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
191217 echo "Showing cluster status for $CLUSTER_NAME"
192- FQDN=`az aks show -n $CLUSTER_NAME -g $CLUSTER_NAME --query fqdn -o tsv`
218+ FQDN=`az aks show -n $CLUSTER_NAME -g $(RESOURCE_GROUP) --query fqdn -o tsv`
193219 echo "##vso[task.setvariable variable=FQDN]$FQDN"
194220
195221 - task : AzureCLI@2
@@ -202,15 +228,16 @@ jobs:
202228 condition : succeeded()
203229 inlineScript : |
204230 set -e
205- mkdir -p $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
231+ export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
232+ mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
206233 ./kwok --kubeconfig ~/.kube/config \
207234 --cidr=155.0.0.0/16 \
208235 --node-ip=155.0.0.1 \
209236 --manage-all-nodes=false \
210237 --manage-nodes-with-annotation-selector=kwok.x-k8s.io/node=fake \
211238 --manage-nodes-with-label-selector= \
212239 --disregard-status-with-annotation-selector=kwok.x-k8s.io/status=custom \
213- --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE) /kwok-scale-up.log &
240+ --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME /kwok-scale-up.log &
214241 kwok_pid=$!
215242
216243 # 20 kwok nodes
@@ -229,8 +256,8 @@ jobs:
229256 --max-real-pods-per-node=30 \
230257 --num-real-deployments=10 \
231258 --num-real-replicas=3 \
232- --num-network-policies=50 \
233- --num-unapplied-network-policies=50 \
259+ --num-network-policies=$(NUM_NETPOLS) \
260+ --num-unapplied-network-policies=$(NUM_NETPOLS) \
234261 --num-unique-labels-per-pod=2 \
235262 --num-unique-labels-per-deployment=2 \
236263 --num-shared-labels-per-pod=10
@@ -248,28 +275,30 @@ jobs:
248275 condition : succeeded()
249276 inlineScript : |
250277 set -e
251- mkdir -p $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
278+ export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
279+ mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
252280 ./kwok --kubeconfig ~/.kube/config \
253281 --cidr=155.0.0.0/16 \
254282 --node-ip=155.0.0.1 \
255283 --manage-all-nodes=false \
256284 --manage-nodes-with-annotation-selector=kwok.x-k8s.io/node=fake \
257285 --manage-nodes-with-label-selector= \
258286 --disregard-status-with-annotation-selector=kwok.x-k8s.io/status=custom \
259- --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE) /kwok-bootup-latency.log &
287+ --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME /kwok-bootup-latency.log &
260288 kwok_pid=$!
261289
262290 kubectl rollout restart -n kube-system ds azure-npm-win
263291 echo "sleeping 3 minutes to allow NPM pods to restart after scale-up..."
264292 sleep 3m
265293
266294 cd $(Pipeline.Workspace)/s/test/scale/connectivity/
295+ # notes for Windows:
267296 # initial connectivity should be established within 15 minutes of NPM restart (12 minute timeout since we already waited 3 minutes above)
268297 # adding new network policy to all 30 Pods should happen within 30 seconds
269298 set +e
270299 ./test-connectivity.sh --kubectl-binary=$kubectlPath \
271300 --num-scale-pods-to-verify=all \
272- --max-wait-for-initial-connectivity=$((12*60) ) \
301+ --max-wait-for-initial-connectivity=$(INITIAL_CONNECTIVITY_TIMEOUT ) \
273302 --max-wait-after-adding-netpol=30
274303 rc=$?
275304 if [[ $rc != 0 ]]; then
@@ -286,18 +315,19 @@ jobs:
286315 scriptType : " bash"
287316 scriptLocation : " inlineScript"
288317 failOnStderr : true
289- # condition: succeeded()
318+ condition : succeeded()
290319 inlineScript : |
291320 set -e
292- mkdir -p $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
321+ export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
322+ mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
293323 ./kwok --kubeconfig ~/.kube/config \
294324 --cidr=155.0.0.0/16 \
295325 --node-ip=155.0.0.1 \
296326 --manage-all-nodes=false \
297327 --manage-nodes-with-annotation-selector=kwok.x-k8s.io/node=fake \
298328 --manage-nodes-with-label-selector= \
299329 --disregard-status-with-annotation-selector=kwok.x-k8s.io/status=custom \
300- --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE) /kwok-crud.log &
330+ --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME /kwok-crud.log &
301331 kwok_pid=$!
302332
303333 # will delete scale-test and connectivity-test namespaces from previous run
@@ -342,15 +372,16 @@ jobs:
342372 condition : succeeded()
343373 inlineScript : |
344374 set -e
345- mkdir -p $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE)
375+ export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
376+ mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
346377 ./kwok --kubeconfig ~/.kube/config \
347378 --cidr=155.0.0.0/16 \
348379 --node-ip=155.0.0.1 \
349380 --manage-all-nodes=false \
350381 --manage-nodes-with-annotation-selector=kwok.x-k8s.io/node=fake \
351382 --manage-nodes-with-label-selector= \
352383 --disregard-status-with-annotation-selector=kwok.x-k8s.io/status=custom \
353- --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$(RESOURCE_GROUP)-$(PROFILE) /kwok-crud-connectivity.log &
384+ --disregard-status-with-label-selector= > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME /kwok-crud-connectivity.log &
354385 kwok_pid=$!
355386
356387 cd $(Pipeline.Workspace)/s/test/scale/connectivity/
@@ -371,14 +402,15 @@ jobs:
371402
372403 - bash : |
373404 export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE)
374- cp cyclonus-$CLUSTER_NAME $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/cyclonus-$CLUSTER_NAME
375405 echo "Getting cluster state for $CLUSTER_NAME"
376406 mkdir -p $(System.DefaultWorkingDirectory)/$CLUSTER_NAME
377- kubectl get pods -n kube-system | grep npm
378- kubectl logs -n kube-system -l k8s-app=azure-npm --tail -1 --prefix > $(System.DefaultWorkingDirectory)/$CLUSTER_NAME/npm-logs_$(PROFILE).txt
379- # capture any previous logs in case there was a crash
380- npmPodList=`kubectl get pods -n kube-system | grep npm | awk '{print $1}'`
407+ kubectl get pods -n kube-system -owide | grep npm | grep -v kwok
408+ npmPodList=`kubectl get pods -n kube-system -owide | grep npm | grep -v kwok | awk '{print $1}'`
381409 for npmPod in $npmPodList; do
410+ logFile=$(System.DefaultWorkingDirectory)/$CLUSTER_NAME/npm-logs_$(PROFILE)-$npmPod.txt
411+ kubectl logs -n kube-system $npmPod > $logFile
412+
413+ # capture any previous logs in case there was a crash
382414 previousLogFile=$(System.DefaultWorkingDirectory)/$CLUSTER_NAME/previous-npm-logs_$(PROFILE).txt
383415 kubectl logs -n kube-system $npmPod -p > $previousLogFile
384416 if [[ $? -ne 0 ]]; then
@@ -413,6 +445,7 @@ jobs:
413445 azureSubscription : $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
414446 scriptType : " bash"
415447 scriptLocation : " inlineScript"
448+ condition : succeeded()
416449 inlineScript : |
417450 echo Deleting $(RESOURCE_GROUP)
418451 az group delete -n $(RESOURCE_GROUP) --yes
0 commit comments