|
91 | 91 | with: |
92 | 92 | driver: docker |
93 | 93 |
|
| 94 | + - name: Verify Docker Installation |
| 95 | + shell: pwsh |
| 96 | + run: | |
| 97 | + Write-Host "Verifying Docker installation..." |
| 98 | + docker --version |
| 99 | + docker info; if ($LASTEXITCODE -ne 0) { Write-Host "❌ Docker daemon is not reachable"; exit 1 } # pwsh does not stop on a failed native command by default |
| 100 | + Write-Host "✅ Docker is ready" |
| 101 | +
|
| 102 | + - name: Login to Azure Container Registry |
| 103 | + shell: pwsh |
| 104 | + run: | |
| 105 | + Write-Host "Pre-authenticating to ACR..." |
| 106 | + # Note: Full ACR login will happen in resourcedeployment.ps1 |
| 107 | + # This is just to verify ACR credentials are working; the exit code is checked explicitly because pwsh does not stop on a failed native command by default |
| 108 | + if ("${{ inputs.BUILD_DOCKER_IMAGE }}" -eq "true") { |
| 109 | + $ACR_NAME = "${{ secrets.ACR_TEST_USERNAME }}" |
| 110 | + Write-Host "ACR Name: $ACR_NAME" |
| 111 | + az acr login --name $ACR_NAME; if ($LASTEXITCODE -ne 0) { Write-Host "❌ ACR authentication failed with exit code: $LASTEXITCODE"; exit 1 } |
| 112 | + Write-Host "✅ ACR authentication successful" |
| 113 | + } else { |
| 114 | + Write-Host "Skipping ACR pre-authentication (using existing images)" |
| 115 | + } |
| 116 | +
|
94 | 117 | - name: Configure Parameters Based on WAF Setting |
95 | 118 | shell: bash |
96 | 119 | run: | |
@@ -222,14 +245,33 @@ jobs: |
222 | 245 | - name: Run Deployment Script with Input |
223 | 246 | shell: pwsh |
224 | 247 | run: | |
| 248 | + $ErrorActionPreference = "Stop" |
| 249 | + |
| 250 | + # Verify Docker is still running (by default $ErrorActionPreference does not act on native exit codes, so check $LASTEXITCODE) |
| 251 | + Write-Host "Verifying Docker before deployment..." |
| 252 | + docker ps; if ($LASTEXITCODE -ne 0) { Write-Host "❌ Docker is not running (exit code: $LASTEXITCODE)"; exit 1 } |
| 253 | + |
225 | 254 | cd Deployment |
226 | 255 | $input = @" |
227 | 256 | ${{ secrets.EMAIL }} |
228 | 257 | yes |
229 | 258 | "@ |
| 259 | + |
| 260 | + Write-Host "Starting resourcedeployment.ps1..." |
230 | 261 | $input | pwsh ./resourcedeployment.ps1 |
| 262 | + |
| 263 | + if ($LASTEXITCODE -ne 0) { |
| 264 | + Write-Host "❌ resourcedeployment.ps1 failed with exit code: $LASTEXITCODE" |
| 265 | + exit 1 |
| 266 | + } |
| 267 | + |
| 268 | + Write-Host "✅ resourcedeployment.ps1 completed successfully" |
231 | 269 | Write-Host "Resource Group Name is ${{ env.RESOURCE_GROUP_NAME }}" |
232 | 270 | Write-Host "Kubernetes resource group is ${{ env.AZURE_AKS_NAME }}" |
| 271 | + |
| 272 | + # Verify pods are created |
| 273 | + Write-Host "Checking pod status..." |
| 274 | + kubectl get pods -n ns-km |
233 | 275 | env: |
234 | 276 | # From GitHub secrets (for login) |
235 | 277 | AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} |
@@ -333,6 +375,73 @@ jobs: |
333 | 375 | "WEB_APPURL=$WEB_APP_URL" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append |
334 | 376 | "WEB_APPURL=$WEB_APP_URL" | Out-File -FilePath $env:GITHUB_OUTPUT -Encoding utf8 -Append |
335 | 377 |
|
| 378 | + - name: Verify ACR Images |
| 379 | + shell: bash |
| 380 | + run: | |
| 381 | + echo "🔍 Checking if Docker images exist in ACR..." |
| 382 | + ACR_NAME="${{ env.AZURE_CONTAINER_REGISTRY_NAME }}" |
| 383 | + |
| 384 | + echo "Listing all repositories in ACR: $ACR_NAME" |
| 385 | + az acr repository list --name "$ACR_NAME" --output table || echo "No repositories found" |
| 386 | + |
| 387 | + echo "" |
| 388 | + echo "Checking for required images (kmgs namespace)..." |
| 389 | + for repo in aiservice kernelmemory frontapp; do |
| 390 | + echo "Checking kmgs/$repo..." |
| 391 | + tags=$(az acr repository show-tags --name "$ACR_NAME" --repository "kmgs/$repo" --output table 2>/dev/null || echo "NOT FOUND") |
| 392 | + if [[ -z "$tags" || "$tags" == "NOT FOUND" ]]; then # empty output means there is no tag to pull either |
| 393 | + echo "❌ Image kmgs/$repo not found in ACR!" |
| 394 | + else |
| 395 | + echo "✅ Found tags: $tags" |
| 396 | + fi |
| 397 | + done |
| 398 | +
|
| 399 | + - name: Check Pod Status and Logs |
| 400 | + shell: bash |
| 401 | + run: | |
| 402 | + echo "🔍 Checking Kubernetes pod status..." |
| 403 | + kubectl get pods -n ns-km -o wide |
| 404 | + |
| 405 | + echo "" |
| 406 | + echo "📊 Checking pod events..." |
| 407 | + kubectl get events -n ns-km --sort-by='.lastTimestamp' | tail -20 |
| 408 | + |
| 409 | + # Collect pods that are neither Running nor Succeeded (pods of completed Jobs are healthy, not failed) |
| 410 | + failed_pods=$(kubectl get pods -n ns-km -o json | jq -r '.items[] | select(.status.phase != "Running" and .status.phase != "Succeeded") | .metadata.name') |
| 411 | + |
| 412 | + if [ -n "$failed_pods" ]; then |
| 413 | + echo "⚠️ Found pods not in Running state:" |
| 414 | + echo "$failed_pods" |
| 415 | + |
| 416 | + # Describe each failed pod for detailed error information |
| 417 | + for pod in $failed_pods; do |
| 418 | + echo "" |
| 419 | + echo "📋 Describing pod: $pod" |
| 420 | + kubectl describe pod "$pod" -n ns-km | tail -30 |
| 421 | + |
| 422 | + echo "" |
| 423 | + echo "📄 Checking pod logs (if available):" |
| 424 | + kubectl logs "$pod" -n ns-km --tail=50 || echo "No logs available yet" |
| 425 | + done |
| 426 | + |
| 427 | + # Check if image pulls are failing; "[]?" tolerates pods without containerStatuses (jq would otherwise abort the step) and any() lists each pod once |
| 428 | + image_pull_errors=$(kubectl get pods -n ns-km -o json | jq -r '.items[] | select(any(.status.containerStatuses[]?; .state.waiting.reason == "ImagePullBackOff" or .state.waiting.reason == "ErrImagePull")) | .metadata.name') |
| 429 | + |
| 430 | + if [ -n "$image_pull_errors" ]; then |
| 431 | + echo "" |
| 432 | + echo "❌ ERROR: Pods are failing to pull Docker images!" |
| 433 | + echo "This usually means:" |
| 434 | + echo "1. Docker images weren't built/pushed to ACR" |
| 435 | + echo "2. AKS doesn't have permission to pull from ACR" |
| 436 | + echo "3. Image tags are incorrect" |
| 437 | + echo "" |
| 438 | + echo "Failing pods: $image_pull_errors" |
| 439 | + exit 1 |
| 440 | + fi |
| 441 | + else |
| 442 | + echo "✅ All pods are running successfully" |
| 443 | + fi |
| 444 | +
|
336 | 445 | - name: Validate Deployment |
337 | 446 | shell: bash |
338 | 447 | run: | |
|
0 commit comments