Skip to content

Commit aa6108d

Browse files
added docker logs
1 parent bfd953c commit aa6108d

File tree

1 file changed

+109
-0
lines changed

1 file changed

+109
-0
lines changed

.github/workflows/job-deploy-windows.yml

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,29 @@ jobs:
9191
with:
9292
driver: docker
9393

94+
- name: Verify Docker Installation
  shell: pwsh
  run: |
    # Confirms that the Docker CLI and daemon respond before later steps
    # depend on them.
    # A failing native command does not stop a PowerShell script on its own,
    # so each exit code is checked explicitly; otherwise the success message
    # at the end would be printed even when `docker info` had failed.
    Write-Host "Verifying Docker installation..."

    docker --version
    if ($LASTEXITCODE -ne 0) {
      Write-Host "❌ 'docker --version' failed with exit code: $LASTEXITCODE"
      exit 1
    }

    # `docker info` talks to the daemon, so it fails when the engine is down.
    docker info
    if ($LASTEXITCODE -ne 0) {
      Write-Host "❌ 'docker info' failed with exit code: $LASTEXITCODE"
      exit 1
    }

    Write-Host "✅ Docker is ready"
101+
102+
- name: Login to Azure Container Registry
  shell: pwsh
  run: |
    # Pre-authenticates to ACR, but only when this run builds Docker images.
    # Note: Full ACR login will happen in resourcedeployment.ps1
    # This is just to verify ACR credentials are working
    Write-Host "Pre-authenticating to ACR..."
    if ("${{ inputs.BUILD_DOCKER_IMAGE }}" -eq "true") {
      # NOTE(review): the registry name is taken from a secret named
      # ACR_TEST_USERNAME — confirm that it really holds the registry name.
      $ACR_NAME = "${{ secrets.ACR_TEST_USERNAME }}"
      if ([string]::IsNullOrWhiteSpace($ACR_NAME)) {
        Write-Host "❌ ACR name is empty; check the ACR_TEST_USERNAME secret"
        exit 1
      }
      Write-Host "ACR Name: $ACR_NAME"

      az acr login --name $ACR_NAME
      # `az` is a native command: its failure does not throw, so the exit
      # code must be inspected before reporting success.
      if ($LASTEXITCODE -ne 0) {
        Write-Host "❌ ACR authentication failed with exit code: $LASTEXITCODE"
        exit 1
      }
      Write-Host "✅ ACR authentication successful"
    } else {
      Write-Host "Skipping ACR pre-authentication (using existing images)"
    }
116+
94117
- name: Configure Parameters Based on WAF Setting
95118
shell: bash
96119
run: |
@@ -222,14 +245,33 @@ jobs:
222245
- name: Run Deployment Script with Input
223246
shell: pwsh
224247
run: |
248+
$ErrorActionPreference = "Stop"
249+
250+
# Verify Docker is still running
251+
Write-Host "Verifying Docker before deployment..."
252+
docker ps
253+
225254
cd Deployment
226255
$input = @"
227256
${{ secrets.EMAIL }}
228257
yes
229258
"@
259+
260+
Write-Host "Starting resourcedeployment.ps1..."
230261
$input | pwsh ./resourcedeployment.ps1
262+
263+
if ($LASTEXITCODE -ne 0) {
264+
Write-Host "❌ resourcedeployment.ps1 failed with exit code: $LASTEXITCODE"
265+
exit 1
266+
}
267+
268+
Write-Host "✅ resourcedeployment.ps1 completed successfully"
231269
Write-Host "Resource Group Name is ${{ env.RESOURCE_GROUP_NAME }}"
232270
Write-Host "Kubernetes resource group is ${{ env.AZURE_AKS_NAME }}"
271+
272+
# Verify pods are created
273+
Write-Host "Checking pod status..."
274+
kubectl get pods -n ns-km
233275
env:
234276
# From GitHub secrets (for login)
235277
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
@@ -333,6 +375,73 @@ jobs:
333375
"WEB_APPURL=$WEB_APP_URL" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
334376
"WEB_APPURL=$WEB_APP_URL" | Out-File -FilePath $env:GITHUB_OUTPUT -Encoding utf8 -Append
335377
378+
- name: Verify ACR Images
  shell: bash
  run: |
    # Best-effort diagnostic: lists the registry's repositories and reports
    # whether each required kmgs/* image has tags. Lookup failures are
    # reported in the log but do not fail the step.
    echo "🔍 Checking if Docker images exist in ACR..."
    ACR_NAME="${{ env.AZURE_CONTAINER_REGISTRY_NAME }}"

    echo "Listing all repositories in ACR: $ACR_NAME"
    az acr repository list --name "$ACR_NAME" --output table || echo "No repositories found"

    echo ""
    echo "Checking for required images (kmgs namespace)..."
    for repo in aiservice kernelmemory frontapp; do
      echo "Checking kmgs/$repo..."
      # Decide on the command's exit status (and non-empty output) instead of
      # comparing the captured text with a sentinel: partial output followed
      # by a failure, or an empty tag list, must not be reported as "found".
      # stderr is discarded on purpose; a missing repository is an expected
      # outcome here.
      if tags=$(az acr repository show-tags --name "$ACR_NAME" --repository "kmgs/$repo" --output table 2>/dev/null) \
        && [ -n "$tags" ]; then
        echo "✅ Found tags: $tags"
      else
        echo "❌ Image kmgs/$repo not found in ACR!"
      fi
    done
398+
399+
- name: Check Pod Status and Logs
  shell: bash
  run: |
    # Prints pod status and recent events for namespace ns-km, dumps
    # describe/log output for every pod that is neither Running nor
    # Succeeded, and fails the step when a pod cannot pull its image.
    echo "🔍 Checking Kubernetes pod status..."
    kubectl get pods -n ns-km -o wide

    echo ""
    echo "📊 Checking pod events..."
    kubectl get events -n ns-km --sort-by='.lastTimestamp' | tail -20

    # Take one JSON snapshot so both checks below look at the same state.
    pods_json=$(kubectl get pods -n ns-km -o json)

    # Pods in phase Succeeded have completed normally and are not failures,
    # so only phases other than Running/Succeeded are reported.
    failed_pods=$(jq -r '
      .items[]
      | select(.status.phase != "Running" and .status.phase != "Succeeded")
      | .metadata.name' <<<"$pods_json")

    if [ -n "$failed_pods" ]; then
      echo "⚠️ Found pods not in Running state:"
      echo "$failed_pods"

      # Describe each failed pod for detailed error information
      mapfile -t pod_names <<<"$failed_pods"
      for pod in "${pod_names[@]}"; do
        echo ""
        echo "📋 Describing pod: $pod"
        kubectl describe pod "$pod" -n ns-km | tail -30

        echo ""
        echo "📄 Checking pod logs (if available):"
        kubectl logs "$pod" -n ns-km --tail=50 || echo "No logs available yet"
      done

      # Check if image pulling is the issue.
      # - `// []` tolerates pods that have no container statuses yet
      #   (iterating null would make jq fail and abort the step).
      # - init containers are inspected as well as regular containers.
      # - ErrImagePull is the state reported before back-off starts.
      # - any(...) yields each pod name once, even with several bad containers.
      image_pull_errors=$(jq -r '
        .items[]
        | select(any(
            ((.status.initContainerStatuses // []) + (.status.containerStatuses // []))[];
            .state.waiting.reason == "ImagePullBackOff"
              or .state.waiting.reason == "ErrImagePull"))
        | .metadata.name' <<<"$pods_json")

      if [ -n "$image_pull_errors" ]; then
        echo ""
        echo "❌ ERROR: Pods are failing to pull Docker images!"
        echo "This usually means:"
        echo "1. Docker images weren't built/pushed to ACR"
        echo "2. AKS doesn't have permission to pull from ACR"
        echo "3. Image tags are incorrect"
        echo ""
        echo "Failing pods: $image_pull_errors"
        exit 1
      fi
    else
      echo "✅ All pods are running successfully"
    fi
444+
336445
- name: Validate Deployment
337446
shell: bash
338447
run: |

0 commit comments

Comments
 (0)