Skip to content

Commit 52bc98f

Browse files
committed
Add long running retry for getting public image
1 parent 6739b4a commit 52bc98f

File tree

1 file changed

+51
-3
lines changed

1 file changed

+51
-3
lines changed

.github/workflows/publish-serverless-init-to-ghcr.yml

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ on:
44
workflow_dispatch:
55
inputs:
66
source_image:
7-
description: 'Source image from registry.ddbuild.io (e.g., registry.ddbuild.io/ci/datadog-agent/serverless-init:1.7.8)'
7+
description: 'Source image from registry.datadoghq.com (e.g., registry.datadoghq.com/serverless-init:1.7.8)'
88
required: true
99
type: string
1010
version:
@@ -49,6 +49,33 @@ jobs:
4949
username: ${{ github.actor }}
5050
password: ${{ secrets.GITHUB_TOKEN }}
5151

52+
- name: Wait for image availability
  env:
    # Hand the workflow input to the script through the environment rather
    # than expanding the expression inside the script text, so the value is
    # always treated as data and can never be parsed as shell code.
    SOURCE_IMAGE: ${{ inputs.source_image }}
  run: |
    # Polls the source registry until the manifest of SOURCE_IMAGE can be
    # fetched. Exits 0 as soon as it can, or 1 once every attempt has failed.
    MAX_ATTEMPTS=20
    RETRY_DELAY=30
    # The messages below quote MAX_ATTEMPTS × RETRY_DELAY = 600s (10 minutes)
    # as a round upper bound. No sleep follows the final attempt, so the time
    # spent sleeping is at most 19 × 30s = 570s, plus the checks themselves.

    echo "⏳ Waiting for image to be available: ${SOURCE_IMAGE}"
    echo "Will check every ${RETRY_DELAY}s for up to $((MAX_ATTEMPTS * RETRY_DELAY))s"

    attempt=1
    while [ "${attempt}" -le "${MAX_ATTEMPTS}" ]; do
      echo "Attempt ${attempt}/${MAX_ATTEMPTS}: Checking if image exists..."

      # Discard the manifest itself but keep crane's stderr, so a failure
      # caused by something other than "not published yet" (bad reference,
      # authentication error, ...) shows up in the log instead of being
      # retried silently.
      if crane_error=$(crane manifest "${SOURCE_IMAGE}" 2>&1 >/dev/null); then
        echo "✅ Image is available!"
        exit 0
      fi
      echo "crane manifest failed: ${crane_error}"

      # Only sleep when another attempt will follow.
      if [ "${attempt}" -lt "${MAX_ATTEMPTS}" ]; then
        echo "⏳ Image not yet available, waiting ${RETRY_DELAY}s..."
        sleep "${RETRY_DELAY}"
      fi
      attempt=$((attempt + 1))
    done

    echo "❌ Image did not become available after $((MAX_ATTEMPTS * RETRY_DELAY))s"
    exit 1
78+
5279
- name: Copy image to GHCR
5380
run: |
5481
SOURCE_IMAGE="${{ inputs.source_image }}"
@@ -65,8 +92,29 @@ jobs:
6592
echo " - ${DEST_BASE}:${VERSION}${IMAGE_SUFFIX}"
6693
echo " - ${DEST_BASE}:v${PIPELINE_ID}${IMAGE_SUFFIX}"
6794
68-
# Copy with version tag
69-
crane copy ${SOURCE_IMAGE} ${DEST_BASE}:${VERSION}${IMAGE_SUFFIX}
95+
# Copy with version tag (with retry logic)
# Up to 3 attempts with a 10s pause between them (at most 20s of sleeping).
# This workflow is triggered in parallel with the publish attempt to registry.datadoghq.com
# registry.datadoghq.com should normally need about 30 seconds to receive the new image
MAX_COPY_ATTEMPTS=3
COPY_RETRY_DELAY=10

i=1
while [ "${i}" -le "${MAX_COPY_ATTEMPTS}" ]; do
  echo "Copying image (attempt ${i}/${MAX_COPY_ATTEMPTS})..."

  # Both references are quoted so each reaches crane as exactly one argument,
  # even if a component is empty or contains whitespace or glob characters.
  if crane copy "${SOURCE_IMAGE}" "${DEST_BASE}:${VERSION}${IMAGE_SUFFIX}"; then
    echo "✅ Image copied successfully!"
    break
  fi

  if [ "${i}" -lt "${MAX_COPY_ATTEMPTS}" ]; then
    echo "⚠️ Copy failed, retrying in ${COPY_RETRY_DELAY}s..."
    sleep "${COPY_RETRY_DELAY}"
  else
    # Last attempt failed: abort the step so the tagging below never runs
    # against an image that was not copied.
    echo "❌ Failed to copy image after ${MAX_COPY_ATTEMPTS} attempts"
    exit 1
  fi
  i=$((i + 1))
done
70118
71119
# Tag for pipeline ID
72120
crane tag ${DEST_BASE}:${VERSION}${IMAGE_SUFFIX} v${PIPELINE_ID}${IMAGE_SUFFIX}

0 commit comments

Comments
 (0)