Skip to content

Commit 965f096

Browse files
authored
Merge pull request #3906 from Agenta-AI/fix/railway-preview-build-cache
fix(ci): speed up Railway preview image builds with shared cache
2 parents ff39e0d + 58ffab8 commit 965f096

File tree

5 files changed

+260
-81
lines changed

5 files changed

+260
-81
lines changed

.github/workflows/06-railway-preview-build.yml

Lines changed: 75 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -27,24 +27,78 @@ concurrency:
2727
cancel-in-progress: true
2828

2929
jobs:
30-
build-and-push:
30+
prepare:
3131
if: github.event_name == 'workflow_dispatch' || !github.event.pull_request.draft
3232
runs-on: ubuntu-latest
3333
outputs:
34-
image_tag: ${{ steps.tag.outputs.image_tag }}
35-
pr_number: ${{ steps.tag.outputs.pr_number }}
34+
image_tag: ${{ steps.meta.outputs.image_tag }}
35+
pr_number: ${{ steps.meta.outputs.pr_number }}
36+
cache_scope: ${{ steps.meta.outputs.cache_scope }}
3637
steps:
3738
- uses: actions/checkout@v4
3839

39-
- name: Determine image tag
40-
id: tag
40+
- name: Determine build metadata
41+
id: meta
4142
run: |
4243
PR="${{ github.event.pull_request.number || inputs.pr_number }}"
4344
SHA="$(git rev-parse --short HEAD)"
44-
TAG="pr-${PR}-${SHA}"
45+
46+
if [ -n "$PR" ]; then
47+
TAG="pr-${PR}-${SHA}"
48+
else
49+
TAG="manual-${SHA}"
50+
fi
51+
52+
if [ -n "$PR" ]; then
53+
CACHE_SCOPE="pr-${PR}"
54+
else
55+
REF="$(printf "%s" "${GITHUB_REF_NAME}" | tr '[:upper:]' '[:lower:]' | tr -cs 'a-z0-9._-' '-')"
56+
REF="${REF#-}"
57+
REF="${REF%-}"
58+
CACHE_SCOPE="${REF:-manual}"
59+
fi
60+
61+
CACHE_SCOPE="${CACHE_SCOPE:0:80}"
62+
4563
echo "image_tag=${TAG}" >> "$GITHUB_OUTPUT"
4664
echo "pr_number=${PR}" >> "$GITHUB_OUTPUT"
65+
echo "cache_scope=${CACHE_SCOPE}" >> "$GITHUB_OUTPUT"
4766
echo "Image tag: ${TAG}"
67+
echo "Cache scope: ${CACHE_SCOPE}"
68+
69+
- name: Summary
70+
run: |
71+
TAG="${{ steps.meta.outputs.image_tag }}"
72+
echo "### Railway Preview Images" >> "$GITHUB_STEP_SUMMARY"
73+
echo "" >> "$GITHUB_STEP_SUMMARY"
74+
echo "| Image | Tag |" >> "$GITHUB_STEP_SUMMARY"
75+
echo "|-------|-----|" >> "$GITHUB_STEP_SUMMARY"
76+
echo "| agenta-api | \`${TAG}\` |" >> "$GITHUB_STEP_SUMMARY"
77+
echo "| agenta-web | \`${TAG}\` |" >> "$GITHUB_STEP_SUMMARY"
78+
echo "| agenta-services | \`${TAG}\` |" >> "$GITHUB_STEP_SUMMARY"
79+
echo "" >> "$GITHUB_STEP_SUMMARY"
80+
echo "- Cache scope: \`${{ steps.meta.outputs.cache_scope }}\`" >> "$GITHUB_STEP_SUMMARY"
81+
echo "- Shared cache: \`buildcache-shared\`" >> "$GITHUB_STEP_SUMMARY"
82+
83+
build-and-push:
84+
needs: prepare
85+
if: needs.prepare.outputs.image_tag != ''
86+
runs-on: ubuntu-latest
87+
strategy:
88+
fail-fast: false
89+
matrix:
90+
include:
91+
- image_name: agenta-api
92+
context: api
93+
dockerfile: api/oss/docker/Dockerfile.gh
94+
- image_name: agenta-web
95+
context: web
96+
dockerfile: web/oss/docker/Dockerfile.gh
97+
- image_name: agenta-services
98+
context: services
99+
dockerfile: services/oss/docker/Dockerfile.gh
100+
steps:
101+
- uses: actions/checkout@v4
48102

49103
- name: Log in to GHCR
50104
uses: docker/login-action@v3
@@ -56,52 +110,29 @@ jobs:
56110
- name: Set up Docker Buildx
57111
uses: docker/setup-buildx-action@v3
58112

59-
- name: Build and push API image
60-
uses: docker/build-push-action@v6
61-
with:
62-
context: api
63-
file: api/oss/docker/Dockerfile.gh
64-
push: true
65-
tags: ghcr.io/agenta-ai/agenta-api:${{ steps.tag.outputs.image_tag }}
66-
cache-from: type=gha,scope=api
67-
cache-to: type=gha,mode=max,scope=api
68-
69-
- name: Build and push Web image
70-
uses: docker/build-push-action@v6
71-
with:
72-
context: web
73-
file: web/oss/docker/Dockerfile.gh
74-
push: true
75-
tags: ghcr.io/agenta-ai/agenta-web:${{ steps.tag.outputs.image_tag }}
76-
cache-from: type=gha,scope=web
77-
cache-to: type=gha,mode=max,scope=web
78-
79-
- name: Build and push Services image
113+
- name: Build and push image
80114
uses: docker/build-push-action@v6
81115
with:
82-
context: services
83-
file: services/oss/docker/Dockerfile.gh
116+
context: ${{ matrix.context }}
117+
file: ${{ matrix.dockerfile }}
84118
push: true
85-
tags: ghcr.io/agenta-ai/agenta-services:${{ steps.tag.outputs.image_tag }}
86-
cache-from: type=gha,scope=services
87-
cache-to: type=gha,mode=max,scope=services
119+
tags: ghcr.io/agenta-ai/${{ matrix.image_name }}:${{ needs.prepare.outputs.image_tag }}
120+
cache-from: |
121+
type=registry,ref=ghcr.io/agenta-ai/${{ matrix.image_name }}:buildcache-shared
122+
type=registry,ref=ghcr.io/agenta-ai/${{ matrix.image_name }}:buildcache-${{ needs.prepare.outputs.cache_scope }}
123+
cache-to: |
124+
type=registry,ref=ghcr.io/agenta-ai/${{ matrix.image_name }}:buildcache-shared,mode=max
125+
type=registry,ref=ghcr.io/agenta-ai/${{ matrix.image_name }}:buildcache-${{ needs.prepare.outputs.cache_scope }},mode=max
88126
89127
- name: Summary
90128
run: |
91-
TAG="${{ steps.tag.outputs.image_tag }}"
92-
echo "### Railway Preview Images" >> "$GITHUB_STEP_SUMMARY"
93-
echo "" >> "$GITHUB_STEP_SUMMARY"
94-
echo "| Image | Tag |" >> "$GITHUB_STEP_SUMMARY"
95-
echo "|-------|-----|" >> "$GITHUB_STEP_SUMMARY"
96-
echo "| agenta-api | \`${TAG}\` |" >> "$GITHUB_STEP_SUMMARY"
97-
echo "| agenta-web | \`${TAG}\` |" >> "$GITHUB_STEP_SUMMARY"
98-
echo "| agenta-services | \`${TAG}\` |" >> "$GITHUB_STEP_SUMMARY"
129+
echo "- Built \`${{ matrix.image_name }}:${{ needs.prepare.outputs.image_tag }}\`" >> "$GITHUB_STEP_SUMMARY"
99130
100131
deploy:
101-
needs: build-and-push
102-
if: needs.build-and-push.outputs.pr_number != ''
132+
needs: [prepare, build-and-push]
133+
if: needs.prepare.outputs.pr_number != '' && needs.build-and-push.result == 'success'
103134
uses: ./.github/workflows/07-railway-preview-deploy.yml
104135
with:
105-
image_tag: ${{ needs.build-and-push.outputs.image_tag }}
106-
pr_number: ${{ needs.build-and-push.outputs.pr_number }}
136+
image_tag: ${{ needs.prepare.outputs.image_tag }}
137+
pr_number: ${{ needs.prepare.outputs.pr_number }}
107138
secrets: inherit

.github/workflows/07-railway-preview-deploy.yml

Lines changed: 113 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,25 +55,115 @@ jobs:
5555
env:
5656
PR_NUMBER: ${{ inputs.pr_number }}
5757
IMAGE_TAG: ${{ inputs.image_tag }}
58+
RAILWAY_POST_BOOTSTRAP_SLEEP: "2"
59+
RAILWAY_INFRA_SETTLE_SECONDS: "20"
60+
RAILWAY_APP_SETTLE_SECONDS: "0"
61+
RAILWAY_ALEMBIC_MAX_ATTEMPTS: "5"
62+
RAILWAY_RETRY_MAX: "7"
63+
RAILWAY_RETRY_DELAY: "5"
5864
SMOKE_AUTO_REPAIR: "false"
5965
SMOKE_MAX_RETRIES: "20"
60-
SMOKE_SLEEP_SECONDS: "10"
66+
SMOKE_SLEEP_SECONDS: "5"
67+
SMOKE_MAX_WAIT_SECONDS: "300"
68+
SMOKE_DOMAIN_MAX_WAIT_SECONDS: "180"
6169
run: |
6270
chmod +x hosting/railway/oss/scripts/*.sh
63-
output="$(hosting/railway/oss/scripts/preview-create-or-update.sh 2>&1)" || {
64-
echo "$output"
65-
echo "deploy_failed=true" >> "$GITHUB_OUTPUT"
66-
exit 1
71+
# shellcheck source=/dev/null
72+
source hosting/railway/oss/scripts/lib.sh
73+
74+
log_file="$(mktemp)"
75+
cleanup() {
76+
rm -f "$log_file"
6777
}
68-
echo "$output"
78+
trap cleanup EXIT
79+
80+
set +e
81+
hosting/railway/oss/scripts/preview-create-or-update.sh 2>&1 | tee "$log_file"
82+
deploy_status=${PIPESTATUS[0]}
83+
set -e
84+
85+
deploy_failed=false
86+
if [ "$deploy_status" -ne 0 ]; then
87+
deploy_failed=true
88+
fi
6989
7090
# Extract preview URL from output
71-
url="$(echo "$output" | grep -oP 'Preview URL: \K.*' || true)"
72-
echo "preview_url=${url}" >> "$GITHUB_OUTPUT"
91+
url="$(grep -oP 'Preview URL: \K.*' "$log_file" || true)"
92+
url="${url%%$'\n'*}"
93+
url="${url//$'\r'/}"
7394
7495
# Extract project name from output
75-
project="$(echo "$output" | grep -oP "Preview deploy completed for '\K[^']*" || true)"
96+
project="$(grep -oP "Preview deploy completed for '\K[^']*" "$log_file" || true)"
97+
if [ -z "$project" ]; then
98+
project="$(grep -oP "Bootstrap completed for project '\K[^']*" "$log_file" || true)"
99+
fi
100+
project="${project%%$'\n'*}"
101+
project="${project//$'\r'/}"
102+
103+
environment_name="$(grep -oP "Preview deploy completed for '[^']+' \(\K[^)]*" "$log_file" || true)"
104+
if [ -z "$environment_name" ]; then
105+
environment_name="$(grep -oP "Bootstrap completed for project '[^']*' environment '\K[^']*" "$log_file" || true)"
106+
fi
107+
environment_name="${environment_name%%$'\n'*}"
108+
environment_name="${environment_name//$'\r'/}"
109+
if [ -z "$environment_name" ]; then
110+
environment_name="production"
111+
fi
112+
113+
railway_logs_url="$(grep -oP 'Build Logs:\s*\Khttps://railway\.com\S+' "$log_file" | head -n 1 || true)"
114+
railway_logs_url="${railway_logs_url%%$'\n'*}"
115+
railway_logs_url="${railway_logs_url%%&}"
116+
117+
if [ -n "$project" ]; then
118+
set +e
119+
railway_call link --project "$project" --environment "$environment_name" --json >/dev/null 2>&1
120+
status_json="$(railway_call status --json 2>/dev/null)"
121+
122+
if [ -z "$url" ]; then
123+
domain="$(railway_call variable list -k --service gateway --environment "$environment_name" 2>/dev/null | grep '^RAILWAY_PUBLIC_DOMAIN=' | cut -d= -f2- || true)"
124+
if [ -n "$domain" ]; then
125+
url="https://${domain}/w"
126+
fi
127+
fi
128+
129+
project_id="$(printf "%s" "$status_json" | jq -r '.project.id // .projectId // empty' 2>/dev/null || true)"
130+
environment_id="$(printf "%s" "$status_json" | jq -r '.environment.id // .environmentId // empty' 2>/dev/null || true)"
131+
132+
if [ -z "$project_id" ] || [ -z "$environment_id" ]; then
133+
projects_json="$(railway_call list --json 2>/dev/null)"
134+
project_id="$(printf "%s" "$projects_json" | jq -r --arg name "$project" '.[] | select(.name == $name) | .id' 2>/dev/null | head -n 1)"
135+
136+
environment_json="$(railway_call environment "$environment_name" --json 2>/dev/null)"
137+
environment_id="$(printf "%s" "$environment_json" | jq -r '.id // .environment.id // .environmentId // empty' 2>/dev/null | head -n 1)"
138+
fi
139+
set -e
140+
141+
if [ "$project_id" = "null" ]; then
142+
project_id=""
143+
fi
144+
if [ "$environment_id" = "null" ]; then
145+
environment_id=""
146+
fi
147+
148+
if [ -n "$project_id" ] && [ -n "$environment_id" ]; then
149+
railway_logs_url="https://railway.com/project/${project_id}/logs?environmentId=${environment_id}"
150+
elif [ -n "$project_id" ]; then
151+
railway_logs_url="https://railway.com/project/${project_id}/logs"
152+
fi
153+
fi
154+
155+
echo "preview_url=${url}" >> "$GITHUB_OUTPUT"
76156
echo "project_name=${project}" >> "$GITHUB_OUTPUT"
157+
echo "environment_name=${environment_name}" >> "$GITHUB_OUTPUT"
158+
echo "railway_logs_url=${railway_logs_url}" >> "$GITHUB_OUTPUT"
159+
160+
trap - EXIT
161+
cleanup
162+
163+
if [ "$deploy_failed" = "true" ]; then
164+
echo "deploy_failed=true" >> "$GITHUB_OUTPUT"
165+
exit 1
166+
fi
77167
78168
- name: Post preview URL as PR comment
79169
if: inputs.pr_number != '' && steps.deploy.outputs.preview_url != ''
@@ -85,6 +175,8 @@ jobs:
85175
const projectName = '${{ steps.deploy.outputs.project_name }}';
86176
const imageTag = '${{ inputs.image_tag }}';
87177
const marker = '<!-- railway-preview-bot -->';
178+
const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
179+
const railwayLogsUrl = '${{ steps.deploy.outputs.railway_logs_url }}';
88180
89181
const body = [
90182
marker,
@@ -96,9 +188,11 @@ jobs:
96188
`| **Project** | \`${projectName}\` |`,
97189
`| **Image tag** | \`${imageTag}\` |`,
98190
`| **Status** | Deployed |`,
191+
railwayLogsUrl ? `| **Railway logs** | [Open logs](${railwayLogsUrl}) |` : null,
192+
`| **Workflow logs** | [View workflow run](${runUrl}) |`,
99193
'',
100194
`_Updated at ${new Date().toISOString()}_`,
101-
].join('\n');
195+
].filter(Boolean).join('\n');
102196
103197
// Find existing comment from this bot
104198
const { data: comments } = await github.rest.issues.listComments({
@@ -134,19 +228,23 @@ jobs:
134228
const imageTag = '${{ inputs.image_tag }}';
135229
const marker = '<!-- railway-preview-bot -->';
136230
const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
231+
const previewUrl = '${{ steps.deploy.outputs.preview_url }}';
232+
const railwayLogsUrl = '${{ steps.deploy.outputs.railway_logs_url }}';
137233
138234
const body = [
139235
marker,
140236
'### Railway Preview Environment',
141237
'',
142238
`| | |`,
143239
`|---|---|`,
240+
previewUrl ? `| **Preview URL** | ${previewUrl} |` : null,
144241
`| **Image tag** | \`${imageTag}\` |`,
145242
`| **Status** | Failed |`,
243+
railwayLogsUrl ? `| **Railway logs** | [Open logs](${railwayLogsUrl}) |` : null,
146244
`| **Logs** | [View workflow run](${runUrl}) |`,
147245
'',
148246
`_Updated at ${new Date().toISOString()}_`,
149-
].join('\n');
247+
].filter(Boolean).join('\n');
150248
151249
const { data: comments } = await github.rest.issues.listComments({
152250
owner: context.repo.owner,
@@ -182,4 +280,8 @@ jobs:
182280
if [ -n "${{ steps.deploy.outputs.preview_url }}" ]; then
183281
echo "- **Preview URL:** ${{ steps.deploy.outputs.preview_url }}" >> "$GITHUB_STEP_SUMMARY"
184282
fi
283+
if [ -n "${{ steps.deploy.outputs.railway_logs_url }}" ]; then
284+
echo "- **Railway logs:** ${{ steps.deploy.outputs.railway_logs_url }}" >> "$GITHUB_STEP_SUMMARY"
285+
fi
286+
echo "- **Workflow logs:** ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" >> "$GITHUB_STEP_SUMMARY"
185287
echo "- **Project:** \`${{ steps.deploy.outputs.project_name }}\`" >> "$GITHUB_STEP_SUMMARY"

hosting/railway/oss/README.md

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -197,15 +197,21 @@ The API image defaults to docker-compose hostnames for Redis (`redis-durable:638
197197

198198
### Build times on first deploy
199199

200-
First deploys on Railway take longer because Docker layer caches are cold. The app settle window (`RAILWAY_APP_SETTLE_SECONDS`, default 60) may not be enough on very slow builds. If smoke fails because services are still DEPLOYING, wait and re-run smoke manually.
200+
First deploys on Railway take longer because Docker layer caches are cold. The deploy flow now relies mostly on readiness polling in the smoke checks instead of fixed sleeps, so slow starts are less likely to fail prematurely.
201+
202+
For GitHub preview builds, CI now uses shared BuildKit registry cache tags (`buildcache-shared`) plus PR-scoped tags (`buildcache-pr-<number>`). It also builds the API, web, and services images in parallel matrix jobs. This keeps repeated PR builds fast and also improves first builds on new PRs by reusing layers from previous runs. Manual workflow dispatches without a PR number use `manual-<sha>` image tags, scope their cache tag by the sanitized ref name (`buildcache-<ref>`) instead of a PR number, and skip deploy.
201203

202204
### Smoke check options
203205

204206
The smoke script supports these environment variables:
205207

206-
- `SMOKE_MAX_RETRIES` (default `30`) - retries per endpoint
207-
- `SMOKE_SLEEP_SECONDS` (default `10`) - sleep between retries
208-
- `SMOKE_AUTO_REPAIR` (default `true`) - redeploy failing services automatically
208+
- `SMOKE_MAX_RETRIES` (default `10`) - legacy retry count used to derive timeout when `SMOKE_MAX_WAIT_SECONDS` is not set
209+
- `SMOKE_SLEEP_SECONDS` (default `5`) - poll interval between readiness checks
210+
- `SMOKE_MAX_WAIT_SECONDS` (default `SMOKE_MAX_RETRIES * SMOKE_SLEEP_SECONDS`) - max wait time per endpoint before failing
211+
- `SMOKE_DOMAIN_MAX_WAIT_SECONDS` (default `SMOKE_MAX_WAIT_SECONDS`) - max wait time for gateway domain resolution
212+
- `SMOKE_CURL_CONNECT_TIMEOUT` (default `5`) - per-request TCP/TLS connect timeout in seconds
213+
- `SMOKE_CURL_MAX_TIME` (default `10`) - max duration per health request in seconds
214+
- `SMOKE_AUTO_REPAIR` (default `false`) - redeploy failing services automatically
209215

210216
For CI, keep the default `SMOKE_AUTO_REPAIR=false` to get clean pass/fail signals without side effects; set it to `true` only when you want failing services to be redeployed automatically.
211217

@@ -252,7 +258,8 @@ the deploy flow grows or back-to-back deploys hit the 1,000 RPH Hobby ceiling.
252258

253259
## Notes
254260

255-
- This fast-start flow keeps auth minimal (`AGENTA_LICENSE=oss`) and does not wire CI yet.
261+
- This fast-start flow keeps auth minimal (`AGENTA_LICENSE=oss`).
262+
- CI is wired for Railway preview environments via `.github/workflows/06-railway-preview-build.yml`, `.github/workflows/07-railway-preview-deploy.yml`, and `.github/workflows/08-railway-preview-cleanup.yml`.
256263
- Postgres and Redis are provisioned as image-backed services with explicit volume mounts.
257264
- Redis now gets a `/data` volume during bootstrap for persistence.
258265
- `configure.sh` sets `RAILWAY_RUN_UID=0` and `RAILWAY_RUN_GID=0` on the Redis

0 commit comments

Comments
 (0)