@@ -51,9 +51,17 @@ inputs:
5151 required : true
5252 default : " "
bazel-remote-cache-url:
  # Optional. When this is empty, the cache-env step emits an empty
  # BAZEL_CACHE_BUILD_ARG, so no BAZEL_CACHE build-arg reaches the image builds.
  required: false
  default: ""
  description: >-
    URL of the Bazel remote cache to use for building the image (http/grpc).
    Leave empty to use Dockerfile-default cache mount paths.
CACHE_REGISTRY:
  # Used as the repository part of every cache ref; the container name,
  # architecture and branch are encoded in the tag by the cache-env step.
  required: false
  default: "ghcr.io/nvidia/jax-toolbox-buildcache"
  description: >-
    OCI registry used for BuildKit layer cache (cache-to/cache-from).
    Must be writable by the GITHUB_TOKEN.
ENABLE_BAZEL_REPO_CACHE:
  # Compared against the literal string 'true' by the restore, export and
  # save steps; any other value leaves those steps skipped.
  required: false
  default: "false"
  description: >-
    Enable Bazel repository-cache save/restore via actions/cache and build-context injection.
    Set to 'true' only for containers that run Bazel (i.e. build-jax).
5765
5866outputs :
5967 DOCKER_TAG_MEALKIT :
7381 echo 'UPLD_IMAGE=ghcr.io/nvidia/jax-toolbox-internal' >> $GITHUB_ENV
7482 echo "BADGE_FILENAME_FULL=${{ inputs.BADGE_FILENAME }}-${{ inputs.ARCHITECTURE }}.json" >> $GITHUB_ENV
7583
- name: Set up build cache environment
  id: cache-env
  shell: bash
  # Expression values are handed to the script as environment variables rather
  # than spliced into its text, so a ref name or input containing quotes,
  # backticks or $(...) is treated as data and can never run as shell code.
  env:
    REF_NAME: ${{ github.ref_name }}
    CONTAINER: ${{ inputs.CONTAINER_NAME }}
    ARCH: ${{ inputs.ARCHITECTURE }}
    REGISTRY: ${{ inputs.CACHE_REGISTRY }}
    BAZEL_REMOTE_CACHE_URL: ${{ inputs.bazel-remote-cache-url }}
  run: |
    # Sanitize the ref name for use inside an image tag: every run of
    # characters outside [A-Za-z0-9._-] (this includes "/") collapses to a
    # single "-", a leading/trailing "-" is stripped, and the result is
    # capped at 100 characters.
    BRANCH=$(printf '%s' "$REF_NAME" \
      | tr -cs 'a-zA-Z0-9._-' '-' \
      | sed 's/^-//;s/-$//' \
      | cut -c1-100)

    # Registry cache refs: one branch-scoped and one main-scoped ref for each
    # of the mealkit and final builds.
    echo "CACHE_REF_MEALKIT_BRANCH=${REGISTRY}:${CONTAINER}-${ARCH}-${BRANCH}-mealkit" >> "$GITHUB_ENV"
    echo "CACHE_REF_MEALKIT_MAIN=${REGISTRY}:${CONTAINER}-${ARCH}-main-mealkit" >> "$GITHUB_ENV"
    echo "CACHE_REF_FINAL_BRANCH=${REGISTRY}:${CONTAINER}-${ARCH}-${BRANCH}-final" >> "$GITHUB_ENV"
    echo "CACHE_REF_FINAL_MAIN=${REGISTRY}:${CONTAINER}-${ARCH}-main-final" >> "$GITHUB_ENV"

    # Bazel repo-cache key: keyed on arch only so that restore-keys always
    # finds the most recent cache for this architecture, regardless of which
    # JAX/XLA commit is being built. EXTRA_BUILD_ARGS contains
    # URLREF_JAX=...#<commit>, which changes on every upstream push; including
    # it would cause a primary-key miss on every single run.
    echo "BAZEL_REPO_CACHE_KEY=bazel-repo-${ARCH}" >> "$GITHUB_ENV"

    # Pass the BAZEL_CACHE build-arg only when a remote URL is explicitly
    # given; otherwise the Dockerfile default (/cache/bazel-disk) applies.
    if [[ -n "$BAZEL_REMOTE_CACHE_URL" ]]; then
      echo "BAZEL_CACHE_BUILD_ARG=BAZEL_CACHE=${BAZEL_REMOTE_CACHE_URL}" >> "$GITHUB_ENV"
    else
      echo "BAZEL_CACHE_BUILD_ARG=" >> "$GITHUB_ENV"
    fi
118+
# Runs unconditionally: both image builds always pass this directory as the
# bazel-repo-seed build context, so it must exist even when no cache is restored.
# An empty directory fed via --build-context is equivalent to FROM scratch.
- name: Prepare Bazel repo cache directory
  shell: bash
  run: |
    mkdir -p /tmp/bazel-repo-cache
124+
- name: Restore Bazel repo cache
  id: restore-bazel-repo-cache
  uses: actions/cache/restore@v4
  if: inputs.ENABLE_BAZEL_REPO_CACHE == 'true'
  with:
    # The primary key is arch-scoped and ends in the run id, so each
    # successful run saves a fresh entry under its own key.
    key: ${{ env.BAZEL_REPO_CACHE_KEY }}-${{ github.run_id }}
    # Prefix fallback: picks up the most recent cache for this arch, giving
    # a warm repo cache even when the exact SHA differs.
    restore-keys: |
      ${{ env.BAZEL_REPO_CACHE_KEY }}-
    path: /tmp/bazel-repo-cache
137+
76138 - name : Setup SSH
77139 id : setup-ssh
78140 uses : ./.github/actions/setup-ssh
@@ -136,9 +198,23 @@ runs:
136198 "SSH_KNOWN_HOSTS=${{ steps.setup-ssh.outputs.known-hosts-file }}"
137199 build-args : |
138200 BASE_IMAGE=${{ inputs.BASE_IMAGE }}
139- BAZEL_CACHE=${{ inputs.bazel-remote-cache-url }}
140201 BUILD_DATE=${{ inputs.BUILD_DATE }}
202+ ${{ env.BAZEL_CACHE_BUILD_ARG }}
141203 ${{ inputs.EXTRA_BUILD_ARGS }}
204+ # Inject pre-restored Bazel repo cache as a named build context.
205+ # The Dockerfile declares `FROM scratch AS bazel-repo-seed`; passing this
206+ # context overrides that stage with the contents of the local directory.
207+ # When the directory is empty (cold start) the bind-mount is a no-op.
208+ build-contexts : |
209+ bazel-repo-seed=/tmp/bazel-repo-cache
210+ cache-from : |
211+ type=registry,ref=${{ env.CACHE_REF_MEALKIT_BRANCH }}
212+ type=registry,ref=${{ env.CACHE_REF_MEALKIT_MAIN }}
213+ # mode=max: cache ALL intermediate stages (builder, mealkit) so
214+ # the full Bazel layer is reusable on the next run.
215+ # ignore-error=true: a cache push failure must never kill the build.
216+ cache-to : type=registry,ref=${{ env.CACHE_REF_MEALKIT_BRANCH }},mode=max,oci-mediatypes=true,image-manifest=true,compression=zstd,ignore-error=true
217+
142218 # FINAL IMAGE BUILD
143219 - name : Set docker metadata - final
144220 id : final-metadata
@@ -169,9 +245,76 @@ runs:
169245 "SSH_KNOWN_HOSTS=${{ steps.setup-ssh.outputs.known-hosts-file }}"
170246 build-args : |
171247 BASE_IMAGE=${{ inputs.BASE_IMAGE }}
172- BAZEL_CACHE=${{ inputs.bazel-remote-cache-url }}
173248 BUILD_DATE=${{ inputs.BUILD_DATE }}
249+ ${{ env.BAZEL_CACHE_BUILD_ARG }}
174250 ${{ inputs.EXTRA_BUILD_ARGS }}
251+ build-contexts : |
252+ bazel-repo-seed=/tmp/bazel-repo-cache
253+ cache-from : |
254+ type=registry,ref=${{ env.CACHE_REF_FINAL_BRANCH }}
255+ type=registry,ref=${{ env.CACHE_REF_FINAL_MAIN }}
256+ type=registry,ref=${{ env.CACHE_REF_MEALKIT_BRANCH }}
257+ type=registry,ref=${{ env.CACHE_REF_MEALKIT_MAIN }}
258+ # mode=min: builder+mealkit layers are already cached under CACHE_REF_MEALKIT_BRANCH
259+ # (mode=max above). Only cache the final stage's unique layers here.
260+ cache-to : type=registry,ref=${{ env.CACHE_REF_FINAL_BRANCH }},mode=min,oci-mediatypes=true,image-manifest=true,compression=zstd,ignore-error=true
261+
# BAZEL REPO CACHE EXPORT
# This step extracts /cache/bazel-repo from the builder stage back to the
# runner so it can be saved via actions/cache.
#
# It builds only the lightweight `bazel-repo-export` stage (FROM scratch +
# COPY --from=builder), reusing the builder layer that the mealkit build
# already cached in the registry. Bazel is not re-invoked as long as the
# build args below match the ones the mealkit build used; a differing arg
# (for example a missing BAZEL_CACHE) can invalidate the cached layers.
#
# Cost: one registry pull of the builder layer + a local copy.
# This is bounded (repo cache only, NOT disk cache) and will NOT trigger
# the multi-hour `[cache-export] exporting to client directory` stall that
# afflicted the old type=local full-cache-export path.
- name: Export Bazel repo cache
  id: export-bazel-repo-cache
  if: inputs.ENABLE_BAZEL_REPO_CACHE == 'true' && steps.mealkit-build.outcome == 'success'
  shell: bash
  # Inputs reach the script as environment variables instead of being spliced
  # into its text. The names deliberately avoid DOCKER_CONTEXT, which the
  # docker CLI itself reads to select an endpoint.
  env:
    TARGET_ARCH: ${{ inputs.ARCHITECTURE }}
    BASE_IMAGE_REF: ${{ inputs.BASE_IMAGE }}
    BUILD_DATE_VALUE: ${{ inputs.BUILD_DATE }}
    DOCKERFILE_PATH: ${{ inputs.DOCKERFILE }}
    CONTEXT_DIR: ${{ inputs.DOCKER_CONTEXT }}
    EXTRA_BUILD_ARGS: ${{ inputs.EXTRA_BUILD_ARGS }}
  run: |
    # Mirror the mealkit build's build-args in the same order:
    # BASE_IMAGE, BUILD_DATE, optional BAZEL_CACHE, then EXTRA_BUILD_ARGS.
    BUILD_ARG_FLAGS=(
      --build-arg "BASE_IMAGE=${BASE_IMAGE_REF}"
      --build-arg "BUILD_DATE=${BUILD_DATE_VALUE}"
    )

    # BAZEL_CACHE_BUILD_ARG is written to GITHUB_ENV by the cache-env step; it
    # is "BAZEL_CACHE=<url>" when a remote cache URL was given, else empty.
    if [[ -n "${BAZEL_CACHE_BUILD_ARG:-}" ]]; then
      BUILD_ARG_FLAGS+=(--build-arg "${BAZEL_CACHE_BUILD_ARG}")
    fi

    # Convert newline-separated build args into individual --build-arg flags.
    while IFS= read -r line; do
      if [[ -n "$line" ]]; then
        BUILD_ARG_FLAGS+=(--build-arg "$line")
      fi
    done <<< "$EXTRA_BUILD_ARGS"

    # Export to a staging dir to avoid a read/write collision:
    # the build reads /tmp/bazel-repo-cache via --build-context and
    # would corrupt the seed if we wrote to the same path concurrently.
    rm -rf /tmp/bazel-repo-cache-new

    docker buildx build \
      --platform "linux/${TARGET_ARCH}" \
      --file "${DOCKERFILE_PATH}" \
      --target bazel-repo-export \
      --output "type=local,dest=/tmp/bazel-repo-cache-new" \
      --cache-from "type=registry,ref=${CACHE_REF_MEALKIT_BRANCH}" \
      --cache-from "type=registry,ref=${CACHE_REF_MEALKIT_MAIN}" \
      --build-context "bazel-repo-seed=/tmp/bazel-repo-cache" \
      "${BUILD_ARG_FLAGS[@]}" \
      "${CONTEXT_DIR}"

    # Swap staging into the canonical location for actions/cache/save.
    rm -rf /tmp/bazel-repo-cache
    mv /tmp/bazel-repo-cache-new /tmp/bazel-repo-cache
309+
- name: Save Bazel repo cache
  uses: actions/cache/save@v4
  if: inputs.ENABLE_BAZEL_REPO_CACHE == 'true' && steps.export-bazel-repo-cache.outcome == 'success'
  with:
    # Same key as the restore step: arch + run_id. Because the run id differs
    # for every workflow run, each successful run stores a NEW entry rather
    # than replacing the previous one; older entries are left to GitHub's
    # cache eviction.
    key: ${{ env.BAZEL_REPO_CACHE_KEY }}-${{ github.run_id }}
    path: /tmp/bazel-repo-cache
175318
176319 # SITREP GENERATION
177320 - name : Generate sitrep
0 commit comments