Skip to content

Commit 67ed978

Browse files
feat: Llama-4-Maverick-17B-128E multihost serving recipe
1 parent 1869642 commit 67ed978

File tree

10 files changed

+824
-2
lines changed

10 files changed

+824
-2
lines changed

inference/trillium/JetStream-Maxtext/Llama-4-Maverick-17B-128E/README.md

Lines changed: 368 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Image with MaxText, JetStream and the Google Cloud SDK installed on
# Ubuntu 22.04. The container starts an interactive bash shell.
FROM ubuntu:22.04

# Make a RUN step fail when any stage of a pipeline fails (see the
# `curl | gpg` key import below); plain `/bin/sh -c` only reports the exit
# status of the last command in the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Build-time only: stops apt/debconf from prompting during `docker build`
# without leaving the setting in the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Base OS packages. python3-dev and build-essential are installed here,
# together with everything else, so that the compiler toolchain and Python
# headers are already present for every later pip step. The apt lists are
# removed in the same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        apt-transport-https \
        build-essential \
        ca-certificates \
        curl \
        git \
        gnupg \
        nano \
        python3-dev \
        python3-pip \
        python3.10 \
        vim && \
    rm -rf /var/lib/apt/lists/*

RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1

# Google Cloud SDK from Google's apt repository. `curl -f` plus pipefail
# makes a failed key download abort the build instead of producing an empty
# keyring; `>` (not `tee -a`) keeps the sources file to a single entry.
RUN curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | \
        gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \
    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        > /etc/apt/sources.list.d/google-cloud-sdk.list && \
    apt-get update && \
    apt-get install -y google-cloud-sdk && \
    rm -rf /var/lib/apt/lists/*

RUN python3 -m pip install --no-cache-dir --upgrade pip

# Runtime JAX configuration.
ENV JAX_PLATFORMS=proxy \
    JAX_BACKEND_TARGET=grpc://localhost:38681 \
    XCLOUD_ENVIRONMENT=GCP

# Git refs to check out. Both default to `main` and can be overridden with
# `--build-arg`; they are re-exported as ENV so the values stay visible in
# the running container exactly as before.
ARG MAXTEXT_VERSION=main
ARG JETSTREAM_VERSION=main
ENV MAXTEXT_VERSION=${MAXTEXT_VERSION} \
    JETSTREAM_VERSION=${JETSTREAM_VERSION}

# Clone to explicit absolute paths so the later steps do not depend on the
# implicit working directory.
RUN git clone https://github.com/AI-Hypercomputer/JetStream.git /JetStream && \
    git clone https://github.com/AI-Hypercomputer/maxtext.git /maxtext

WORKDIR /maxtext
RUN git checkout "${MAXTEXT_VERSION}" && \
    bash setup.sh

WORKDIR /JetStream
RUN git checkout "${JETSTREAM_VERSION}" && \
    pip install --no-cache-dir -e .

RUN pip install --no-cache-dir setuptools fastapi uvicorn

# Restore the original default working directory for the running container.
WORKDIR /

ENTRYPOINT [ "/bin/bash" ]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Cloud Build configuration: a single docker build of ./Dockerfile.
# The image reference is assembled from three user-defined substitutions:
#   _ARTIFACT_REGISTRY, _JETSTREAM_MAXTEXT_IMAGE, _JETSTREAM_MAXTEXT_VERSION
steps:
  - name: "gcr.io/cloud-builders/docker"
    automapSubstitutions: true
    args:
      - "build"
      - "--tag=${_ARTIFACT_REGISTRY}/${_JETSTREAM_MAXTEXT_IMAGE}:${_JETSTREAM_MAXTEXT_VERSION}"
      - "--file=Dockerfile"
      - "."

# Same reference as the --tag argument above, so the built image is the one
# recorded as this build's output image.
images:
  - "${_ARTIFACT_REGISTRY}/${_JETSTREAM_MAXTEXT_IMAGE}:${_JETSTREAM_MAXTEXT_VERSION}"
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Helm chart metadata (chart API v2).
apiVersion: "v2"
type: "application"
name: "trillium-maxtext-jetstream-llama-serve-model"
description: "trillium-maxtext-jetstream-llama-serve-model"
# Chart version and application version are tracked separately.
version: "0.1.0"
appVersion: '1.16.0'
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
{{- /*
Checkpoint-conversion Job, rendered only when .Values.convert_hf_ckpt is set.
The container downloads the HuggingFace model to the local-SSD mount, renames
consolidated.NN.pth weight files, runs MaxText.llama4_ckpt_unscanned on CPU,
and copies the result into the gcsfuse-mounted bucket under /gcs.
*/ -}}
{{- if .Values.convert_hf_ckpt }}
{{- $root := . }}

apiVersion: batch/v1
kind: Job
metadata:
  name: {{ .Release.Name }}-convert-ckpt
  labels:
    app: {{ .Release.Name }}-convert-ckpt
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/managed-by: {{ .Release.Service }}
spec:
  template:
    metadata:
      labels:
        app: {{ .Release.Name }}-convert-ckpt
        app.kubernetes.io/instance: {{ .Release.Name }}
    spec:
      restartPolicy: OnFailure
      # Pin the job to n2-highmem-80 nodes.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: cloud.google.com/machine-family
                operator: In
                values:
                - n2
              - key: node.kubernetes.io/instance-type
                operator: In
                values:
                - n2-highmem-80

      volumes:
      - name: shared-memory
        emptyDir:
          medium: "Memory"
          sizeLimit: 50Gi
      - name: local-ssd
        hostPath:
          path: /mnt/stateful_partition/kube-ephemeral-ssd

      containers:
      - name: convert-model
        image: "{{ .Values.job.image.repository }}:{{ .Values.job.image.tag }}"
        imagePullPolicy: Always
        # NOTE(review): presumably required for the in-container gcsfuse
        # mount performed by the script below - confirm before tightening.
        securityContext:
          privileged: true

        env:
        - name: HF_TOKEN
          valueFrom:
            secretKeyRef:
              name: "{{ .Values.huggingface.secretName }}"
              key: "{{ .Values.huggingface.secretData.token }}"
        # Both paths are derived from volumes.ssdMountPath, the same value
        # used for the local-ssd volumeMount below, so the download and the
        # converted checkpoint always land on the SSD even when the mount
        # path is not /ssd.
        - name: BASE_MODEL_PATH
          value: "{{ $root.Values.volumes.ssdMountPath }}/{{ .Values.model.name }}"
        - name: CHECKPOINT_TPU_UNSCANNED
          value: "{{ $root.Values.volumes.ssdMountPath }}/{{ .Values.model.name }}/output/unscanned_ckpt/checkpoints"
        - name: MODEL_SIZE
          value: "llama4-17b-128e"
        # Emit GCS_FUSE_BUCKET exactly once. With several gcsMounts the old
        # loop produced duplicate env entries of which the last one took
        # effect; only the last mount is emitted here, so the effective
        # value is unchanged.
        {{- $gcsMounts := $root.Values.volumes.gcsMounts }}
        {{- range $idx, $gcs := $gcsMounts }}
        {{- if eq (add1 $idx) (len $gcsMounts) }}
        - name: GCS_FUSE_BUCKET
          value: "{{ $gcs.bucketName }}"
        {{- end }}
        {{- end }}

        workingDir: /workspace
        command: ["/bin/bash", "-c"]
        args:
        - |
          set -eux

          echo "Starting conversion job on a CPU node..."
          df -h

          pip install torch --index-url https://download.pytorch.org/whl/cpu
          # Quoted so bash never treats the [hf_xet] extra as a glob pattern.
          pip install "huggingface_hub[hf_xet]"

          # Start from a clean download directory.
          rm -rf "${BASE_MODEL_PATH}"


          echo "Downloading HuggingFace model to ${BASE_MODEL_PATH}..."

          huggingface-cli download {{ .Values.model.name }} --local-dir "${BASE_MODEL_PATH}" --local-dir-use-symlinks False

          # Rename weights to expected format

          echo "Rename weights in ${BASE_MODEL_PATH}"
          found_files=0
          for old_filepath in "${BASE_MODEL_PATH}"/consolidated.??.pth; do
            # An unmatched glob stays literal, hence the -f check.
            if [ -f "$old_filepath" ]; then
              found_files=$((found_files + 1))
              old_filename=$(basename "$old_filepath")
              new_filename=$(echo "$old_filename" | sed 's/^consolidated/llama4-17b-128e/')
              new_filepath="${BASE_MODEL_PATH}/${new_filename}"

              if [ "$old_filepath" != "$new_filepath" ]; then
                mv -v "$old_filepath" "$new_filepath"
                echo "Renamed $old_filepath to $new_filepath"
              else
                echo "File already named correctly: $old_filepath"
              fi
            fi
          done
          if [ "$found_files" -eq 0 ]; then
            echo "No consolidated.*.pth files found for renaming in ${BASE_MODEL_PATH}."
          fi

          echo "Starting GCSFuse Mount"
          mkdir -p /gcs
          gcsfuse --client-protocol http2 "${GCS_FUSE_BUCKET}" /gcs

          echo "Running MaxText checkpoint conversion (on CPU node)..."
          cd /maxtext || exit 1
          JAX_PLATFORMS=cpu python3 -m MaxText.llama4_ckpt_unscanned \
            --base-model-path "${BASE_MODEL_PATH}" \
            --maxtext-model-path "${CHECKPOINT_TPU_UNSCANNED}" \
            --model-size "${MODEL_SIZE}"

          gsutil -m cp -r "${CHECKPOINT_TPU_UNSCANNED}" /gcs/{{ .Values.model.name }}/output/unscanned_ckpt/checkpoints/

          echo "Conversion Job Complete. Unscanned checkpoints should be at ${CHECKPOINT_TPU_UNSCANNED}"

        volumeMounts:
        - name: shared-memory
          mountPath: /dev/shm
        - name: local-ssd
          mountPath: {{ $root.Values.volumes.ssdMountPath }}
  backoffLimit: 1
{{- end }}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Helm chart metadata (chart API v2).
apiVersion: "v2"
type: "application"
name: "trillium-maxtext-jetstream-llama-serve-model"
description: "trillium-maxtext-jetstream-llama-serve-model"
# Chart version and application version are tracked separately.
version: "0.1.0"
appVersion: '1.16.0'
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
apiVersion: v1
kind: ConfigMap
metadata:
  name: "{{ .Release.Name }}"
# Two text entries, each generated from a values map:
#   maxtext-configuration.yaml - one "key: value" line per maxtext_config entry
#   libtpu-init-args           - one "--flag=value" line per xla_flags entry
data:
  maxtext-configuration.yaml: |-
    {{- range $option, $setting := .Values.maxtext_config }}
    {{ $option }}: {{ $setting }}
    {{- end }}

  libtpu-init-args: |-
    {{- range $flag, $setting := .Values.xla_flags }}
    --{{ $flag }}={{ $setting }}
    {{- end }}

0 commit comments

Comments
 (0)