1 file changed: +35 -1 lines changed
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,8 @@ agents:
102102 queue: cpu_queue_premerge_us_east_1
103103 {% elif step .gpu == "a100" %}
104104 queue: a100_queue
105+ {% elif step .gpu == "h100" %}
106+ queue: mithril-h100-pool
105107 {% elif step .gpu == "h200" %}
106108 queue: skylab-h200
107109 {% elif step .gpu == "b200" %}
@@ -130,7 +132,7 @@ retry:
130132
131133{% if step .num_nodes < 2 %}
132134plugins:
133- {% if step .gpu != "a100" and step .gpu != "h200" and step .gpu != "b200" %}
135+ {% if step .gpu != "a100" and step .gpu != "h100" and step .gpu != "h200" and step .gpu != "b200" %}
134136 - docker#v5.2.0:
135137 image: {{ image }}
136138 always-pull: true
@@ -207,6 +209,38 @@ plugins:
207209 - /dev/shm:/dev/shm
208210 - /data/benchmark-hf-cache:/benchmark-hf-cache
209211 - /data/benchmark-vllm-cache:/root/.cache/vllm
212+ {% elif step .gpu == "h100" %}
213+ - kubernetes:
214+ podSpec:
215+ containers:
216+ - image: {{ image }}
217+ command:
218+ - bash -c "{{ '(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd ' ~ ((step.working_dir or default_working_dir) | safe) ~ ' && ' ~ (step.command or (step.commands | join(" && ")) | safe) }}"
219+ resources:
220+ limits:
221+ nvidia.com/gpu: {{ step.num_gpus or 1 }}
222+ volumeMounts:
223+ - name: devshm
224+ mountPath: /dev/shm
225+ - name: hf-cache
226+ mountPath: {{ hf_home }}
227+ env:
228+ - name: VLLM_USAGE_SOURCE
229+ value: ci-test
230+ - name: NCCL_CUMEM_HOST_ENABLE
231+ value: "0"
232+ - name: HF_HOME
233+ value: {{ hf_home }}
234+ nodeSelector:
235+ nvidia.com/gpu.product: NVIDIA-H100-80GB-HBM3
236+ volumes:
237+ - name: devshm
238+ emptyDir:
239+ medium: Memory
240+ - name: hf-cache
241+ hostPath:
242+ path: /mnt/hf-cache
243+ type: Directory
210244 {% else %}
211245 - kubernetes:
212246 podSpec:
You can’t perform that action at this time.
0 commit comments