File tree Expand file tree Collapse file tree 1 file changed +25
-0
lines changed Expand file tree Collapse file tree 1 file changed +25
-0
lines changed Original file line number Diff line number Diff line change
#!/bin/bash

# This script builds the GH200 docker image and runs the offline inference
# example inside the container.
# It serves as a sanity check for compilation and basic model usage.
#
# -e: abort on the first failing command; -x: trace every command.
set -ex

# Try building the docker image.
# The "test" stage of the Dockerfile in the current directory is built for
# linux/arm64 and tagged gh200-test.
DOCKER_BUILDKIT=1 docker build . \
  --target test \
  --platform "linux/arm64" \
  -t gh200-test \
  --build-arg max_jobs=66 \
  --build-arg nvcc_threads=2 \
  --build-arg torch_cuda_arch_list="9.0+PTX" \
  --build-arg vllm_fa_cmake_gpu_arches="90-real"

# Setup cleanup.
# Force-removes the gh200-test container. `|| true` is intentional: a failed
# removal (e.g. the container does not exist yet) must not abort the script
# under `set -e`.
remove_docker_container() { docker rm -f gh200-test || true; }
# Remove the container on every exit path, success or failure.
trap remove_docker_container EXIT
# Remove any container with the same name left behind by an earlier run, so
# `docker run --name gh200-test` below does not hit a name conflict.
remove_docker_container

# Run the image and test offline inference.
# --entrypoint="" clears the image's entrypoint so that `bash -c ...` is the
# command actually executed in the container.
docker run --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
    python3 examples/offline_inference.py
'
You can’t perform that action at this time.
0 commit comments