@@ -8,67 +8,65 @@ set -ex
88CORE_RANGE=${CORE_RANGE:- 48-95}
99NUMA_NODE=${NUMA_NODE:- 1}
1010
11+ export CMAKE_BUILD_PARALLEL_LEVEL=32
12+
1113# Setup cleanup
1214remove_docker_container () {
1315 set -e;
14- docker rm -f cpu-test-" $BUILDKITE_BUILD_NUMBER " -" $NUMA_NODE " cpu-test-" $BUILDKITE_BUILD_NUMBER " -avx2-" $NUMA_NODE " || true ;
15- docker image rm cpu-test-" $BUILDKITE_BUILD_NUMBER " cpu-test-" $BUILDKITE_BUILD_NUMBER " -avx2 || true ;
16+ docker rm -f cpu-test-" $NUMA_NODE " cpu-test-" $NUMA_NODE " -avx2 || true ;
1617}
1718trap remove_docker_container EXIT
1819remove_docker_container
1920
2021# Try building the docker image
21- numactl -C " $CORE_RANGE " -N " $NUMA_NODE " docker build --tag cpu-test-" $BUILDKITE_BUILD_NUMBER " --target vllm-test -f docker/Dockerfile.cpu .
22- numactl -C " $CORE_RANGE " -N " $NUMA_NODE " docker build --build-arg VLLM_CPU_DISABLE_AVX512=" true" --tag cpu-test-" $BUILDKITE_BUILD_NUMBER " -avx2 --target vllm-test -f docker/Dockerfile.cpu .
22+ numactl -C " $CORE_RANGE " -N " $NUMA_NODE " docker build --tag cpu-test-" $NUMA_NODE " --target vllm-test -f docker/Dockerfile.cpu .
23+ numactl -C " $CORE_RANGE " -N " $NUMA_NODE " docker build --build-arg VLLM_CPU_DISABLE_AVX512=" true" --tag cpu-test-" $NUMA_NODE " -avx2 --target vllm-test -f docker/Dockerfile.cpu .
2324
2425# Run the image, setting --shm-size=4g for tensor parallel.
2526docker run -itd --entrypoint /bin/bash -v ~ /.cache/huggingface:/root/.cache/huggingface --cpuset-cpus=" $CORE_RANGE " \
26- --cpuset-mems=" $NUMA_NODE " --privileged=true -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-" $BUILDKITE_BUILD_NUMBER " - " $ NUMA_NODE" cpu-test-" $BUILDKITE_BUILD_NUMBER "
27+ --cpuset-mems=" $NUMA_NODE " --privileged=true -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-" $NUMA_NODE " cpu-test-" $NUMA_NODE "
2728docker run -itd --entrypoint /bin/bash -v ~ /.cache/huggingface:/root/.cache/huggingface --cpuset-cpus=" $CORE_RANGE " \
28- --cpuset-mems=" $NUMA_NODE " --privileged=true -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-" $BUILDKITE_BUILD_NUMBER " -avx2- " $NUMA_NODE " cpu-test-" $BUILDKITE_BUILD_NUMBER " -avx2
29+ --cpuset-mems=" $NUMA_NODE " --privileged=true -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-" $NUMA_NODE " -avx2 cpu-test-" $NUMA_NODE " -avx2
2930
3031function cpu_tests() {
3132 set -e
3233 export NUMA_NODE=$2
33- export BUILDKITE_BUILD_NUMBER=$3
3434
3535 # offline inference
36- docker exec cpu-test-" $BUILDKITE_BUILD_NUMBER " -avx2- " $NUMA_NODE " bash -c "
36+ docker exec cpu-test-" $NUMA_NODE " -avx2 bash -c "
3737 set -e
3838 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
3939
4040 # Run basic model test
41- docker exec cpu-test-" $BUILDKITE_BUILD_NUMBER " - " $ NUMA_NODE" bash -c "
41+ docker exec cpu-test-" $NUMA_NODE " bash -c "
4242 set -e
43- pytest -v -s tests/kernels/test_cache.py -m cpu_model
44- pytest -v -s tests/kernels/test_mla_decode_cpu.py -m cpu_model
45- pytest -v -s tests/models/decoder_only/language -m cpu_model
46- pytest -v -s tests/models/embedding/language -m cpu_model
47- pytest -v -s tests/models/encoder_decoder/language -m cpu_model
48- pytest -v -s tests/models/decoder_only/audio_language -m cpu_model
49- pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"
43+ pytest -v -s tests/kernels/attention/test_cache.py -m cpu_model
44+ pytest -v -s tests/kernels/attention/test_mla_decode_cpu.py -m cpu_model
45+ pytest -v -s tests/models/language/generation -m cpu_model
46+ pytest -v -s tests/models/language/pooling -m cpu_model
47+ pytest -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_mllama.py -m cpu_model"
5048
5149 # Run compressed-tensor test
52- docker exec cpu-test-" $BUILDKITE_BUILD_NUMBER " - " $ NUMA_NODE" bash -c "
50+ docker exec cpu-test-" $NUMA_NODE " bash -c "
5351 set -e
5452 pytest -s -v \
5553 tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_static_setup \
5654 tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynamic_per_token"
5755
5856 # Run AWQ test
59- docker exec cpu-test-" $BUILDKITE_BUILD_NUMBER " - " $ NUMA_NODE" bash -c "
57+ docker exec cpu-test-" $NUMA_NODE " bash -c "
6058 set -e
6159 pytest -s -v \
6260 tests/quantization/test_ipex_quant.py"
6361
6462 # Run chunked-prefill and prefix-cache test
65- docker exec cpu-test-" $BUILDKITE_BUILD_NUMBER " - " $ NUMA_NODE" bash -c "
63+ docker exec cpu-test-" $NUMA_NODE " bash -c "
6664 set -e
6765 pytest -s -v -k cpu_model \
6866 tests/basic_correctness/test_chunked_prefill.py"
6967
7068 # online serving
71- docker exec cpu-test-" $BUILDKITE_BUILD_NUMBER " - " $ NUMA_NODE" bash -c "
69+ docker exec cpu-test-" $NUMA_NODE " bash -c "
7270 set -e
7371 export VLLM_CPU_KVCACHE_SPACE=10
7472 export VLLM_CPU_OMP_THREADS_BIND=$1
@@ -83,12 +81,12 @@ function cpu_tests() {
8381 --tokenizer facebook/opt-125m"
8482
8583 # Run multi-lora tests
86- docker exec cpu-test-" $BUILDKITE_BUILD_NUMBER " - " $ NUMA_NODE" bash -c "
84+ docker exec cpu-test-" $NUMA_NODE " bash -c "
8785 set -e
8886 pytest -s -v \
8987 tests/lora/test_qwen2vl.py"
9088}
9189
9290# All of CPU tests are expected to be finished less than 40 mins.
9391export -f cpu_tests
94- timeout 40m bash -c " cpu_tests $CORE_RANGE $NUMA_NODE $BUILDKITE_BUILD_NUMBER "
92+ timeout 40m bash -c " cpu_tests $CORE_RANGE $NUMA_NODE "
0 commit comments