File tree Expand file tree Collapse file tree 2 files changed +5
-7
lines changed Expand file tree Collapse file tree 2 files changed +5
-7
lines changed Original file line number Diff line number Diff line change @@ -28,15 +28,16 @@ docker run --privileged --net host --shm-size=16G -it \
28
28
&& echo TEST_3 \
29
29
&& pytest -v -s /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine \
30
30
&& echo TEST_4 \
31
- && pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py \
32
- && echo TEST_5 \
33
31
&& python3 /workspace/vllm/examples/offline_inference/tpu.py \
34
- && echo TEST_6 \
32
+ && echo TEST_5 \
35
33
&& pytest -s -v /workspace/vllm/tests/tpu/worker/test_tpu_model_runner.py \
36
- && echo TEST_7 \
34
+ && echo TEST_6 \
37
35
&& pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py" \
38
36
39
37
40
38
# TODO: This test fails because it uses RANDOM_SEED sampling
41
39
# && VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
42
40
41
+ # TODO: Re-enable this after fixing recompilation in quantization.
42
+ # && echo TEST_4 \
43
+ # && pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py \
Original file line number Diff line number Diff line change 31
31
reason = "This is a basic test for TPU only" )
32
32
@pytest .mark .parametrize ("model" , MODELS )
33
33
@pytest .mark .parametrize ("max_tokens" , [5 ])
34
- @pytest .mark .parametrize ("enforce_eager" , [True ])
35
34
@pytest .mark .parametrize ("tensor_parallel_size" , TENSOR_PARALLEL_SIZES )
36
35
def test_models (
37
36
vllm_runner : type [VllmRunner ],
38
37
monkeypatch : pytest .MonkeyPatch ,
39
38
model : str ,
40
39
max_tokens : int ,
41
- enforce_eager : bool ,
42
40
tensor_parallel_size : int ,
43
41
) -> None :
44
42
prompt = "The next numbers of the sequence " + ", " .join (
@@ -51,7 +49,6 @@ def test_models(
51
49
with vllm_runner (
52
50
model ,
53
51
max_model_len = 8192 ,
54
- enforce_eager = enforce_eager ,
55
52
gpu_memory_utilization = 0.7 ,
56
53
max_num_seqs = 16 ,
57
54
tensor_parallel_size = tensor_parallel_size ) as vllm_model :
You can’t perform that action at this time.
0 commit comments