3 files changed: +7 −4 lines

.buildkite/scripts/hardware_ci
@@ -42,7 +42,7 @@ docker run \
     pytest -v -s v1/sample --ignore=v1/sample/test_logprobs.py --ignore=v1/sample/test_logprobs_e2e.py
     pytest -v -s v1/worker --ignore=v1/worker/test_gpu_model_runner.py
     pytest -v -s v1/structured_output
-    pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_eagle.py --ignore=v1/spec_decode/test_tree_attention.py
+    pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_tree_attention.py
     pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_shared_storage_connector.py
     pytest -v -s v1/test_metrics
     pytest -v -s v1/test_serial_utils.py
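This hunk drops the `--ignore=v1/spec_decode/test_eagle.py` exclusion, so `test_eagle.py` now runs in this hardware CI job. A minimal sketch of replaying the same selection locally, assuming a vLLM checkout with `pytest` installed (the `subprocess` wrapper is illustrative, not part of the change):

```python
# Sketch: replay the CI's spec_decode selection from a local checkout.
# The command mirrors the script line above; paths are relative to the
# tests directory the CI script runs from.
import subprocess

cmd = [
    "pytest", "-v", "-s", "v1/spec_decode",
    "--ignore=v1/spec_decode/test_max_len.py",
    "--ignore=v1/spec_decode/test_tree_attention.py",
]
subprocess.run(cmd, check=True)
```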
@@ -1143,6 +1143,8 @@ def get_attn_backend_list_based_on_platform() -> list[str]:
             print("Skip FLASH_ATTN on ROCm as aiter is not installed")

         return attn_backend_list
+    elif current_platform.is_xpu():
+        return ["FLASH_ATTN", "TRITON_ATTN"]
     else:
         raise ValueError("Unsupported platform")

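With this branch, the helper reports FLASH_ATTN and TRITON_ATTN as available on XPU instead of falling through to `ValueError("Unsupported platform")`. A hedged sketch of how such a helper is typically consumed in a parametrized test (the `tests.utils` import path is an assumption for illustration):

```python
# Sketch: parametrize a test over whichever backends the platform supports.
# The import path below is assumed; adjust to where the helper actually lives.
import pytest

from tests.utils import get_attn_backend_list_based_on_platform


@pytest.mark.parametrize("attn_backend",
                         get_attn_backend_list_based_on_platform())
def test_runs_per_backend(attn_backend: str) -> None:
    # On XPU this parametrization now yields FLASH_ATTN and TRITON_ATTN
    # rather than failing at collection time with "Unsupported platform".
    assert isinstance(attn_backend, str)
```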
@@ -72,12 +72,13 @@ def __init__(

         self.attn_metadata_builder: Optional[AttentionMetadataBuilder] = None

-        self.use_cuda_graph = (self.vllm_config.compilation_config.level
+        self.use_cuda_graph = (not current_platform.is_xpu()
+                               and self.vllm_config.compilation_config.level
                                == CompilationLevel.PIECEWISE and
                                not self.vllm_config.model_config.enforce_eager)
         self.cudagraph_batch_sizes = list(
-            reversed(
-                self.vllm_config.compilation_config.cudagraph_capture_sizes))
+            reversed(self.vllm_config.compilation_config.
+                     cudagraph_capture_sizes)) if self.use_cuda_graph else []

         # persistent buffers for cuda graph
         self.input_ids = torch.zeros(self.max_num_tokens,
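The net effect of this hunk: CUDA graphs are disabled whenever the platform is XPU, and the capture sizes are only materialized when graphs are actually in use. A standalone illustration of that gating under stated assumptions (`CompilationConfig` and `PIECEWISE` here are hypothetical stand-ins, not vLLM's real classes):

```python
# Standalone sketch of the gating introduced above; names are stand-ins.
from dataclasses import dataclass, field

PIECEWISE = 3  # assumed sentinel for CompilationLevel.PIECEWISE


@dataclass
class CompilationConfig:
    level: int = PIECEWISE
    cudagraph_capture_sizes: list[int] = field(
        default_factory=lambda: [1, 2, 4, 8])


def cudagraph_batch_sizes(is_xpu: bool, enforce_eager: bool,
                          cfg: CompilationConfig) -> list[int]:
    use_cuda_graph = (not is_xpu and cfg.level == PIECEWISE
                      and not enforce_eager)
    # Largest sizes first, as in the diff; empty when graphs are disabled.
    return list(reversed(cfg.cudagraph_capture_sizes)) if use_cuda_graph else []


cfg = CompilationConfig()
assert cudagraph_batch_sizes(is_xpu=False, enforce_eager=False, cfg=cfg) == [8, 4, 2, 1]
assert cudagraph_batch_sizes(is_xpu=True, enforce_eager=False, cfg=cfg) == []
```

Making `cudagraph_batch_sizes` an empty list when graphs are off avoids allocating capture-size state that would never be used on XPU.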