diff --git a/vllm/v1/worker/gpu/block_table.py b/vllm/v1/worker/gpu/block_table.py index ff24e88ede2c..b31e9b179d26 100644 --- a/vllm/v1/worker/gpu/block_table.py +++ b/vllm/v1/worker/gpu/block_table.py @@ -3,10 +3,9 @@ from collections.abc import Iterable import torch -import triton -import triton.language as tl from vllm.attention.backends.utils import PAD_SLOT_ID +from vllm.triton_utils import tl, triton from vllm.utils.math_utils import cdiv from vllm.v1.utils import CpuGpuBuffer diff --git a/vllm/v1/worker/gpu/input_batch.py b/vllm/v1/worker/gpu/input_batch.py index 89f375649146..8313b32d2979 100644 --- a/vllm/v1/worker/gpu/input_batch.py +++ b/vllm/v1/worker/gpu/input_batch.py @@ -7,9 +7,8 @@ import numba.types as types import numpy as np import torch -import triton -import triton.language as tl +from vllm.triton_utils import tl, triton from vllm.utils import random_uuid from vllm.utils.math_utils import cdiv from vllm.v1.utils import CpuGpuBuffer diff --git a/vllm/v1/worker/gpu/sampler.py b/vllm/v1/worker/gpu/sampler.py index 55f98ca6bb6a..e78711d18fe4 100644 --- a/vllm/v1/worker/gpu/sampler.py +++ b/vllm/v1/worker/gpu/sampler.py @@ -3,10 +3,9 @@ from collections.abc import Callable import torch -import triton -import triton.language as tl from vllm.config.model import LogprobsMode +from vllm.triton_utils import tl, triton from vllm.v1.outputs import LogprobsTensors, SamplerOutput from vllm.v1.sample.ops.topk_topp_sampler import apply_top_k_top_p from vllm.v1.worker.gpu.states import SamplingMetadata