Skip to content

Commit 6d90ff6

Browse files
committed
v0.11.2
Signed-off-by: Javier <25750030+SystemPanic@users.noreply.github.com>
1 parent 8fe2262 commit 6d90ff6

File tree

13 files changed

+28
-29
lines changed

13 files changed

+28
-29
lines changed

CMakeLists.txt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ endif()
5656
# requirements.txt files and should be kept consistent. The ROCm torch
5757
# versions are derived from docker/Dockerfile.rocm
5858
#
59-
set(TORCH_SUPPORTED_VERSION_CUDA "2.7.1")
60-
set(TORCH_SUPPORTED_VERSION_ROCM "2.7.1")
59+
set(TORCH_SUPPORTED_VERSION_CUDA "2.8.0")
60+
set(TORCH_SUPPORTED_VERSION_ROCM "2.8.0")
6161

6262
#
6363
# Try to find python package with an executable that exactly matches
@@ -329,7 +329,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
329329
string(APPEND CMAKE_CXX_FLAGS "/Zc:__cplusplus")
330330
set(CMAKE_CUDA_FLAGS_DEBUG "")
331331
endif()
332-
>>>>>>> v0.11.1 RC
333332

334333
# Use the specified CUTLASS source directory for compilation if VLLM_CUTLASS_SRC_DIR is provided
335334
if (DEFINED ENV{VLLM_CUTLASS_SRC_DIR})

cmake/external_projects/triton_kernels.cmake

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Install OpenAI triton_kernels from https://github.com/triton-lang/triton/tree/main/python/triton_kernels
22

3-
set(DEFAULT_TRITON_KERNELS_TAG "v3.5.0")
3+
set(DEFAULT_TRITON_KERNELS_TAG "v3.4.0-windows.post20")
44

55
# Set TRITON_KERNELS_SRC_DIR for use with local development with vLLM. We expect TRITON_KERNELS_SRC_DIR to
66
# be directly set to the triton_kernels python directory.
@@ -12,12 +12,12 @@ if (DEFINED ENV{TRITON_KERNELS_SRC_DIR})
1212
)
1313

1414
else()
15-
set(TRITON_GIT "https://github.com/triton-lang/triton.git")
15+
set(TRITON_GIT "https://github.com/woct0rdho/triton-windows.git")
1616
message (STATUS "[triton_kernels] Fetch from ${TRITON_GIT}:${DEFAULT_TRITON_KERNELS_TAG}")
1717
FetchContent_Declare(
1818
triton_kernels
1919
# TODO (varun) : Fetch just the triton_kernels directory from Triton
20-
GIT_REPOSITORY https://github.com/triton-lang/triton.git
20+
GIT_REPOSITORY https://github.com/woct0rdho/triton-windows.git
2121
GIT_TAG ${DEFAULT_TRITON_KERNELS_TAG}
2222
GIT_PROGRESS TRUE
2323
SOURCE_SUBDIR python/triton_kernels/triton_kernels

csrc/attention/merge_attn_states.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ __global__ void merge_attn_states_kernel(
4242

4343
float p_lse = prefix_lse[head_idx * num_tokens + token_idx];
4444
float s_lse = suffix_lse[head_idx * num_tokens + token_idx];
45-
p_lse = std::isinf(p_lse) ? -std::numeric_limits<float>::infinity() : p_lse;
46-
s_lse = std::isinf(s_lse) ? -std::numeric_limits<float>::infinity() : s_lse;
45+
p_lse = ::isinf(p_lse) ? -std::numeric_limits<float>::infinity() : p_lse;
46+
s_lse = ::isinf(s_lse) ? -std::numeric_limits<float>::infinity() : s_lse;
4747

4848
const float max_lse = fmaxf(p_lse, s_lse);
4949

@@ -55,7 +55,7 @@ __global__ void merge_attn_states_kernel(
5555
prefix_output (expected to be all zeros) and prefix_lse (-inf) to fix
5656
this problem.
5757
*/
58-
if (std::isinf(max_lse)) {
58+
if (::isinf(max_lse)) {
5959
if (pack_offset < head_size) {
6060
// Pack 128b load
6161
pack_128b_t p_out_pack = reinterpret_cast<const pack_128b_t*>(

csrc/fused_qknorm_rope_kernel.cu

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,17 @@ template <typename T, int num>
5656
struct packed_as;
5757
// Specialization for packed_as used in this kernel.
5858
template <>
59-
struct packed_as<uint, 1> {
60-
using type = uint;
59+
struct packed_as<unsigned int, 1> {
60+
using type = unsigned int;
6161
};
6262

6363
template <>
64-
struct packed_as<uint, 2> {
64+
struct packed_as<unsigned int, 2> {
6565
using type = uint2;
6666
};
6767

6868
template <>
69-
struct packed_as<uint, 4> {
69+
struct packed_as<unsigned int, 4> {
7070
using type = uint4;
7171
};
7272

@@ -169,8 +169,8 @@ __global__ void fusedQKNormRopeKernel(
169169
"numSizeBytes must be a multiple of 4");
170170
constexpr int vecSize =
171171
elemSizeBytes /
172-
4; // Use packed_as<uint, vecSize> to perform loading/saving.
173-
using vec_T = typename tensorrt_llm::common::packed_as<uint, vecSize>::type;
172+
4; // Use packed_as<unsigned int, vecSize> to perform loading/saving.
173+
using vec_T = typename tensorrt_llm::common::packed_as<unsigned int, vecSize>::type;
174174

175175
int offsetWarp; // Offset for the warp
176176
if (isQ) {

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ requires = [
66
"packaging>=24.2",
77
"setuptools>=77.0.3,<81.0.0",
88
"setuptools-scm>=8.0",
9-
"torch == 2.9.0",
9+
"torch == 2.8.0",
1010
"wheel",
1111
"jinja2",
1212
]

requirements/build.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ ninja
44
packaging>=24.2
55
setuptools>=77.0.3,<81.0.0
66
setuptools-scm>=8
7-
torch==2.9.0
7+
torch==2.8.0
88
wheel
99
jinja2>=3.1.6
1010
regex

requirements/cpu-build.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ setuptools>=77.0.3,<81.0.0
55
setuptools-scm>=8
66
--extra-index-url https://download.pytorch.org/whl/cpu
77
torch==2.8.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
8-
torch==2.9.0; platform_system == "Darwin"
8+
torch==2.8.0; platform_system == "Darwin"
99
torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
1010
scons; platform_machine == "aarch64" # needed to build Arm Compute Library (ACL)
1111
wheel

requirements/cpu.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ packaging>=24.2
88
setuptools>=77.0.3,<81.0.0
99
--extra-index-url https://download.pytorch.org/whl/cpu
1010
torch==2.8.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
11-
torch==2.9.0; platform_system == "Darwin"
11+
torch==2.8.0; platform_system == "Darwin"
1212
torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
1313

1414
# required for the image processor of minicpm-o-2_6, this must be updated alongside torch

requirements/cuda.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ numba == 0.61.2 # Required for N-gram speculative decoding
55

66
# Dependencies for NVIDIA GPUs
77
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
8-
torch==2.9.0
9-
torchaudio==2.9.0
8+
torch==2.8.0
9+
torchaudio==2.8.0
1010
# These must be updated alongside torch
1111
torchvision==0.24.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
1212
xformers==0.0.33.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.9

requirements/rocm-build.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
-r common.txt
33

44
--extra-index-url https://download.pytorch.org/whl/rocm6.4
5-
torch==2.9.0
5+
torch==2.8.0
66
torchvision==0.24.0
7-
torchaudio==2.9.0
7+
torchaudio==2.8.0
88

99
triton==3.5.0
1010
cmake>=3.26.1,<4

0 commit comments

Comments (0)