Skip to content

Commit 6d90ff6

Browse files
committed
v0.11.2
Signed-off-by: Javier <25750030+SystemPanic@users.noreply.github.com>
1 parent 8fe2262 commit 6d90ff6

File tree

13 files changed

+28
-29
lines changed

13 files changed

+28
-29
lines changed

CMakeLists.txt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ endif()
5656
# requirements.txt files and should be kept consistent. The ROCm torch
5757
# versions are derived from docker/Dockerfile.rocm
5858
#
59-
set(TORCH_SUPPORTED_VERSION_CUDA "2.7.1")
60-
set(TORCH_SUPPORTED_VERSION_ROCM "2.7.1")
59+
set(TORCH_SUPPORTED_VERSION_CUDA "2.8.0")
60+
set(TORCH_SUPPORTED_VERSION_ROCM "2.8.0")
6161

6262
#
6363
# Try to find python package with an executable that exactly matches
@@ -329,7 +329,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
329329
string(APPEND CMAKE_CXX_FLAGS "/Zc:__cplusplus")
330330
set(CMAKE_CUDA_FLAGS_DEBUG "")
331331
endif()
332-
>>>>>>> v0.11.1 RC
333332

334333
# Use the specified CUTLASS source directory for compilation if VLLM_CUTLASS_SRC_DIR is provided
335334
if (DEFINED ENV{VLLM_CUTLASS_SRC_DIR})

cmake/external_projects/triton_kernels.cmake

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Install OpenAI triton_kernels from https://github.com/triton-lang/triton/tree/main/python/triton_kernels
22

3-
set(DEFAULT_TRITON_KERNELS_TAG "v3.5.0")
3+
set(DEFAULT_TRITON_KERNELS_TAG "v3.4.0-windows.post20")
44

55
# Set TRITON_KERNELS_SRC_DIR for use with local development with vLLM. We expect TRITON_KERNELS_SRC_DIR to
66
# be directly set to the triton_kernels python directory.
@@ -12,12 +12,12 @@ if (DEFINED ENV{TRITON_KERNELS_SRC_DIR})
1212
)
1313

1414
else()
15-
set(TRITON_GIT "https://github.com/triton-lang/triton.git")
15+
set(TRITON_GIT "https://github.com/woct0rdho/triton-windows.git")
1616
message (STATUS "[triton_kernels] Fetch from ${TRITON_GIT}:${DEFAULT_TRITON_KERNELS_TAG}")
1717
FetchContent_Declare(
1818
triton_kernels
1919
# TODO (varun) : Fetch just the triton_kernels directory from Triton
20-
GIT_REPOSITORY https://github.com/triton-lang/triton.git
20+
GIT_REPOSITORY https://github.com/woct0rdho/triton-windows.git
2121
GIT_TAG ${DEFAULT_TRITON_KERNELS_TAG}
2222
GIT_PROGRESS TRUE
2323
SOURCE_SUBDIR python/triton_kernels/triton_kernels

csrc/attention/merge_attn_states.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ __global__ void merge_attn_states_kernel(
4242

4343
float p_lse = prefix_lse[head_idx * num_tokens + token_idx];
4444
float s_lse = suffix_lse[head_idx * num_tokens + token_idx];
45-
p_lse = std::isinf(p_lse) ? -std::numeric_limits<float>::infinity() : p_lse;
46-
s_lse = std::isinf(s_lse) ? -std::numeric_limits<float>::infinity() : s_lse;
45+
p_lse = ::isinf(p_lse) ? -std::numeric_limits<float>::infinity() : p_lse;
46+
s_lse = ::isinf(s_lse) ? -std::numeric_limits<float>::infinity() : s_lse;
4747

4848
const float max_lse = fmaxf(p_lse, s_lse);
4949

@@ -55,7 +55,7 @@ __global__ void merge_attn_states_kernel(
5555
prefix_output (expected to be all zeros) and prefix_lse (-inf) to fix
5656
this problem.
5757
*/
58-
if (std::isinf(max_lse)) {
58+
if (::isinf(max_lse)) {
5959
if (pack_offset < head_size) {
6060
// Pack 128b load
6161
pack_128b_t p_out_pack = reinterpret_cast<const pack_128b_t*>(

csrc/fused_qknorm_rope_kernel.cu

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,17 @@ template <typename T, int num>
5656
struct packed_as;
5757
// Specialization for packed_as used in this kernel.
5858
template <>
59-
struct packed_as<uint, 1> {
60-
using type = uint;
59+
struct packed_as<unsigned int, 1> {
60+
using type = unsigned int;
6161
};
6262

6363
template <>
64-
struct packed_as<uint, 2> {
64+
struct packed_as<unsigned int, 2> {
6565
using type = uint2;
6666
};
6767

6868
template <>
69-
struct packed_as<uint, 4> {
69+
struct packed_as<unsigned int, 4> {
7070
using type = uint4;
7171
};
7272

@@ -169,8 +169,8 @@ __global__ void fusedQKNormRopeKernel(
169169
"numSizeBytes must be a multiple of 4");
170170
constexpr int vecSize =
171171
elemSizeBytes /
172-
4; // Use packed_as<uint, vecSize> to perform loading/saving.
173-
using vec_T = typename tensorrt_llm::common::packed_as<uint, vecSize>::type;
172+
4; // Use packed_as<unsigned int, vecSize> to perform loading/saving.
173+
using vec_T = typename tensorrt_llm::common::packed_as<unsigned int, vecSize>::type;
174174

175175
int offsetWarp; // Offset for the warp
176176
if (isQ) {

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ requires = [
66
"packaging>=24.2",
77
"setuptools>=77.0.3,<81.0.0",
88
"setuptools-scm>=8.0",
9-
"torch == 2.9.0",
9+
"torch == 2.8.0",
1010
"wheel",
1111
"jinja2",
1212
]

requirements/build.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ ninja
44
packaging>=24.2
55
setuptools>=77.0.3,<81.0.0
66
setuptools-scm>=8
7-
torch==2.9.0
7+
torch==2.8.0
88
wheel
99
jinja2>=3.1.6
1010
regex

requirements/cpu-build.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ setuptools>=77.0.3,<81.0.0
55
setuptools-scm>=8
66
--extra-index-url https://download.pytorch.org/whl/cpu
77
torch==2.8.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
8-
torch==2.9.0; platform_system == "Darwin"
8+
torch==2.8.0; platform_system == "Darwin"
99
torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
1010
scons; platform_machine == "aarch64" # needed to build Arm Compute Library (ACL)
1111
wheel

requirements/cpu.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ packaging>=24.2
88
setuptools>=77.0.3,<81.0.0
99
--extra-index-url https://download.pytorch.org/whl/cpu
1010
torch==2.8.0+cpu; platform_machine == "x86_64" or platform_machine == "s390x"
11-
torch==2.9.0; platform_system == "Darwin"
11+
torch==2.8.0; platform_system == "Darwin"
1212
torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
1313

1414
# required for the image processor of minicpm-o-2_6, this must be updated alongside torch

requirements/cuda.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ numba == 0.61.2 # Required for N-gram speculative decoding
55

66
# Dependencies for NVIDIA GPUs
77
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
8-
torch==2.9.0
9-
torchaudio==2.9.0
8+
torch==2.8.0
9+
torchaudio==2.8.0
1010
# These must be updated alongside torch
1111
torchvision==0.24.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
1212
xformers==0.0.33.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.9

requirements/rocm-build.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
-r common.txt
33

44
--extra-index-url https://download.pytorch.org/whl/rocm6.4
5-
torch==2.9.0
5+
torch==2.8.0
66
torchvision==0.24.0
7-
torchaudio==2.9.0
7+
torchaudio==2.8.0
88

99
triton==3.5.0
1010
cmake>=3.26.1,<4

0 commit comments

Comments (0)