Skip to content

Commit a73f649

Browse files
Update launch_bounds_utils.h to compile correctly across multiple CUDA architectures and fix the PTXAS out-of-range warning (#25843)
Signed-off-by: Salvatore Cena <[email protected]> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 001e50c commit a73f649

File tree

1 file changed

+29
-3
lines changed

1 file changed

+29
-3
lines changed

csrc/launch_bounds_utils.h

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,37 @@
88
#define VLLM_LAUNCH_BLOCKS_CAP 4
99
#endif
1010

11-
// compile-time estimate of max threads per SM for launch bounds.
11+
// Compile-time estimate of max threads per SM for launch bounds.
12+
// Families: 1024, 1536, 2048 threads/SM.
1213
#ifndef VLLM_MAX_THREADS_PER_SM
13-
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 300
14-
#define VLLM_MAX_THREADS_PER_SM 1536
14+
#ifdef __CUDA_ARCH__
15+
16+
/* 1024 thr/SM: Turing (sm_75) */
17+
#if (__CUDA_ARCH__ == 750)
18+
#define VLLM_MAX_THREADS_PER_SM 1024
19+
20+
/* 1536 thr/SM: Ampere GA10x (sm_86/87), Ada (sm_89),
21+
GB20x consumer (sm_120), GB10 (sm_121), Thor (sm_101 or sm_110) */
22+
#elif (__CUDA_ARCH__ == 860) || (__CUDA_ARCH__ == 870) || \
23+
(__CUDA_ARCH__ == 890) || (__CUDA_ARCH__ == 1010) || \
24+
(__CUDA_ARCH__ == 1100) || (__CUDA_ARCH__ == 1200) || \
25+
(__CUDA_ARCH__ == 1210)
26+
#define VLLM_MAX_THREADS_PER_SM 1536
27+
28+
/* 2048 thr/SM: Volta (sm_70/72), Ampere GA100 (sm_80),
29+
Hopper (sm_90), Blackwell (sm_100/103) */
30+
#elif (__CUDA_ARCH__ == 700) || (__CUDA_ARCH__ == 720) || \
31+
(__CUDA_ARCH__ == 800) || (__CUDA_ARCH__ == 900) || \
32+
(__CUDA_ARCH__ == 1000) || (__CUDA_ARCH__ == 1030)
33+
#define VLLM_MAX_THREADS_PER_SM 2048
34+
35+
/* Fallback: use 2048 for any CC not listed above (older or future) */
36+
#else
37+
#define VLLM_MAX_THREADS_PER_SM 2048
38+
#endif
39+
1540
#else
41+
/* Host pass (no __CUDA_ARCH__): neutral default */
1642
#define VLLM_MAX_THREADS_PER_SM 2048
1743
#endif
1844
#endif

0 commit comments

Comments
 (0)