Skip to content

Commit 3779bc9

Browse files
committed
Merge branch 'user/yibinl/fix_build' into 'release/1.1.1-25.12-NIM'
Cherry-pick: [https://nvbugs/5655885][fix] fix invalid instruction error in 2shot ar kernel on Ampere (#9394) See merge request ftp/tekit!9896 Signed-off-by: Yibin Li <yibinl@nvidia.com>
2 parents d4ec9ef + 22b2ca9 commit 3779bc9

File tree

2 files changed

+11
-5
lines changed

2 files changed

+11
-5
lines changed

cpp/tensorrt_llm/kernels/communicationKernels/allReduceFusionKernels.cu

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,11 +134,17 @@ public:
134134
// corresponding CTA has not been launched.
135135
for (int flag_idx = blockIdx.x; flag_idx < kBarrierFlagCount; flag_idx += gridDim.x)
136136
{
137+
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
137138
asm volatile(
138139
"st.global.relaxed.sys.b32 [%1], %0;" ::"r"(m_flag_value), "l"(m_target_flag + flag_idx * NRanks));
140+
#else
141+
st_flag(m_target_flag + flag_idx * NRanks, m_flag_value);
142+
#endif
139143
}
144+
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
140145
// Single release fence
141146
asm volatile("fence.release.sys;");
147+
#endif
142148

143149
while (ld_flag(m_current_flag) == prev_flag(m_flag_value))
144150
{

jenkins/Build.groovy

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -62,22 +62,22 @@ def BUILD_CONFIGS = [
6262
(CONFIG_LINUX_X86_64_VANILLA) : [
6363
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
6464
(TARNAME) : "TensorRT-LLM.tar.gz",
65-
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
65+
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
6666
],
6767
(CONFIG_LINUX_X86_64_PYBIND) : [
6868
(WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
6969
(TARNAME) : "pybind-TensorRT-LLM.tar.gz",
70-
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
70+
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
7171
],
7272
(CONFIG_LINUX_X86_64_SINGLE_DEVICE) : [
7373
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=0 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars ENABLE_UCX=0 --micro_benchmarks",
7474
(TARNAME) : "single-device-TensorRT-LLM.tar.gz",
75-
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
75+
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
7676
],
7777
(CONFIG_LINUX_X86_64_LLVM) : [
7878
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --micro_benchmarks -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD",
7979
(TARNAME) : "llvm-TensorRT-LLM.tar.gz",
80-
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
80+
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
8181
],
8282
(CONFIG_LINUX_AARCH64): [
8383
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl",
@@ -504,7 +504,7 @@ def buildWheelInContainer(pipeline, libraries=[], triple=X86_64_TRIPLE, clean=fa
504504
if (triple == AARCH64_TRIPLE) {
505505
extra_args = "-a '90-real;100-real;103-real;120-real'"
506506
} else {
507-
extra_args = "-a '90-real;100-real;103-real;120-real'"
507+
extra_args = "-a '80-real;86-real;89-real;90-real;100-real;103-real;120-real'"
508508
}
509509
}
510510
if (pre_cxx11abi) {

0 commit comments

Comments
 (0)