File tree Expand file tree Collapse file tree 1 file changed +6
-0
lines changed
cpp/tensorrt_llm/kernels/communicationKernels Expand file tree Collapse file tree 1 file changed +6
-0
lines changed Original file line number Diff line number Diff line change @@ -137,11 +137,17 @@ public:
137137 // corresponding CTA has not been launched.
138138 for (int flag_idx = blockIdx .x ; flag_idx < kBarrierFlagCount ; flag_idx += gridDim .x )
139139 {
140+ #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
140141 asm volatile (
141142 " st.global.relaxed.sys.b32 [%1], %0;" ::" r" (m_flag_value), " l" (m_target_flag + flag_idx * NRanks));
143+ #else
144+ st_flag (m_target_flag + flag_idx * NRanks, m_flag_value);
145+ #endif
142146 }
147+ #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900))
143148 // Single release fence
144149 asm volatile (" fence.release.sys;" );
150+ #endif
145151
146152 while (ld_flag (m_current_flag) == prev_flag (m_flag_value))
147153 {
You can’t perform that action at this time.
0 commit comments