@@ -87,6 +87,7 @@ ncclDataType_t getNcclDataType(at::ScalarType type) {
8787
8888bool complexViewAsRealAllowed (const ReduceOp& reduceOp) {
8989 switch (reduceOp) {
90+ // NOLINTNEXTLINE(bugprone-branch-clone)
9091 case ReduceOp::SUM:
9192 return true ;
9293 case ReduceOp::AVG:
@@ -119,6 +120,7 @@ ncclRedOpRAII unpackPreMulSum(
119120 &preMulSum,
120121 // https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/ops.html#ncclredopcreatepremulsum
121122 // tells us that the scalar input is strictly a multiplier.
123+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
122124 /* scalar=*/ has_tensor ? const_cast <T*>(ptr_factor) : &scalar_factor,
123125 dataType,
124126 residence,
@@ -318,6 +320,7 @@ static void cacheAllocatorRegisterHook(
318320 auto & ncclComm = it.first ;
319321 auto & devIdx = it.second ;
320322 if (te.device_ == devIdx) {
323+ // NOLINTNEXTLINE(performance-no-int-to-ptr)
321324 ncclComm->registerSegment (reinterpret_cast <void *>(te.addr_ ), te.size_ );
322325 }
323326 }
@@ -336,6 +339,7 @@ static void cacheAllocatorDeregisterHook(
336339 auto & ncclComm = it.first ;
337340 auto & devIdx = it.second ;
338341 if (te.device_ == devIdx) {
342+ // NOLINTNEXTLINE(performance-no-int-to-ptr)
339343 ncclComm->deregisterSegment (reinterpret_cast <void *>(te.addr_ ));
340344 }
341345 }
@@ -869,7 +873,6 @@ ProcessGroupNCCL::ProcessGroupNCCL(
869873 : Backend(rank, size),
870874 store_(std::move(store)),
871875 options_(std::move(options)),
872-
873876 traceKeyStart_(getTraceStartKey(" NCCL" , rank)),
874877 traceKeyEnd_(getTraceEndKey(" NCCL" , rank)),
875878 terminateProcessGroup_(false ),
@@ -888,7 +891,7 @@ ProcessGroupNCCL::ProcessGroupNCCL(
888891 // other threads and cause segfaults.
889892 const auto ncclVersion = getNcclVersion ();
890893 this ->setGroupUid (options_->group_name );
891- this ->localDeviceCount_ = at::cuda::getNumGPUs ();
894+ this ->localDeviceCount_ = static_cast < int >( at::cuda::getNumGPUs () );
892895 logPrefix_ = createLogPrefix ();
893896 blockingWait_ = getCvarBool (TORCH_NCCL_BLOCKING_WAIT, false );
894897 asyncErrorHandling_ = static_cast <ErrorHandlingMode>(
@@ -1013,8 +1016,8 @@ ProcessGroupNCCL::ProcessGroupNCCL(
10131016 this ->globalRankStride = 0 ;
10141017 } else {
10151018 bool ranksAreStrided = true ;
1016- int startRank = options_->global_ranks_in_group [0 ];
1017- int stride =
1019+ auto startRank = options_->global_ranks_in_group [0 ];
1020+ auto stride =
10181021 options_->global_ranks_in_group [1 ] - options_->global_ranks_in_group [0 ];
10191022 for (std::vector<uint64_t >::size_type i = 0 ;
10201023 i < options_->global_ranks_in_group .size ();
@@ -1377,6 +1380,7 @@ void ProcessGroupNCCL::shutdown() {
13771380 this ->abort ();
13781381}
13791382
1383+ // NOLINTNEXTLINE(bugprone-exception-escape)
13801384ProcessGroupNCCL::~ProcessGroupNCCL () {
13811385 LOG (INFO) << logPrefix () << " ProcessGroupNCCL destructor entered." ;
13821386
0 commit comments