Commit f9ae3fa

cyyever authored and pytorchmergebot committed
[Distributed] [19/N] Fix clang-tidy warnings in torch/csrc/distributed/ (pytorch#138903)
Fixes #ISSUE_NUMBER

Pull Request resolved: pytorch#138903
Approved by: https://github.com/ezyang
1 parent 39aa3cb commit f9ae3fa

16 files changed: 43 additions & 34 deletions

torch/csrc/distributed/c10d/CUDASymmetricMemory.hpp

Lines changed: 4 additions & 6 deletions
@@ -4,8 +4,7 @@
 #include <torch/csrc/distributed/c10d/Store.hpp>
 #include <torch/csrc/distributed/c10d/SymmetricMemory.hpp>
 
-namespace c10d {
-namespace symmetric_memory {
+namespace c10d::symmetric_memory {
 
 #if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED)
 using HandleType = CUmemGenericAllocationHandle;
@@ -85,13 +84,13 @@ struct Block : public c10::intrusive_ptr_target {
       size_t block_size,
       size_t buffer_size,
       size_t signal_pad_offset,
-      const std::string& group_name)
+      std::string group_name)
       : handle(handle),
         device_idx(device_idx),
         block_size(block_size),
         buffer_size(buffer_size),
         signal_pad_offset(signal_pad_offset),
-        group_name(group_name),
+        group_name(std::move(group_name)),
         symm_mem(nullptr) {}
 };
 
@@ -113,5 +112,4 @@ class CUDASymmetricMemoryAllocator : public SymmetricMemoryAllocator {
   std::unordered_map<void*, c10::intrusive_ptr<Block>> ptr_to_block_;
 };
 
-} // namespace symmetric_memory
-} // namespace c10d
+} // namespace c10d::symmetric_memory
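
Both edits above match common clang-tidy modernizations: the C++17 nested-namespace form (modernize-concat-nested-namespaces) and taking a "sink" string by value and moving it into the member instead of copying from a const reference. A minimal sketch of the sink-parameter pattern, using hypothetical names rather than the actual Block definition:

#include <string>
#include <utility>

namespace demo::symmetric {  // C++17 nested namespace definition

struct Block {
  // A by-value "sink" parameter lets rvalue callers move the string all the
  // way into the member; lvalue callers pay exactly one copy, as before.
  explicit Block(std::string group_name)
      : group_name_(std::move(group_name)) {}

  std::string group_name_;
};

}  // namespace demo::symmetric

int main() {
  demo::symmetric::Block b("group0");  // the temporary is moved, not copied
  return b.group_name_.empty() ? 1 : 0;
}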

torch/csrc/distributed/c10d/DMAConnectivity.hpp

Lines changed: 1 addition & 3 deletions
@@ -1,7 +1,5 @@
 #pragma once
 
-#include <optional>
-
 #include <ATen/ATen.h>
 
 namespace c10d {
@@ -25,7 +23,7 @@ struct TORCH_API DMAConnectivity : c10::intrusive_ptr_target {
 
 struct DMAConnectivityDetector : c10::intrusive_ptr_target {
   virtual c10::intrusive_ptr<DMAConnectivity> detect() = 0;
-  virtual ~DMAConnectivityDetector() {}
+  ~DMAConnectivityDetector() override = default;
 };
 
 C10_EXPORT void register_dma_connectivity_detector(
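
Replacing the empty virtual destructor with a defaulted override is the fix modernize-use-override and modernize-use-equals-default suggest: the destructor is already virtual through the intrusive_ptr_target base, so the override keyword documents that, and = default drops the user-written empty body. A small sketch with made-up class names:

#include <memory>

struct Detector {
  virtual ~Detector() = default;  // virtual in the base class
  virtual int detect() = 0;
};

struct NvlinkDetector : Detector {
  // override states the intent explicitly; = default expresses that the
  // destructor has no custom logic to run.
  ~NvlinkDetector() override = default;
  int detect() override { return 1; }
};

int main() {
  std::unique_ptr<Detector> d = std::make_unique<NvlinkDetector>();
  return d->detect() == 1 ? 0 : 1;
}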

torch/csrc/distributed/c10d/GroupRegistry.cpp

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ class GroupRegistry {
  public:
   void register_group(
       std::string group_name,
+      // NOLINTNEXTLINE(performance-unnecessary-value-param)
       c10::intrusive_ptr<c10d::ProcessGroup> group) {
     std::unique_lock write_lock(lock_);
     auto [_, inserted] =
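
Several changes in this commit acknowledge a diagnostic rather than restructure the code: a NOLINTNEXTLINE comment silences exactly one named check on exactly the next line, leaving the check active everywhere else. A minimal sketch, with a hypothetical function unrelated to GroupRegistry:

#include <functional>
#include <string>

using Callback = std::function<void(const std::string&)>;

// The callback is deliberately taken by value to keep an existing signature,
// so the suggestion to use a const reference is suppressed for this line only.
// NOLINTNEXTLINE(performance-unnecessary-value-param)
void watch_key(const std::string& key, Callback callback) {
  (void)key;
  (void)callback;  // deprecated entry point: intentionally does nothing
}

int main() {
  watch_key("rank0", [](const std::string&) {});
  return 0;
}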

torch/csrc/distributed/c10d/NCCLUtils.hpp

Lines changed: 1 addition & 1 deletion
@@ -370,7 +370,7 @@ class NCCLComm {
   NCCLComm& operator=(NCCLComm&& other) = delete;
 
   // Move constructable
-  // NOLINTNEXTLINE(.*-noexcept-move-.*)
+  // NOLINTNEXTLINE(*-noexcept-move-*)
   NCCLComm(NCCLComm&& other) {
     // Using other's lock, as it reads other's states
     // Can not use this.mutex_, as this object is being constructed.
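
For reference, the patterns inside a NOLINT comment are clang-tidy globs, not regular expressions, so the wildcard is a bare asterisk. The old spelling would be read as a literal leading dot followed by a wildcard, which matches no real check name; the glob *-noexcept-move-* covers checks such as performance-noexcept-move-constructor.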

torch/csrc/distributed/c10d/ProcessGroupGloo.hpp

Lines changed: 3 additions & 1 deletion
@@ -6,6 +6,7 @@
 #include <deque>
 #include <mutex>
 #include <thread>
+#include <utility>
 #include <vector>
 
 #include <gloo/algorithm.h>
@@ -91,7 +92,8 @@ class TORCH_API ProcessGroupGloo : public Backend {
   // Wrap c10d store as Gloo store
   class TORCH_API GlooStore : public ::gloo::rendezvous::Store {
    public:
-    GlooStore(const c10::intrusive_ptr<::c10d::Store>& store) : store_(store) {}
+    GlooStore(c10::intrusive_ptr<::c10d::Store> store)
+        : store_(std::move(store)) {}
 
     void setUint(const std::string& key, const std::vector<uint8_t>& value) {
       store_->set(key, value);

torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp

Lines changed: 8 additions & 4 deletions
@@ -87,6 +87,7 @@ ncclDataType_t getNcclDataType(at::ScalarType type) {
 
 bool complexViewAsRealAllowed(const ReduceOp& reduceOp) {
   switch (reduceOp) {
+    // NOLINTNEXTLINE(bugprone-branch-clone)
     case ReduceOp::SUM:
       return true;
     case ReduceOp::AVG:
@@ -119,6 +120,7 @@ ncclRedOpRAII unpackPreMulSum(
       &preMulSum,
       // https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/ops.html#ncclredopcreatepremulsum
       // tells us that the scalar input is strictly a multiplier.
+      // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
       /*scalar=*/has_tensor ? const_cast<T*>(ptr_factor) : &scalar_factor,
       dataType,
       residence,
@@ -318,6 +320,7 @@ static void cacheAllocatorRegisterHook(
     auto& ncclComm = it.first;
     auto& devIdx = it.second;
     if (te.device_ == devIdx) {
+      // NOLINTNEXTLINE(performance-no-int-to-ptr)
       ncclComm->registerSegment(reinterpret_cast<void*>(te.addr_), te.size_);
     }
   }
@@ -336,6 +339,7 @@ static void cacheAllocatorDeregisterHook(
     auto& ncclComm = it.first;
     auto& devIdx = it.second;
     if (te.device_ == devIdx) {
+      // NOLINTNEXTLINE(performance-no-int-to-ptr)
      ncclComm->deregisterSegment(reinterpret_cast<void*>(te.addr_));
     }
   }
@@ -869,7 +873,6 @@ ProcessGroupNCCL::ProcessGroupNCCL(
     : Backend(rank, size),
       store_(std::move(store)),
      options_(std::move(options)),
-
       traceKeyStart_(getTraceStartKey("NCCL", rank)),
       traceKeyEnd_(getTraceEndKey("NCCL", rank)),
       terminateProcessGroup_(false),
@@ -888,7 +891,7 @@ ProcessGroupNCCL::ProcessGroupNCCL(
   // other threads and cause segfaults.
   const auto ncclVersion = getNcclVersion();
   this->setGroupUid(options_->group_name);
-  this->localDeviceCount_ = at::cuda::getNumGPUs();
+  this->localDeviceCount_ = static_cast<int>(at::cuda::getNumGPUs());
   logPrefix_ = createLogPrefix();
   blockingWait_ = getCvarBool(TORCH_NCCL_BLOCKING_WAIT, false);
   asyncErrorHandling_ = static_cast<ErrorHandlingMode>(
@@ -1013,8 +1016,8 @@ ProcessGroupNCCL::ProcessGroupNCCL(
     this->globalRankStride = 0;
   } else {
     bool ranksAreStrided = true;
-    int startRank = options_->global_ranks_in_group[0];
-    int stride =
+    auto startRank = options_->global_ranks_in_group[0];
+    auto stride =
         options_->global_ranks_in_group[1] - options_->global_ranks_in_group[0];
     for (std::vector<uint64_t>::size_type i = 0;
          i < options_->global_ranks_in_group.size();
@@ -1377,6 +1380,7 @@ void ProcessGroupNCCL::shutdown() {
     this->abort();
   }
 
+// NOLINTNEXTLINE(bugprone-exception-escape)
 ProcessGroupNCCL::~ProcessGroupNCCL() {
   LOG(INFO) << logPrefix() << "ProcessGroupNCCL destructor entered.";
 
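Most of the ProcessGroupNCCL.cpp edits either suppress a named check in place or make integer conversions explicit: auto picks up the element type of global_ranks_in_group (presumably uint64_t, given the loop's size_type), and static_cast<int> marks the one narrowing that is actually intended. A compilable sketch of that conversion pattern, with hypothetical variable names:

#include <cstdint>
#include <vector>

int main() {
  const std::vector<uint64_t> global_ranks = {0, 2, 4, 6};

  // auto keeps the container's uint64_t element type instead of silently
  // narrowing the value to int on initialization.
  auto start_rank = global_ranks[0];
  auto stride = global_ranks[1] - global_ranks[0];

  // Where an int is genuinely required, the narrowing is spelled out so the
  // conversion is visible to clang-tidy and to readers.
  const int device_count = static_cast<int>(global_ranks.size());

  return (start_rank == 0 && stride == 2 && device_count == 4) ? 0 : 1;
}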

torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp

Lines changed: 5 additions & 4 deletions
@@ -198,7 +198,8 @@ struct DumpPipe {
     if (fd_ == -1) {
       return false;
     }
-    char buf[128];
+    // NOLINTNEXTLINE(*array*)
+    char buf[128]{};
     // non-blocking from O_NONBLOCK above.
     // Ignore EINTR because we already will poll this
     // again later.
@@ -461,8 +462,8 @@ class TORCH_API ProcessGroupNCCL : public Backend {
   // NOTE: We intentionally store raw pointers so that
   // we do not attempt to destroy the event objects on process exit,
   // because cuda may be gone.
-  std::deque<at::cuda::CUDAEvent*>
-      eventsArray_[2]; // 0 for timing=false, 1 for timing=true
+  std::array<std::deque<at::cuda::CUDAEvent*>, 2>
+      eventsArray_; // 0 for timing=false, 1 for timing=true
 };
 
 struct Options : Backend::Options {
@@ -1181,7 +1182,7 @@ class TORCH_API ProcessGroupNCCL : public Backend {
   bool dumpOnTimeoutOrEx_;
 
   // Whether or not to sleep after an exception is thrown in the watchdog.
-  bool sleepAfterException_;
+  bool sleepAfterException_{};
 
   // Whether or not to enable nan check for input tensors to collectives.
   bool enableNanCheck_;
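
The header changes swap a raw C array member for std::array (the modernize-avoid-c-arrays family) and add empty brace initializers so the local buffer and the flag start out zeroed instead of indeterminate. A short sketch with invented member names:

#include <array>
#include <deque>

struct EventPool {
  // std::array keeps the fixed size but gains value semantics, .size(), and
  // iterators that a raw "std::deque<int> events[2]" member lacks.
  std::array<std::deque<int>, 2> events_by_timing;

  // Empty braces value-initialize the flag to false; without them it would
  // stay indeterminate until some constructor assigned it.
  bool sleep_after_exception{};
};

int main() {
  EventPool pool;
  pool.events_by_timing[1].push_back(42);  // index 1: timing enabled
  return pool.sleep_after_exception ? 1 : 0;
}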

torch/csrc/distributed/c10d/PyProcessGroup.hpp

Lines changed: 1 addition & 0 deletions
@@ -212,6 +212,7 @@ class TORCH_PYTHON_API PythonOnCompletionHook {
   PythonOnCompletionHook(py::object hook) : hook_(std::move(hook)) {}
   PythonOnCompletionHook(const PythonOnCompletionHook&) = default;
 
+  // NOLINTNEXTLINE(bugprone-exception-escape)
   ~PythonOnCompletionHook() {
     py::gil_scoped_acquire ag;
     hook_.dec_ref();
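
bugprone-exception-escape flags this destructor because destructors are implicitly noexcept and acquiring the GIL can, in principle, throw; the comment records that the risk is accepted rather than refactored away. A minimal sketch of the two usual responses, using invented names:

#include <stdexcept>

// Option 1: keep any exception inside the destructor.
struct Resource {
  ~Resource() {
    try {
      release();  // may throw
    } catch (const std::exception&) {
      // swallow: letting it escape a noexcept destructor calls std::terminate
    }
  }
  static void release() { throw std::runtime_error("release failed"); }
};

// Option 2: accept the residual risk and suppress the check for this line.
struct Hook {
  // NOLINTNEXTLINE(bugprone-exception-escape)
  ~Hook() { cleanup(); }
  static void cleanup() {}  // assumed not to throw in practice
};

int main() {
  Resource r;
  Hook h;
  return 0;
}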

torch/csrc/distributed/c10d/RankLocal.hpp

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ class RankLocal {
   }
 
  private:
-  RankLocal(){};
+  RankLocal() = default;
   thread_local static T* cached_;
   static std::unordered_map<uint64_t, T> thread_id_to_rank_local_;
   static std::shared_mutex lock_;

torch/csrc/distributed/c10d/Store.hpp

Lines changed: 1 addition & 0 deletions
@@ -75,6 +75,7 @@ class TORCH_API Store : public torch::CustomClassHolder {
   // watchKey() is deprecated and no longer supported.
   virtual void watchKey(
       const std::string& /* unused */,
+      // NOLINTNEXTLINE(performance-unnecessary-value-param)
       WatchKeyCallback /* unused */) {
     TORCH_CHECK(false, "watchKey is deprecated, no implementation support it.");
   }
