Skip to content

Commit 46dac16

Browse files
committed
Fixes #769: Applying a bunch of code style, clarity, disambiguation and similar suggestions from a CLion IDE code inspection:
* Using rethrow expressions (`throw;`) instead of throwing the caught exception object (`throw ex;`). * Made more constant variables `constexpr` * Replace post-increment with pre-increment * Removed a redundant `typename` * Removed some redundant casts * Removed some redundant parentheses * Spacing tweaks * Made some memory allocation functions and a `detail_::get_context_for()` function take more `const` parameters * Removed unnecessary inclusions (and replaced them with forward-declarations or more limited inclusions) * Removed redundant `inline` specifications (for function templates) * Removed redundant `const` specifier for parameters passed by value * Example programs code: * Common examples utility: * Added explanation of possible demangling failure status * Made the demangling function a little more self-documenting (saving a couple of comments), and avoided using 0 for `nullptr` while at it * Avoiding C-style cast or functional-style enum construction to better express intent (and not confuse static analyzers) * Made some more function parameters `const` * Avoiding warnings about unused variables (+ spacing tweaks) * Made some more function parameters `const` (which NVIDIA should have, actually) * `SimpleCudaGraphs` example program: Switching a `reinterpret_cast` to a more appropriate `static_cast` * Module management example program: Avoiding variable shadowing (although it's not harmful)
1 parent c0aee06 commit 46dac16

27 files changed

+113
-97
lines changed

examples/by_api_module/context_management.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ void test_context(
7272
auto printf_fifo_size = context.get_limit(CU_LIMIT_PRINTF_FIFO_SIZE);
7373
std::cout << "The printf FIFO size for context " << context << " is " << printf_fifo_size << ".\n";
7474
decltype(printf_fifo_size) new_printf_fifo_size =
75-
(printf_fifo_size <= 1024) ? 2 * printf_fifo_size : printf_fifo_size - 512;
75+
(printf_fifo_size <= 1024) ? 2 * printf_fifo_size : printf_fifo_size - 512;
7676
context.set_limit(CU_LIMIT_PRINTF_FIFO_SIZE, new_printf_fifo_size);
7777
printf_fifo_size = context.get_limit(CU_LIMIT_PRINTF_FIFO_SIZE);
7878
assert_(printf_fifo_size == new_printf_fifo_size);
@@ -91,9 +91,9 @@ void test_context(
9191
}
9292

9393
void current_context_manipulation(
94-
cuda::device_t &device,
95-
cuda::device::primary_context_t &pc,
96-
cuda::context_t &created_context)
94+
const cuda::device_t &device,
95+
const cuda::device::primary_context_t &pc,
96+
const cuda::context_t &created_context)
9797
{
9898
cuda::context_t context_0 = pc;
9999
cuda::context_t context_1 = created_context;
@@ -104,13 +104,13 @@ void current_context_manipulation(
104104
assert_(cuda::context::current::get() == context_1);
105105
assert_(cuda::context::current::detail_::get_handle() == context_1.handle());
106106

107-
108107
auto context_2 = cuda::context::create(device);
109108
{
110109
cuda::context::current::scoped_override_t context_for_this_block { context_2 };
111110
assert_(context_2.handle() == cuda::context::current::get().handle());
112111
assert_(context_2 == cuda::context::current::get());
113112
}
113+
(void) context_2; // We want it in existence outside the inner scope
114114
auto gotten = cuda::context::current::get();
115115
assert_(gotten == context_1);
116116

examples/by_api_module/execution_control.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ int main(int argc, char **argv)
115115
launch_config_4 = std::move(launch_config_3);
116116
cuda::launch_configuration_t launch_config_5{std::move(launch_config_2)};
117117
(void) launch_config_4;
118+
(void) launch_config_5;
118119
// In case the `[[maybe_unused]]` attribute and the void-casting is ignored,
119120
// let's try to trick the compiler
120121
// into thinking we're actually using launch_config_4.

examples/by_api_module/module_management.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ std::string make_instantiation_name(string_view base_name, Ts&&... args)
8484
}
8585

8686
void handle_compilation_failure(
87-
const cuda::rtc::compilation_output_t<cuda::cuda_cpp>& compilation_output,
88-
cuda::rtc::compilation_options_t<cuda::cuda_cpp> compilation_options = {})
87+
const cuda::rtc::compilation_output_t<cuda::cuda_cpp> & compilation_output,
88+
const cuda::rtc::compilation_options_t<cuda::cuda_cpp> & compilation_options = {})
8989
{
9090
std::cerr << "Program compilation failed:\n";
9191
auto compilation_log = compilation_output.log();
@@ -108,16 +108,16 @@ get_compiled_program(const cuda::device_t &device)
108108
__constant__ int a;
109109
110110
__global__
111-
void my_kernel1(float const* indata, float* outdata) {
112-
outdata[0] = indata[0] + 1;
113-
outdata[0] -= 1;
111+
void my_kernel1(float const* in_data, float* out_data) {
112+
out_data[0] = in_data[0] + 1;
113+
out_data[0] -= 1;
114114
}
115115
116116
template<int C, typename T>
117117
__global__
118-
void my_kernel2(float const* indata, float* outdata) {
118+
void my_kernel2(float const* in_data, float* out_data) {
119119
for( int i=0; i<C; ++i ) {
120-
outdata[0] =-indata[0];
120+
out_data[0] =-in_data[0];
121121
}
122122
};
123123
@@ -165,10 +165,10 @@ bool basic_module_tests(
165165
module_kernels = std::move(module_kernels_);
166166
#endif
167167

168-
test_result = test_result and (module.device_id() == device.id());
169-
test_result = test_result and (module.device() == device);
170-
test_result = test_result and (module.context() == device.primary_context(cuda::do_not_hold_primary_context_refcount_unit));
171-
test_result = test_result and (module.context_handle() == cuda::device::primary_context::detail_::get_handle(device.id()));
168+
test_result = test_result and module.device_id() == device.id();
169+
test_result = test_result and module.device() == device;
170+
test_result = test_result and module.context() == device.primary_context(cuda::do_not_hold_primary_context_refcount_unit);
171+
test_result = test_result and module.context_handle() == cuda::device::primary_context::detail_::get_handle(device.id());
172172

173173
{
174174
auto a = module.get_global_region(compilation_result.get_mangling_of(constant_name));
@@ -181,9 +181,9 @@ bool basic_module_tests(
181181
auto my_kernel2 = module.get_kernel(mangled_kernel_names[1]);
182182

183183
auto list_kernel =
184-
[](const char * title, const char * mangled_name, cuda::optional<const char*> unmangled) {
184+
[](const char * title_, const char * mangled_name, cuda::optional<const char*> unmangled) {
185185
std::cout
186-
<< title << ":\n"
186+
<< title_ << ":\n"
187187
<< " unmangled: " << unmangled.value_or("N/A") << '\n'
188188
<< " mangled: " << mangled_name << "\n"
189189
#if __GNUC__

examples/by_api_module/unified_addressing.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,7 @@ void pointer_properties(const cuda::device_t& device)
5858
(void) host_ptr; // Some compilers don't respect [[maybe_unused]] :-(
5959
die_("Was expecting the host_ptr() method to fail for a device-side pointer");
6060
} catch(cuda::runtime_error& e) {
61-
if (e.code() != cuda::status::named_t::invalid_value) {
62-
throw e;
63-
}
61+
if (e.code() != cuda::status::named_t::invalid_value) { throw; }
6462
}
6563
auto ptr_reported_as_managed = cuda::memory::pointer::detail_::get_attribute<CU_POINTER_ATTRIBUTE_IS_MANAGED>(raw_pointers[i]);
6664
assert_(ptr_reported_as_managed == 0);
@@ -140,7 +138,7 @@ void wrapped_pointers_and_regions(const cuda::device_t& device)
140138
<< ptr.get() << ", " << " host-side pointer: " << host_side_ptr;
141139
}
142140
catch(cuda::runtime_error& e) {
143-
if (e.code() != cuda::status::invalid_value) { throw e; }
141+
if (e.code() != cuda::status::invalid_value) { throw; }
144142
}
145143
}
146144

examples/common.hpp

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ bool your_type_was_() { return true; }
4040

4141
inline const char* ordinal_suffix(int n)
4242
{
43-
static const char suffixes [4][5] = {"th", "st", "nd", "rd"};
43+
static constexpr char suffixes [4][5] = {"th", "st", "nd", "rd"};
4444
auto ord = n % 100;
4545
if (ord / 10 == 1) { ord = 0; }
4646
ord = ord % 10;
@@ -95,12 +95,12 @@ std::ostream& operator<<(std::ostream& os, cuda::device::compute_capability_t cc
9595

9696
std::ostream& operator<<(std::ostream& os, cuda::multiprocessor_cache_preference_t pref)
9797
{
98-
return (os << cache_preference_name(pref));
98+
return os << cache_preference_name(pref);
9999
}
100100

101101
std::ostream& operator<<(std::ostream& os, cuda::context::host_thread_sync_scheduling_policy_t pref)
102102
{
103-
return (os << host_thread_sync_scheduling_policy_name(pref));
103+
return os << host_thread_sync_scheduling_policy_name(pref);
104104
}
105105

106106
std::ostream& operator<<(std::ostream& os, cuda::context::handle_t handle)
@@ -195,7 +195,7 @@ void print_context_stack()
195195
}
196196
std::cout << '\n';
197197
}
198-
for (auto it = contexts.rbegin(); it != contexts.rend(); it++) {
198+
for (auto it = contexts.rbegin(); it != contexts.rend(); ++it) {
199199
cuda::context::current::detail_::push(*it);
200200
}
201201
}
@@ -359,14 +359,33 @@ cuda::device::id_t choose_device(int argc, char ** argv)
359359
}
360360

361361
#ifdef __GNUC__
362+
363+
inline char const* describe_demangling_status(int status)
364+
{
365+
switch (status) {
366+
case 0: return "success";
367+
case 1: return "A memory allocation failure occurred";
368+
case 2: return "mangled_name is not a valid name under the C++ ABI mangling rules";
369+
case 3: return "One of the arguments is invalid";
370+
default: return "Unknown demangling status";
371+
}
372+
}
373+
362374
// Inefficient, but simple
363375
inline std::string demangle(const char *mangled_name)
364376
{
365377
if (mangled_name == nullptr) { return nullptr; }
366378
int status;
367-
char *raw_demangled = abi::__cxa_demangle(mangled_name, 0 /* output buffer */, 0 /* length */, &status);
379+
auto no_preallocated_output_buffer = nullptr;
380+
auto dont_return_mangled_length = nullptr;
381+
char *raw_demangled = abi::__cxa_demangle(
382+
mangled_name,
383+
no_preallocated_output_buffer,
384+
dont_return_mangled_length,
385+
&status);
368386
if (raw_demangled == nullptr) {
369-
throw std::runtime_error(std::string("Failed demangling \"") + mangled_name + '\"');
387+
throw std::runtime_error(std::string("Failed demangling \"") + mangled_name + "\": "
388+
+ describe_demangling_status(status));
370389
}
371390
std::string result { raw_demangled };
372391
free(raw_demangled);

examples/modified_cuda_samples/memMapIPCDrv/helper_multiprocess.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ int ipcOpenSocket(ipcHandle *&handle) {
217217
return 0;
218218
}
219219

220-
int ipcCloseSocket(ipcHandle *handle) {
220+
int ipcCloseSocket(const ipcHandle *handle) {
221221
if (!handle) {
222222
return -1;
223223
}
@@ -498,7 +498,7 @@ int ipcRecvShareableHandles(ipcHandle *handle,
498498
return 0;
499499
}
500500

501-
int ipcCloseSocket(ipcHandle *handle) {
501+
int ipcCloseSocket(const ipcHandle *handle) {
502502
for (int i = 0; i < handle->hMailslot.size(); i++) {
503503
CloseHandle(handle->hMailslot[i]);
504504
}

examples/modified_cuda_samples/memMapIPCDrv/helper_multiprocess.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ int
106106
ipcOpenSocket(ipcHandle *&handle);
107107

108108
int
109-
ipcCloseSocket(ipcHandle *handle);
109+
ipcCloseSocket(const ipcHandle *handle);
110110

111111
int
112112
ipcRecvShareableHandles(ipcHandle *handle, std::vector<shared_allocation_handle_t>& shareableHandles);

examples/modified_cuda_samples/simpleCudaGraphs/simpleCudaGraphs.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ namespace cg = cooperative_groups;
4444
#define THREADS_PER_BLOCK 512
4545
#define GRAPH_LAUNCH_ITERATIONS 3
4646

47-
__global__ void reduce(float *inputVec, double *outputVec, size_t inputSize, size_t outputSize)
47+
__global__ void reduce(const float *inputVec, double *outputVec, size_t inputSize, size_t outputSize)
4848
{
4949
__shared__ double tmp[THREADS_PER_BLOCK];
5050

@@ -53,7 +53,7 @@ __global__ void reduce(float *inputVec, double *outputVec, size_t inputSize, siz
5353

5454
double temp_sum = 0.0;
5555
for (int i = globaltid; i < inputSize; i += gridDim.x * blockDim.x) {
56-
temp_sum += (double) inputVec[i];
56+
temp_sum += static_cast<double>(inputVec[i]);
5757
}
5858
tmp[cta.thread_rank()] = temp_sum;
5959

@@ -132,7 +132,7 @@ __global__ void reduceFinal(double *inputVec, double *result, size_t inputSize)
132132
}
133133

134134
void init_input(cuda::span<float> a) {
135-
auto generator = []() { return static_cast<float>(rand() & 0xFF) / (float)RAND_MAX; };
135+
auto generator = []() { return static_cast<float>(rand() & 0xFF) / static_cast<float>(RAND_MAX); };
136136
::std::generate_n(a.data(), a.size(), generator);
137137
}
138138

@@ -144,7 +144,7 @@ void myRealHostNodeCallback(char const *graph_construction_mode, double result)
144144

145145
void CUDART_CB myHostNodeCallback(void *type_erased_data)
146146
{
147-
auto *data = reinterpret_cast<std::pair<const char*, double*>*>(type_erased_data);
147+
auto *data = static_cast<std::pair<const char*, double*>*>(type_erased_data);
148148
auto graph_construction_mode = data->first;
149149
auto result = data->second;
150150
myRealHostNodeCallback(graph_construction_mode, *result);

examples/modified_cuda_samples/streamOrderedAllocationIPC/helper_multiprocess.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ int ipcOpenSocket(ipcHandle *&handle) {
217217
return 0;
218218
}
219219

220-
int ipcCloseSocket(ipcHandle *handle) {
220+
int ipcCloseSocket(const ipcHandle *handle) {
221221
if (!handle) {
222222
return -1;
223223
}
@@ -466,7 +466,7 @@ int ipcRecvShareableHandles(ipcHandle *handle,
466466
return 0;
467467
}
468468

469-
int ipcCloseSocket(ipcHandle *handle) {
469+
int ipcCloseSocket(const ipcHandle *handle) {
470470
for (int i = 0; i < handle->hMailslot.size(); i++) {
471471
CloseHandle(handle->hMailslot[i]);
472472
}

examples/modified_cuda_samples/streamOrderedAllocationIPC/helper_multiprocess.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ int
103103
ipcOpenSocket(ipcHandle *&handle);
104104

105105
int
106-
ipcCloseSocket(ipcHandle *handle);
106+
ipcCloseSocket(const ipcHandle *handle);
107107

108108
int
109109
ipcRecvShareableHandles(ipcHandle *handle, std::vector<shared_pool_handle_t>& shareable_pool_handles);

0 commit comments

Comments
 (0)