@@ -79,6 +79,12 @@ DEFINE_string(selected_gpus, "",
79
79
namespace paddle {
80
80
namespace platform {
81
81
82
// Returns the hint appended to CUDA failure messages: a short sentence
// followed by the URL of the CUDA runtime API page that lists the error
// codes. The URL is split across adjacent string literals only to keep the
// lines short; the pieces must join without any whitespace, otherwise the
// resulting link is broken.
inline std::string CudaErrorWebsite() {
  return " Please see detail in https://docs.nvidia.com/cuda/cuda-runtime-api"
         "/group__CUDART__TYPES.html#group__CUDART__TYPES_1g3f51e3575c217824"
         "6db0a94a430e0038";
}
87
+
82
88
static int GetCUDADeviceCountImpl () {
83
89
const auto *cuda_visible_devices = std::getenv (" CUDA_VISIBLE_DEVICES" );
84
90
if (cuda_visible_devices != nullptr ) {
@@ -92,9 +98,12 @@ static int GetCUDADeviceCountImpl() {
92
98
}
93
99
94
100
int count;
101
+ auto error_code = cudaGetDeviceCount (&count);
95
102
PADDLE_ENFORCE (
96
- cudaGetDeviceCount (&count),
97
- " cudaGetDeviceCount failed in paddle::platform::GetCUDADeviceCount" );
103
+ error_code,
104
+ " cudaGetDeviceCount failed in "
105
+ " paddle::platform::GetCUDADeviceCountImpl, error code : %d, %s" ,
106
+ error_code, CudaErrorWebsite ());
98
107
return count;
99
108
}
100
109
@@ -106,9 +115,12 @@ int GetCUDADeviceCount() {
106
115
int GetCUDAComputeCapability (int id) {
107
116
PADDLE_ENFORCE_LT (id, GetCUDADeviceCount (), " id must less than GPU count" );
108
117
cudaDeviceProp device_prop;
109
- PADDLE_ENFORCE (cudaGetDeviceProperties (&device_prop, id),
110
- " cudaGetDeviceProperties failed in "
111
- " paddle::platform::GetCUDAComputeCapability" );
118
+ auto error_code = cudaGetDeviceProperties (&device_prop, id);
119
+ PADDLE_ENFORCE (
120
+ error_code,
121
+ " cudaGetDeviceProperties failed in "
122
+ " paddle::platform::GetCUDAComputeCapability, error code : %d, %s" ,
123
+ error_code, CudaErrorWebsite ());
112
124
return device_prop.major * 10 + device_prop.minor ;
113
125
}
114
126
@@ -143,20 +155,25 @@ bool TensorCoreAvailable() {
143
155
int GetCUDAMultiProcessors (int id) {
144
156
PADDLE_ENFORCE_LT (id, GetCUDADeviceCount (), " id must less than GPU count" );
145
157
int count;
146
- PADDLE_ENFORCE (
147
- cudaDeviceGetAttribute (&count, cudaDevAttrMultiProcessorCount, id),
148
- " cudaDeviceGetAttribute failed in "
149
- " paddle::platform::GetCUDAMultiProcessors" );
158
+ auto error_code =
159
+ cudaDeviceGetAttribute (&count, cudaDevAttrMultiProcessorCount, id);
160
+ PADDLE_ENFORCE (error_code,
161
+ " cudaDeviceGetAttribute failed in "
162
+ " paddle::platform::GetCUDAMultiProcess, error code : %d, %s" ,
163
+ error_code, CudaErrorWebsite ());
150
164
return count;
151
165
}
152
166
153
167
int GetCUDAMaxThreadsPerMultiProcessor (int id) {
154
168
PADDLE_ENFORCE_LT (id, GetCUDADeviceCount (), " id must less than GPU count" );
155
169
int count;
156
- PADDLE_ENFORCE (cudaDeviceGetAttribute (
157
- &count, cudaDevAttrMaxThreadsPerMultiProcessor, id),
158
- " cudaDeviceGetAttribute failed in "
159
- " paddle::platform::GetCUDAMaxThreadsPerMultiProcessor" );
170
+ auto error_code = cudaDeviceGetAttribute (
171
+ &count, cudaDevAttrMaxThreadsPerMultiProcessor, id);
172
+ PADDLE_ENFORCE (
173
+ error_code,
174
+ " cudaDeviceGetAttribute failed in paddle::"
175
+ " platform::GetCUDAMaxThreadsPerMultiProcessor, error code : %d, %s" ,
176
+ error_code, CudaErrorWebsite ());
160
177
return count;
161
178
}
162
179
@@ -266,37 +283,50 @@ size_t GpuMaxChunkSize() {
266
283
267
284
void GpuMemcpyAsync (void *dst, const void *src, size_t count,
268
285
enum cudaMemcpyKind kind, cudaStream_t stream) {
269
- PADDLE_ENFORCE (cudaMemcpyAsync (dst, src, count, kind, stream),
286
+ auto error_code = cudaMemcpyAsync (dst, src, count, kind, stream);
287
+ PADDLE_ENFORCE (error_code,
270
288
" cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync "
271
- " (%p -> %p, length: %d)" ,
272
- src, dst, static_cast <int >(count));
289
+ " (%p -> %p, length: %d) error code : %d, %s" ,
290
+ src, dst, static_cast <int >(count), error_code,
291
+ CudaErrorWebsite ());
273
292
}
274
293
275
294
void GpuMemcpySync (void *dst, const void *src, size_t count,
276
295
enum cudaMemcpyKind kind) {
277
- PADDLE_ENFORCE (cudaMemcpy (dst, src, count, kind),
278
- " cudaMemcpy failed in paddle::platform::GpuMemcpySync (%p -> "
279
- " %p, length: %d)" ,
280
- src, dst, static_cast <int >(count));
296
+ auto error_code = cudaMemcpy (dst, src, count, kind);
297
+ PADDLE_ENFORCE (error_code,
298
+ " cudaMemcpy failed in paddle::platform::GpuMemcpySync "
299
+ " (%p -> %p, length: %d) error code : %d, %s" ,
300
+ src, dst, static_cast <int >(count), error_code,
301
+ CudaErrorWebsite ());
281
302
}
282
303
283
304
void GpuMemcpyPeerAsync (void *dst, int dst_device, const void *src,
284
305
int src_device, size_t count, cudaStream_t stream) {
306
+ auto error_code =
307
+ cudaMemcpyPeerAsync (dst, dst_device, src, src_device, count, stream);
285
308
PADDLE_ENFORCE (
286
- cudaMemcpyPeerAsync (dst, dst_device, src, src_device, count, stream),
287
- " cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeerAsync" );
309
+ error_code,
310
+ " cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeerAsync "
311
+ " error code : %d, %s" ,
312
+ error_code, CudaErrorWebsite ());
288
313
}
289
314
290
315
void GpuMemcpyPeerSync (void *dst, int dst_device, const void *src,
291
316
int src_device, size_t count) {
292
- PADDLE_ENFORCE (
293
- cudaMemcpyPeer (dst, dst_device, src, src_device, count),
294
- " cudaMemcpyPeer failed in paddle::platform::GpuMemcpyPeerSync" );
317
+ auto error_code = cudaMemcpyPeer (dst, dst_device, src, src_device, count);
318
+ PADDLE_ENFORCE (error_code,
319
+ " cudaMemcpyPeer failed in paddle::platform::GpuMemcpyPeerSync "
320
+ " error code : %d, %s" ,
321
+ error_code, CudaErrorWebsite ());
295
322
}
296
323
297
324
void GpuMemsetAsync (void *dst, int value, size_t count, cudaStream_t stream) {
298
- PADDLE_ENFORCE (cudaMemsetAsync (dst, value, count, stream),
299
- " cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync" );
325
+ auto error_code = cudaMemsetAsync (dst, value, count, stream);
326
+ PADDLE_ENFORCE (error_code,
327
+ " cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync "
328
+ " error code : %d, %s" ,
329
+ error_code, CudaErrorWebsite ());
300
330
}
301
331
} // namespace platform
302
332
} // namespace paddle
0 commit comments