@@ -20,6 +20,10 @@ limitations under the License. */
20
20
#include < istream>
21
21
#include < ostream>
22
22
23
+ #include < cuda.h>
24
+
25
+ #include " paddle/utils/Logging.h"
26
+
23
27
#define USE_EIGEN
24
28
25
29
#ifdef USE_EIGEN // delete this #if macro
@@ -48,6 +52,27 @@ limitations under the License. */
48
52
#define PADDLE_HOSTDEVICE
49
53
#endif // __CUDACC__
50
54
55
// Stringification helpers.
//   STR(x)  - turns its argument into a string literal verbatim.
//   XSTR(x) - macro-expands its argument first, then stringifies the result,
//             so XSTR(CUDA_VERSION) yields the version number as text.
// There must be no whitespace between the macro name and '(' - otherwise the
// preprocessor treats the definition as an object-like macro.
#define STR(x) #x
#define XSTR(x) STR(x)

// Compile-time diagnostics: each probe below prints one compiler message per
// compilation pass that includes this header. The parenthesised pragma form
// is used because it is accepted by GCC/Clang as well as MSVC.

// Is this translation unit being compiled by nvcc?
#ifdef __CUDACC__
#pragma message("__CUDACC__ defined")
#else
#pragma message("__CUDACC__ not defined")
#endif

// Is the CUDA toolkit version macro visible, and what is its value?
#ifdef CUDA_VERSION
#pragma message("CUDA_VERSION defined: " XSTR(CUDA_VERSION))
#else
#pragma message("CUDA_VERSION not defined")
#endif

// __CUDA_ARCH__ is only defined while compiling device code.
#ifdef __CUDA_ARCH__
#pragma message("The value of CUDA_ARCH: " XSTR(__CUDA_ARCH__))
#else
#pragma message("CUDA ARCH NOT DEFINED!")
#endif
75
+
51
76
// Define PADDLE_ARM_32 whenever the compiler predefines __arm__, so later code
// can test a project-level macro instead of the compiler-specific one.
#if defined(__arm__)
#define PADDLE_ARM_32
#endif
@@ -359,6 +384,7 @@ struct PADDLE_ALIGN(2) float16 {
359
384
// arithmetic operators
360
385
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
361
386
// Device-only float16 addition using the native half-precision intrinsic.
// Both operands are converted to `half`, summed with __hadd, and the result is
// wrapped back into a float16.
__device__ inline float16 operator+(const float16& a, const float16& b) {
  // Debug trace: every executing thread prints this line, so output is
  // serialized and slow.
  printf(" GPU Intrinsic used!");
  const half lhs = half(a);
  const half rhs = half(b);
  return float16(__hadd(lhs, rhs));
}
364
390
@@ -495,6 +521,7 @@ __host__ inline bool operator>=(const float16& a, const float16& b) {
495
521
496
522
#else // software emulation on other cpu
497
523
// Software-emulated float16 addition: both operands are widened to float,
// added in single precision, and the sum is converted back to float16.
//
// The trace is emitted only in host compilation passes (__CUDA_ARCH__ is
// defined solely while nvcc compiles device code).
// NOTE(review): this assumes PADDLE_HOSTDEVICE expands to __host__ __device__
// under __CUDACC__ and that LOG is a host-only facility - confirm against the
// macro definition and paddle/utils/Logging.h.
PADDLE_HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
#ifndef __CUDA_ARCH__
  LOG(INFO) << " CPU emulation used";
#endif
  return float16(float(a) + float(b));
}
500
527
@@ -656,7 +683,7 @@ PADDLE_HOSTDEVICE inline float16 float_to_half_rn(float f) {
656
683
PADDLE_HOSTDEVICE inline float half_to_float (float16 h) {
657
684
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
658
685
half tmp = *reinterpret_cast <half*>(&h);
659
- return __half2float (h );
686
+ return __half2float (tmp );
660
687
661
688
#elif defined(PADDLE_NEON_64)
662
689
float res;
0 commit comments