|
9 | 9 | #define KERNEL_FLOAT_IS_DEVICE (1) |
10 | 10 | #define KERNEL_FLOAT_IS_HOST (0) |
11 | 11 | #define KERNEL_FLOAT_CUDA_ARCH (__CUDA_ARCH__) |
12 | | -#else |
| 12 | +#else // __CUDA_ARCH__ |
13 | 13 | #define KERNEL_FLOAT_INLINE __forceinline__ __host__ |
14 | 14 | #define KERNEL_FLOAT_IS_DEVICE (0) |
15 | 15 | #define KERNEL_FLOAT_IS_HOST (1) |
16 | 16 | #define KERNEL_FLOAT_CUDA_ARCH (0) |
17 | | -#endif |
18 | | -#else |
| 17 | +#endif // __CUDA_ARCH__ |
| 18 | +#else // __CUDACC__ |
19 | 19 | #define KERNEL_FLOAT_INLINE inline |
20 | 20 | #define KERNEL_FLOAT_CUDA (0) |
21 | 21 | #define KERNEL_FLOAT_IS_HOST (1) |
22 | 22 | #define KERNEL_FLOAT_IS_DEVICE (0) |
23 | 23 | #define KERNEL_FLOAT_CUDA_ARCH (0) |
24 | | -#endif |
| 24 | +#endif // __CUDACC__ |
25 | 25 |
|
26 | 26 | #ifndef KERNEL_FLOAT_FP16_AVAILABLE |
27 | 27 | #define KERNEL_FLOAT_FP16_AVAILABLE (1) |
28 | | -#endif |
| 28 | +#endif // KERNEL_FLOAT_FP16_AVAILABLE |
29 | 29 |
|
30 | 30 | #ifndef KERNEL_FLOAT_BF16_AVAILABLE |
31 | 31 | #define KERNEL_FLOAT_BF16_AVAILABLE (1) |
32 | | -#endif |
| 32 | +#endif // KERNEL_FLOAT_BF16_AVAILABLE |
33 | 33 |
|
34 | 34 | #ifndef KERNEL_FLOAT_FP8_AVAILABLE |
35 | 35 | #ifdef __CUDACC_VER_MAJOR__ |
36 | 36 | #define KERNEL_FLOAT_FP8_AVAILABLE (__CUDACC_VER_MAJOR__ >= 12) |
37 | | -#else |
| 37 | +#else // __CUDACC_VER_MAJOR__ |
38 | 38 | #define KERNEL_FLOAT_FP8_AVAILABLE (0) |
39 | | -#endif |
40 | | -#endif |
| 39 | +#endif // __CUDACC_VER_MAJOR__ |
| 40 | +#endif // KERNEL_FLOAT_FP8_AVAILABLE |
41 | 41 |
|
42 | 42 | #define KERNEL_FLOAT_ASSERT(expr) \ |
43 | 43 | do { \ |
|
49 | 49 | #define KERNEL_FLOAT_CONCAT(A, B) KERNEL_FLOAT_CONCAT_IMPL(A, B) |
50 | 50 | #define KERNEL_FLOAT_CALL(F, ...) F(__VA_ARGS__) |
51 | 51 |
|
| 52 | +// TODO: check if this way is supported across all compilers |
| 53 | +#if defined(__has_builtin) && __has_builtin(__builtin_assume_aligned) && 0 |
| 54 | +#define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) \ |
| 55 | + static_cast<TYPE*>(__builtin_assume_aligned(static_cast<TYPE*>(PTR), (ALIGNMENT))) |
| 56 | +#else |
| 57 | +#define KERNEL_FLOAT_ASSUME_ALIGNED(TYPE, PTR, ALIGNMENT) (PTR) |
| 58 | +#endif |
| 59 | + |
| 60 | +#define KERNEL_FLOAT_MAX_ALIGNMENT (32) |
| 61 | + |
52 | 62 | #endif //KERNEL_FLOAT_MACROS_H |
0 commit comments