|
137 | 137 | typedef mt_bfloat16 nv_bfloat16; |
138 | 138 |
|
139 | 139 | /** FIXME: MUSA arch should match CUDA 11.4 */ |
140 | | -// #define CC_OFFSET_MT 99999 // should < CC_OFFSET_AMD |
141 | | -// #define __CUDA_ARCH__ CC_OFFSET_MT |
142 | | -// #define __CUDA_ARCH__ 800 // AMPERE |
| 140 | +// #define GGML_CUDA_CC_PASCAL 600 |
| 141 | +// #define GGML_CUDA_CC_DP4A 610 |
| 142 | +// #define GGML_CUDA_CC_VOLTA 700 |
| 143 | +// #define GGML_CUDA_CC_TURING 750 |
| 144 | +// #define GGML_CUDA_CC_AMPERE 800 |
143 | 145 |
|
144 | | -#define __MUSA_CC__ 800 |
| 146 | +#define __MUSA_CC__ 610 |
| 147 | +// #define __CUDA_ARCH__ __MUSA_CC__ |
145 | 148 |
|
146 | 149 |
|
147 | | -/** TODO: following apis not supported yet by musa sdk: *********** |
| 150 | +/** TODO: the following APIs are not yet supported by the MUSA SDK: ***********/ |
148 | 151 |
|
149 | | -__device__ __half hexp(const __half a) { |
150 | | - float f_a = __half2float(a); |
151 | | - float f_result = expf(f_a); |
152 | | - return __float2half(f_result); |
153 | | -} |
| 152 | +// __device__ __half hexp(const __half a) { |
| 153 | +// float f_a = __half2float(a); |
| 154 | +// float f_result = expf(f_a); |
| 155 | +// return __float2half(f_result); |
| 156 | +// } |
154 | 157 |
|
155 | | -__host__ __device__ __half2 h2exp(const __half2 a) { |
156 | | - // Extract lower and upper halves |
157 | | - __half lower = __low2half(a); |
158 | | - __half upper = __high2half(a); |
| 158 | +// __host__ __device__ __half2 h2exp(const __half2 a) { |
| 159 | +// // Extract lower and upper halves |
| 160 | +// __half lower = __low2half(a); |
| 161 | +// __half upper = __high2half(a); |
159 | 162 |
|
160 | | - // Compute exp for each half |
161 | | - __half exp_lower = hexp(lower); |
162 | | - __half exp_upper = hexp(upper); |
| 163 | +// // Compute exp for each half |
| 164 | +// __half exp_lower = hexp(lower); |
| 165 | +// __half exp_upper = hexp(upper); |
163 | 166 |
|
164 | | - // Combine back into __half2 |
165 | | - return __halves2half2(exp_lower, exp_upper); |
166 | | -} |
| 167 | +// // Combine back into __half2 |
| 168 | +// return __halves2half2(exp_lower, exp_upper); |
| 169 | +// } |
167 | 170 |
|
168 | | -******************************************************************/ |
| 171 | +/******************************************************************/ |
0 commit comments