|
16 | 16 | #include <arm_sve.h> |
17 | 17 | #endif // __ARM_FEATURE_SVE |
18 | 18 |
|
19 | | -#if defined(__ARM_NEON) |
20 | | -// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example: |
21 | | -// |
22 | | -// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/ |
23 | | -// |
24 | | -#include <arm_neon.h> |
25 | | -#endif |
26 | | - |
27 | 19 | #if defined(__F16C__) |
28 | 20 | #include <immintrin.h> |
29 | 21 | #endif |
@@ -314,7 +306,18 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size); |
314 | 306 | // 16-bit float |
315 | 307 | // on Arm, we use __fp16 |
316 | 308 | // on x86, we use uint16_t |
317 | | -#if defined(__ARM_NEON) |
| 309 | +// |
| 310 | +// for old CUDA compilers (<= 11), we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/10616 |
| 311 | +// for MUSA compilers, we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/11843 |
| 312 | +// |
| 313 | +#if defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__) |
| 314 | + |
| 315 | + // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example: |
| 316 | + // |
| 317 | + // $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/ |
| 318 | + // |
| 319 | + #include <arm_neon.h> |
| 320 | + |
318 | 321 | #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) |
319 | 322 | #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) |
320 | 323 |
|
@@ -480,7 +483,7 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size); |
480 | 483 | #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) |
481 | 484 | #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) |
482 | 485 |
|
483 | | -#endif // defined(__ARM_NEON) |
| 486 | +#endif // defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__) |
484 | 487 |
|
485 | 488 | // precomputed f32 table for f16 (256 KB) |
486 | 489 | // defined in ggml.c, initialized in ggml_init() |
|
0 commit comments