Skip to content

Commit 2a4ba5a

Browse files
committed
cont : bring back CUDA/MUSA checks
ggml-ci
1 parent c764fc5 commit 2a4ba5a

File tree

1 file changed

+13
-10
lines changed

1 file changed

+13
-10
lines changed

ggml/src/ggml-impl.h

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -16,14 +16,6 @@
16 16
#include <arm_sve.h>
17 17
#endif // __ARM_FEATURE_SVE
18 18

19-
#if defined(__ARM_NEON)
20-
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
21-
//
22-
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
23-
//
24-
#include <arm_neon.h>
25-
#endif
26-
27 19
#if defined(__F16C__)
28 20
#include <immintrin.h>
29 21
#endif
@@ -314,7 +306,18 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
314 306
// 16-bit float
315 307
// on Arm, we use __fp16
316 308
// on x86, we use uint16_t
317-
#if defined(__ARM_NEON)
309+
//
310+
// for old CUDA compilers (<= 11), we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/10616
311+
// for MUSA compilers , we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/11843
312+
//
313+
#if defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
314+
315+
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
316+
//
317+
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
318+
//
319+
#include <arm_neon.h>
320+
318 321
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
319 322
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
320 323

@@ -480,7 +483,7 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
480 483
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
481 484
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
482 485

483-
#endif // defined(__ARM_NEON)
486+
#endif // defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
484 487

485 488
// precomputed f32 table for f16 (256 KB)
486 489
// defined in ggml.c, initialized in ggml_init()

0 commit comments

Comments
 (0)