-
Notifications
You must be signed in to change notification settings - Fork 1.4k
ggml : simplify Arm fp16 CPU logic #1177
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,7 +16,7 @@ | |
| #include <arm_sve.h> | ||
| #endif // __ARM_FEATURE_SVE | ||
|
|
||
| #if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__) | ||
| #if defined(__ARM_NEON) | ||
| // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example: | ||
| // | ||
| // $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/ | ||
|
|
@@ -311,29 +311,24 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size); | |
|
|
||
| // FP16 to FP32 conversion | ||
|
|
||
| // 16-bit float | ||
| // on Arm, we use __fp16 | ||
| // on x86, we use uint16_t | ||
| #if defined(__ARM_NEON) | ||
|
||
| #if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) | ||
| typedef uint16_t ggml_fp16_internal_t; | ||
| #else | ||
| typedef __fp16 ggml_fp16_internal_t; | ||
| #endif | ||
| #endif | ||
|
|
||
| #if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) | ||
| #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) | ||
| #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) | ||
|
|
||
| #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) | ||
|
|
||
| static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { | ||
| ggml_fp16_internal_t tmp; | ||
| __fp16 tmp; | ||
| memcpy(&tmp, &h, sizeof(ggml_fp16_t)); | ||
| return (float)tmp; | ||
| } | ||
|
|
||
| static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { | ||
| ggml_fp16_t res; | ||
| ggml_fp16_internal_t tmp = f; | ||
| __fp16 tmp = f; | ||
| memcpy(&res, &tmp, sizeof(ggml_fp16_t)); | ||
| return res; | ||
| } | ||
|
|
@@ -485,7 +480,7 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size); | |
| #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) | ||
| #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) | ||
|
|
||
| #endif // defined(__ARM_NEON) && (!defined(__MSC_VER) | ||
| #endif // defined(__ARM_NEON) | ||
|
|
||
| // precomputed f32 table for f16 (256 KB) | ||
| // defined in ggml.c, initialized in ggml_init() | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.