Skip to content

Commit 7a73e86

Browse files
committed
cont
ggml-ci
1 parent 1b07edf commit 7a73e86

File tree

3 files changed

+12
-24
lines changed

3 files changed

+12
-24
lines changed

ggml/src/ggml-cpu/ggml-cpu-impl.h

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44

55
#include "ggml.h"
66
#include "ggml-impl.h"
7+
78
#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
89
//#include <stddef.h>
910
#include <stdbool.h>
1011
#include <string.h> // memcpy
1112
#include <math.h> // fabsf
1213

13-
1414
#ifdef __cplusplus
1515
extern "C" {
1616
#endif
@@ -69,29 +69,16 @@ struct ggml_compute_params {
6969
#endif
7070

7171
#if defined(__ARM_FEATURE_SVE)
72-
#include <arm_sve.h>
7372
#include <sys/prctl.h>
7473
#endif
7574

76-
// 16-bit float
77-
// on Arm, we use __fp16
78-
// on x86, we use uint16_t
7975
#if defined(__ARM_NEON)
8076

81-
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
82-
//
83-
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
84-
//
85-
#include <arm_neon.h>
86-
77+
// ref: https://github.com/ggml-org/llama.cpp/pull/5404
8778
#ifdef _MSC_VER
88-
8979
#define ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) }
90-
9180
#else
92-
9381
#define ggml_vld1q_u32(w,x,y,z) { (w), (x), (y), (z) }
94-
9582
#endif // _MSC_VER
9683

9784
#if !defined(__aarch64__)

ggml/src/ggml-cpu/simd-mappings.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@
7171
#define GGML_F16x8 float16x8_t
7272
#define GGML_F16x8_ZERO vdupq_n_f16(0.0f)
7373
#define GGML_F16x8_SET1(x) vdupq_n_f16(x)
74-
#define GGML_F16x8_LOAD(x) vld1q_f16((const ggml_fp16_internal_t *)(x))
74+
#define GGML_F16x8_LOAD(x) vld1q_f16((const __fp16 *)(x))
7575
#define GGML_F16x8_STORE vst1q_f16
7676
#define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
7777
#define GGML_F16x8_ADD vaddq_f16
@@ -99,7 +99,7 @@
9999
#define GGML_F16_VEC_ZERO GGML_F16x8_ZERO
100100
#define GGML_F16_VEC_SET1 GGML_F16x8_SET1
101101
#define GGML_F16_VEC_LOAD(p, i) GGML_F16x8_LOAD(p)
102-
#define GGML_F16_VEC_STORE(p, r, i) GGML_F16x8_STORE((ggml_fp16_internal_t *)(p), (r)[i])
102+
#define GGML_F16_VEC_STORE(p, r, i) GGML_F16x8_STORE((__fp16 *)(p), (r)[i])
103103
#define GGML_F16_VEC_FMA GGML_F16x8_FMA
104104
#define GGML_F16_VEC_ADD GGML_F16x8_ADD
105105
#define GGML_F16_VEC_MUL GGML_F16x8_MUL
@@ -114,7 +114,7 @@
114114
#define GGML_F32Cx4 float32x4_t
115115
#define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f)
116116
#define GGML_F32Cx4_SET1(x) vdupq_n_f32(x)
117-
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const ggml_fp16_internal_t *)(x)))
117+
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
118118
#define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y))
119119
#define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
120120
#define GGML_F32Cx4_ADD vaddq_f32
@@ -125,7 +125,7 @@
125125
#define GGML_F16_VEC_ZERO GGML_F32Cx4_ZERO
126126
#define GGML_F16_VEC_SET1 GGML_F32Cx4_SET1
127127
#define GGML_F16_VEC_LOAD(p, i) GGML_F32Cx4_LOAD(p)
128-
#define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx4_STORE((ggml_fp16_internal_t *)(p), r[i])
128+
#define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx4_STORE((__fp16 *)(p), r[i])
129129
#define GGML_F16_VEC_FMA GGML_F32Cx4_FMA
130130
#define GGML_F16_VEC_ADD GGML_F32Cx4_ADD
131131
#define GGML_F16_VEC_MUL GGML_F32Cx4_MUL

ggml/src/ggml-impl.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include <arm_sve.h>
1717
#endif // __ARM_FEATURE_SVE
1818

19-
#if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__)
19+
#if defined(__ARM_NEON)
2020
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
2121
//
2222
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
@@ -311,23 +311,24 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
311311

312312
// FP16 to FP32 conversion
313313

314+
// 16-bit float
315+
// on Arm, we use __fp16
316+
// on x86, we use uint16_t
314317
#if defined(__ARM_NEON)
315-
typedef __fp16 ggml_fp16_internal_t;
316-
317318
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
318319
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
319320

320321
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
321322

322323
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
323-
ggml_fp16_internal_t tmp;
324+
__fp16 tmp;
324325
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
325326
return (float)tmp;
326327
}
327328

328329
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
329330
ggml_fp16_t res;
330-
ggml_fp16_internal_t tmp = f;
331+
__fp16 tmp = f;
331332
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
332333
return res;
333334
}

0 commit comments

Comments
 (0)