@@ -454,21 +454,21 @@ const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type
454454#define GGML_F32x4_ADD vaddq_f32
455455#define GGML_F32x4_MUL vmulq_f32
456456#define GGML_F32x4_REDUCE_ONE (x ) vaddvq_f32(x)
457- #define GGML_F32x4_REDUCE (res , x ) \
458- { \
459- int offset = GGML_F32_ARR >> 1; \
460- for (int i = 0; i < offset; ++i) { \
461- (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
462- } \
463- offset >>= 1; \
464- for (int i = 0; i < offset; ++i) { \
465- (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
466- } \
467- offset >>= 1; \
468- for (int i = 0; i < offset; ++i) { \
469- (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
470- } \
471- (res) = GGML_F32x4_REDUCE_ONE((x)[0]); \
457+ #define GGML_F32x4_REDUCE (res , x ) \
458+ { \
459+ int offset = GGML_F32_ARR >> 1; \
460+ for (int i = 0; i < offset; ++i) { \
461+ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
462+ } \
463+ offset >>= 1; \
464+ for (int i = 0; i < offset; ++i) { \
465+ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
466+ } \
467+ offset >>= 1; \
468+ for (int i = 0; i < offset; ++i) { \
469+ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
470+ } \
471+ (res) = (ggml_float) GGML_F32x4_REDUCE_ONE((x)[0]); \
472472}
473473
474474#define GGML_F32_VEC GGML_F32x4
@@ -2395,7 +2395,7 @@ static void ggml_init_arm_arch_features(void) {
23952395 uint32_t hwcap2 = getauxval (AT_HWCAP2 );
23962396
23972397 ggml_arm_arch_features .has_neon = !!(hwcap & HWCAP_ASIMD );
2398- ggml_arm_arch_features .has_dotprod = !!(hwcap && HWCAP_ASIMDDP );
2398+ ggml_arm_arch_features .has_dotprod = !!(hwcap & HWCAP_ASIMDDP );
23992399 ggml_arm_arch_features .has_i8mm = !!(hwcap2 & HWCAP2_I8MM );
24002400 ggml_arm_arch_features .has_sve = !!(hwcap & HWCAP_SVE );
24012401
0 commit comments