Skip to content

Commit 5c9b083

Browse files
committed
Revert "ggml-cpu: move s390x typedef to own header file"
This reverts commit 18d79e1. Signed-off-by: Aaron Teo <[email protected]>
1 parent e43dc82 commit 5c9b083

File tree

2 files changed

+139
-2
lines changed

2 files changed

+139
-2
lines changed

ggml/src/ggml-cpu/ggml-cpu-impl.h

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,144 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
353353

354354
#if defined(__VXE__) || defined(__VXE2__)
355355
#include <vecintrin.h>
356-
#include <ggml-cpu/arch/s390/typedef.h>
356+
357+
// Operator-based shorthands for element-wise vector negate, arithmetic and
// shifts. Each macro expands to the plain C operator applied to its
// (parenthesized) arguments.
#define vec_neg(a) (-(a)) // Vector Negate
358+
#define vec_add(a, b) ((a) + (b)) // Vector Add
359+
#define vec_sub(a, b) ((a) - (b)) // Vector Subtract
360+
#define vec_mul(a, b) ((a) * (b)) // Vector Multiply
361+
#define vec_div(a, b) ((a) / (b)) // Vector Divide
362+
#define vec_sl(a, b) ((a) << (b)) // Vector Shift Left
363+
// vec_sra and vec_sr expand to the identical `>>` expression.
#define vec_sra(a, b) ((a) >> (b)) // Vector Shift Right Algebraic
364+
#define vec_sr(a, b) ((a) >> (b)) // Vector Shift Right
365+
// Octet shifts are forwarded to vec_slb / vec_srb.
// NOTE(review): `(b) << 64` uses a shift count of 64, which is not smaller
// than the width of any element type declared in this section — confirm this
// is the operand vec_slb / vec_srb are meant to receive.
#define vec_slo(a, b) vec_slb(a, (b) << 64) // Vector Shift Left by Octet
366+
#define vec_sro(a, b) vec_srb(a, (b) << 64) // Vector Shift Right by Octet
367+
368+
// Bitwise shorthands. Each one is defined only when no macro of the same
// name already exists, so an earlier definition takes precedence.
#ifndef vec_and
369+
#define vec_and(a, b) ((a) & (b)) // Vector AND
370+
#endif
371+
372+
#ifndef vec_or
373+
#define vec_or(a, b) ((a) | (b)) // Vector OR
374+
#endif
375+
376+
#ifndef vec_xor
377+
#define vec_xor(a, b) ((a) ^ (b)) // Vector XOR
378+
#endif
379+
380+
// 16-byte vector types declared with the vector_size(16) attribute.
// Names follow the pattern <element><bits>x<lanes>_t, where
// lanes = 16 bytes / element size.

// Vectors of plain signed / unsigned char.
typedef signed char char8x16_t __attribute__((vector_size(16)));
381+
typedef unsigned char uchar8x16_t __attribute__((vector_size(16)));
382+
383+
// Vectors of fixed-width signed integers.
typedef int8_t int8x16_t __attribute__((vector_size(16)));
384+
typedef int16_t int16x8_t __attribute__((vector_size(16)));
385+
typedef int32_t int32x4_t __attribute__((vector_size(16)));
386+
387+
// Vectors of fixed-width unsigned integers.
typedef uint8_t uint8x16_t __attribute__((vector_size(16)));
388+
typedef uint16_t uint16x8_t __attribute__((vector_size(16)));
389+
typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
390+
391+
// Floating-point vectors.
typedef float float32x4_t __attribute__((vector_size(16)));
392+
typedef double double64x2_t __attribute__((vector_size(16)));
393+
394+
// Vectors of long long (named "long64"; assumes long long is 8 bytes so
// that two elements fill the 16-byte vector).
typedef signed long long long64x2_t __attribute__((vector_size(16)));
395+
typedef unsigned long long ulong64x2_t __attribute__((vector_size(16)));
396+
397+
// Aggregate of two uint8x16_t vectors (2 x 16 bytes), returned by
// ggml_vec_xl_u8x2.
typedef struct ggml_uint8x16x2_t {
398+
uint8x16_t val[2];
399+
} ggml_uint8x16x2_t;
400+
401+
// Loads two uint8x16_t vectors from `ptr` with vec_xl, using offsets 0 and 16,
// and returns them as val[0] and val[1].
// The offset step of 16 matches the 16-byte vector size, so the two loads
// cover 32 consecutive bytes (vec_xl's first argument is a byte offset per
// the vector intrinsics reference). The caller must make those bytes readable.
inline static ggml_uint8x16x2_t ggml_vec_xl_u8x2(const uint8_t * ptr) {
402+
ggml_uint8x16x2_t res;
403+
404+
res.val[0] = vec_xl( 0, ptr);
405+
res.val[1] = vec_xl(16, ptr);
406+
407+
return res;
408+
}
409+
410+
// Aggregate of four uint8x16_t vectors (4 x 16 bytes), returned by
// ggml_vec_xl_u8x4.
typedef struct ggml_uint8x16x4_t {
411+
uint8x16_t val[4];
412+
} ggml_uint8x16x4_t;
413+
414+
// Loads four uint8x16_t vectors from `ptr` with vec_xl, using offsets
// 0, 16, 32 and 48, and returns them as val[0]..val[3].
// The offset step of 16 matches the 16-byte vector size, so the loads cover
// 64 consecutive bytes (vec_xl's first argument is a byte offset per the
// vector intrinsics reference). The caller must make those bytes readable.
inline static ggml_uint8x16x4_t ggml_vec_xl_u8x4(const uint8_t * ptr) {
415+
ggml_uint8x16x4_t res;
416+
417+
res.val[0] = vec_xl( 0, ptr);
418+
res.val[1] = vec_xl(16, ptr);
419+
res.val[2] = vec_xl(32, ptr);
420+
res.val[3] = vec_xl(48, ptr);
421+
422+
return res;
423+
}
424+
425+
// Aggregate of four int8x16_t vectors (4 x 16 bytes), returned by
// ggml_vec_xl_s8x4.
typedef struct ggml_int8x16x4_t {
426+
int8x16_t val[4];
427+
} ggml_int8x16x4_t;
428+
429+
// Signed counterpart of the uint8 x4 loader: loads four int8x16_t vectors
// from `ptr` with vec_xl, using offsets 0, 16, 32 and 48, and returns them as
// val[0]..val[3].
// The offset step of 16 matches the 16-byte vector size, so the loads cover
// 64 consecutive bytes (vec_xl's first argument is a byte offset per the
// vector intrinsics reference). The caller must make those bytes readable.
inline static ggml_int8x16x4_t ggml_vec_xl_s8x4(const int8_t * ptr) {
430+
ggml_int8x16x4_t res;
431+
432+
res.val[0] = vec_xl( 0, ptr);
433+
res.val[1] = vec_xl(16, ptr);
434+
res.val[2] = vec_xl(32, ptr);
435+
res.val[3] = vec_xl(48, ptr);
436+
437+
return res;
438+
}
439+
440+
// Aggregate of two int16x8_t vectors (2 x 16 bytes), returned by
// ggml_vec_xl_s16x2.
typedef struct ggml_int16x8x2_t {
441+
int16x8_t val[2];
442+
} ggml_int16x8x2_t;
443+
444+
// Loads two int16x8_t vectors from `ptr` with vec_xl, using offsets 0 and 16,
// and returns them as val[0] and val[1].
// NOTE(review): the offset 16 is the same value used by the 8-bit loaders,
// i.e. it is treated as a byte offset (8 int16_t elements), not an element
// index — consistent with vec_xl taking a byte offset; confirm.
inline static ggml_int16x8x2_t ggml_vec_xl_s16x2(const int16_t * ptr) {
445+
ggml_int16x8x2_t res;
446+
447+
res.val[0] = vec_xl( 0, ptr);
448+
res.val[1] = vec_xl(16, ptr);
449+
450+
return res;
451+
}
452+
453+
/*
454+
! WARNING: Very slow. Use vec_perm if possible. Refer to iq4_xs
455+
! or iq4_nl for example implementation.
456+
*/
457+
// Byte table lookup done one lane at a time: for each lane i in 0..15,
// res[i] = a[b[i]].
//   a: 16-entry table of signed bytes.
//   b: per-lane indices into `a`.
// Returns the vector of looked-up bytes.
// NOTE(review): indices are used as-is with no masking or range check;
// presumably callers guarantee every b[i] is in 0..15 — confirm.
inline static int8x16_t ggml_vec_tbl(int8x16_t a, uint8x16_t b) {
458+
int8x16_t res;
459+
460+
res[ 0] = a[b[ 0]];
461+
res[ 1] = a[b[ 1]];
462+
res[ 2] = a[b[ 2]];
463+
res[ 3] = a[b[ 3]];
464+
res[ 4] = a[b[ 4]];
465+
res[ 5] = a[b[ 5]];
466+
res[ 6] = a[b[ 6]];
467+
res[ 7] = a[b[ 7]];
468+
res[ 8] = a[b[ 8]];
469+
res[ 9] = a[b[ 9]];
470+
res[10] = a[b[10]];
471+
res[11] = a[b[11]];
472+
res[12] = a[b[12]];
473+
res[13] = a[b[13]];
474+
res[14] = a[b[14]];
475+
res[15] = a[b[15]];
476+
477+
return res;
478+
}
479+
480+
// Returns the lane-wise sum of two int16x8_t vectors derived from a and b:
//   v_abo: vec_pack of a and b reinterpreted as int32x4_t;
//   v_abe: vec_perm of a and b using the byte mask v_maske.
// The mask lists byte pairs {0,1}, {4,5}, ... {28,29}, i.e. every other
// 16-bit element across the 32 bytes addressed by the permute.
// NOTE(review): this looks like a pairwise add of adjacent 16-bit elements
// (first from a, then from b), with the permute picking the even-indexed
// elements and vec_pack keeping the other half of each 32-bit pair — the
// "e"/"o" suffixes suggest even/odd; confirm against the vec_pack / vec_perm
// definitions for this target.
inline static int16x8_t vec_padd_s16(int16x8_t a, int16x8_t b) {
481+
const uchar8x16_t v_maske = { 0, 1, 4, 5, 8, 9, 12, 13,
482+
16, 17, 20, 21, 24, 25, 28, 29 };
483+
484+
const int16x8_t v_abo = vec_pack((int32x4_t)a, (int32x4_t)b);
485+
const int16x8_t v_abe = vec_perm(a, b, v_maske);
486+
return v_abo + v_abe;
487+
}
488+
489+
// Accumulating 8-bit multiply-add: returns acc plus widened sums of the
// products of a and b.
//   p   = vec_mule(a, b) + vec_mulo(a, b), held as int16x8_t;
//   ret = acc + (vec_unpackh(p) + vec_unpackl(p)), as int32x4_t.
// NOTE(review): presumably vec_mule / vec_mulo yield the 16-bit products of
// the even / odd byte lanes, so each lane of p is the sum of one adjacent
// product pair; the sum is formed in 16 bits before widening — confirm the
// value range callers rely on. The mapping of products to the four result
// lanes follows vec_unpackh / vec_unpackl and is not a contiguous-4 grouping.
inline static int32x4_t ggml_vec_dot(int32x4_t acc, int8x16_t a, int8x16_t b) {
490+
const int16x8_t p = vec_mule(a, b) + vec_mulo(a, b);
491+
return acc + (vec_unpackh(p) + vec_unpackl(p));
492+
}
493+
357494
#endif
358495

359496
#if defined(__loongarch_asx)

ggml/src/ggml-impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
#if defined(GGML_VXE)
3232
#include <vecintrin.h>
33-
#include "ggml-cpu/arch/s390/typedef.h"
33+
#include <ggml-cpu/ggml-cpu-impl.h>
3434
#endif
3535

3636
#if defined(GGML_NNPA)

0 commit comments

Comments
 (0)