File tree Expand file tree Collapse file tree 3 files changed +521
-498
lines changed Expand file tree Collapse file tree 3 files changed +521
-498
lines changed Original file line number Diff line number Diff line change 11#ifndef GGML_METAL_IMPL
22#define GGML_METAL_IMPL
33
4+ // kernel parameters for mat-vec threadgroups
5+ //
6+ // N_R0: number of src0 rows to process per simdgroup
7+ // N_SG: number of simdgroups per threadgroup
8+ // each N_R0_<T>/N_SG_<T> pair below applies to the type named by its suffix (presumably a ggml quantization type - TODO confirm)
9+ // TODO: for optimal performance, these should become functions of the device and work size
10+
11+ #define N_R0_Q4_0 4
12+ #define N_SG_Q4_0 2
13+
14+ #define N_R0_Q4_1 4
15+ #define N_SG_Q4_1 2
16+
17+ #define N_R0_Q5_0 4
18+ #define N_SG_Q5_0 2
19+
20+ #define N_R0_Q5_1 4
21+ #define N_SG_Q5_1 2
22+
23+ #define N_R0_Q8_0 4
24+ #define N_SG_Q8_0 2
25+
26+ #define N_R0_Q2_K 4
27+ #define N_SG_Q2_K 2
28+
29+ #define N_R0_Q3_K 2
30+ #define N_SG_Q3_K 2
31+
32+ #define N_R0_Q4_K 4
33+ #define N_SG_Q4_K 2
34+
35+ #define N_R0_Q5_K 2
36+ #define N_SG_Q5_K 2
37+
38+ #define N_R0_Q6_K 1
39+ #define N_SG_Q6_K 2
40+
41+ #define N_R0_IQ1_S 4
42+ #define N_SG_IQ1_S 2
43+
44+ #define N_R0_IQ1_M 4
45+ #define N_SG_IQ1_M 2
46+
47+ #define N_R0_IQ2_XXS 4
48+ #define N_SG_IQ2_XXS 2
49+
50+ #define N_R0_IQ2_XS 4
51+ #define N_SG_IQ2_XS 2
52+
53+ #define N_R0_IQ2_S 4
54+ #define N_SG_IQ2_S 2
55+
56+ #define N_R0_IQ3_XXS 4
57+ #define N_SG_IQ3_XXS 2
58+
59+ #define N_R0_IQ3_S 4
60+ #define N_SG_IQ3_S 2
61+
62+ #define N_R0_IQ4_NL 2
63+ #define N_SG_IQ4_NL 2
64+
65+ #define N_R0_IQ4_XS 2
66+ #define N_SG_IQ4_XS 2
67+
67+
468// kernel argument structs
569//
670// - element counters (e.g. ne00) typically use int32_t to reduce register usage
You can’t perform that action at this time.
0 commit comments