Skip to content

Commit d92f452

Browse files
authored
Merge pull request #44 from tsisw/FIR-917-dynamic-kernel
@FIR-917 - GGML: Converting Static kernel to Dynamic Kernel
2 parents 8dc537f + aff1717 commit d92f452

File tree

5 files changed

+104
-93
lines changed

5 files changed

+104
-93
lines changed

examples/simple/simple-backend-tsi.cpp

Lines changed: 84 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,11 @@ float test_input_1[GGML_TSAVORITE_KERNEL_TYPE_COUNT][NUM_ELEMENTS] = {
3838
//ABS Kernel
3939
{1.1, -4.4, 10, -5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, -23, 24, 25, -26, 27, -28, 29, -30, 31, -32.6},
4040
//SIN Kernel
41-
{1.1, 4.4, 10, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 20, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32.6}
42-
41+
{1.1, 4.4, 10, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 20, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32.6},
42+
// SIGMOID Kernel: placeholder input (same values as the SIN row); needs to be fixed, not yet tested
43+
{1.1, 4.4, 10, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 20, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32.6},
44+
//SILU Kernel
45+
{-16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}
4346
};
4447
float test_input_2[GGML_TSAVORITE_KERNEL_TYPE_COUNT][NUM_ELEMENTS] = {
4548
//ADD KERNEL
@@ -60,7 +63,11 @@ float test_input_2[GGML_TSAVORITE_KERNEL_TYPE_COUNT][NUM_ELEMENTS] = {
6063
//ABS KERNEL input not used
6164
{1.1, 2.2, 5, 10, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
6265
//SIN Kernel input not used
63-
{1.1, 2.2, 5, 10, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
66+
{1.1, 2.2, 5, 10, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
67+
//SIGMOID Kernel not used
68+
{1.1, 4.4, 10, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 20, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32.6},
69+
//SILU Kernel not used
70+
{-16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}
6471
};
6572

6673
float test_result[GGML_TSAVORITE_KERNEL_TYPE_COUNT][NUM_ELEMENTS] = {
@@ -81,7 +88,11 @@ float test_result[GGML_TSAVORITE_KERNEL_TYPE_COUNT][NUM_ELEMENTS] = {
8188
//ABS Kernel
8289
{1.1, 4.4, 10, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32.6},
8390
//SIN Kernel
84-
{0.891207, -0.951602, -0.544021, -0.958924, -0.958924, -0.279416, 0.656987, 0.989358, 0.412118, -0.544021, -0.999990, -0.536573, 0.420167, 0.990607, 0.650288, -0.287903, -0.961398, -0.750987, 0.149877, 0.912945, 0.912945, 0.912945, -0.846220, -0.905578, -0.132352, 0.762559, 0.956376, 0.270906, -0.663634, -0.988032, -0.404039, 0.926149}
91+
{0.891207, -0.951602, -0.544021, -0.958924, -0.958924, -0.279416, 0.656987, 0.989358, 0.412118, -0.544021, -0.999990, -0.536573, 0.420167, 0.990607, 0.650288, -0.287903, -0.961398, -0.750987, 0.149877, 0.912945, 0.912945, 0.912945, -0.846220, -0.905578, -0.132352, 0.762559, 0.956376, 0.270906, -0.663634, -0.988032, -0.404039, 0.926149},
92+
// SIGMOID Kernel: not tested; the expected values below are copied from the SIN row and are not sigmoid outputs
93+
{0.891207, -0.951602, -0.544021, -0.958924, -0.958924, -0.279416, 0.656987, 0.989358, 0.412118, -0.544021, -0.999990, -0.536573, 0.420167, 0.990607, 0.650288, -0.287903, -0.961398, -0.750987, 0.149877, 0.912945, 0.912945, 0.912945, -0.846220, -0.905578, -0.132352, 0.762559, 0.956376, 0.270906, -0.663634, -0.988032, -0.404039, 0.926149},
94+
// SILU Kernel
95+
{-0.000002, -0.000005, -0.000012, -0.000029, -0.000074, -0.000184, -0.000454, -0.001111, -0.002683, -0.006377, -0.014836, -0.033464, -0.071945, -0.142278, -0.238406, -0.268941, 0.000000, 0.731059, 1.761594, 2.857722, 3.928055, 4.966536, 5.985164, 6.993623, 7.997317, 8.998889, 9.999546, 10.999816, 11.999926, 12.999971, 13.999988, 14.999995}
8596

8697
};
8798

@@ -139,7 +150,15 @@ float test_input_scale_1[GGML_TSAVORITE_KERNEL_TYPE_COUNT][NUM_ELEMENTS_SCALE] =
139150
-9, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140151
-16, 25, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
141152
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142-
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
153+
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
154+
// SIGMOID KERNEL: input data still needs to be fixed
155+
{-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
156+
-9, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157+
-16, 25, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158+
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159+
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
160+
// SILU KERNEL
161+
{-16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 0.0, 1.0, 2.0, 3.0}
143162
};
144163

145164
float test_input_scale_2[GGML_TSAVORITE_KERNEL_TYPE_COUNT][NUM_ELEMENTS_SCALE] = {
@@ -197,7 +216,15 @@ float test_input_scale_2[GGML_TSAVORITE_KERNEL_TYPE_COUNT][NUM_ELEMENTS_SCALE] =
197216
-9, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
198217
-16, 25, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
199218
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
200-
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
219+
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
220+
//SIGMOID KERNEL input not used
221+
{-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
222+
-9, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
223+
-16, 25, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
224+
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
225+
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
226+
// SILU KERNEL input not used
227+
{-16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 0.0, 1.0, 2.0, 3.0}
201228
};
202229
float test_result_scale[GGML_TSAVORITE_KERNEL_TYPE_COUNT][NUM_ELEMENTS_SCALE] = {
203230
// ADD KERNEL
@@ -263,7 +290,25 @@ float test_result_scale[GGML_TSAVORITE_KERNEL_TYPE_COUNT][NUM_ELEMENTS_SCALE] =
263290
0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
264291
-0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
265292
0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
266-
0.841471, 0.841471, 0.841471}
293+
0.841471, 0.841471, 0.841471},
294+
// SIGMOID KERNEL: expected results still need to be changed
295+
{-0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
296+
0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
297+
0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
298+
-0.412118,-0.756802, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
299+
0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
300+
0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
301+
0.287903,-0.132352, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
302+
0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
303+
0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
304+
-0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
305+
0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
306+
0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
307+
-0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
308+
0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471, 0.841471,
309+
0.841471, 0.841471, 0.841471},
310+
// SILU KERNEL
311+
{-0.000002, -0.000005, -0.000012, -0.000029, -0.000074, -0.000184, -0.000454, -0.001111, -0.002683, -0.006377, -0.014836, -0.033464, -0.071945, -0.142278, -0.238406, -0.268941, 0.000000, 0.731059, 1.761594, 2.857722, 3.928055, 4.966536, 5.985164, 6.993623, 7.997317, 8.998889, 9.999546, 10.999816, 11.999926, 12.999971, 13.999988, 14.999995, -0.000002, -0.000005, -0.000012, -0.000029, -0.000074, -0.000184, -0.000454, -0.001111, -0.002683, -0.006377, -0.014836, -0.033464, -0.071945, -0.142278, -0.238406, -0.268941, 0.000000, 0.731059, 1.761594, 2.857722, 3.928055, 4.966536, 5.985164, 6.993623, 7.997317, 8.998889, 9.999546, 10.999816, 11.999926, 12.999971, 13.999988, 14.999995, -0.000002, -0.000005, -0.000012, -0.000029, -0.000074, -0.000184, -0.000454, -0.001111, -0.002683, -0.006377, -0.014836, -0.033464, -0.071945, -0.142278, -0.238406, -0.268941, 0.000000, 0.731059, 1.761594, 2.857722, 3.928055, 4.966536, 5.985164, 6.993623, 7.997317, 8.998889, 9.999546, 10.999816, 11.999926, 12.999971, 13.999988, 14.999995, -0.000002, -0.000005, -0.000012, -0.000029, -0.000074, -0.000184, -0.000454, -0.001111, -0.002683, -0.006377, -0.014836, -0.033464, -0.071945, -0.142278, -0.238406, -0.268941, 0.000000, 0.731059, 1.761594, 2.857722, 3.928055, 4.966536, 5.985164, 6.993623, 7.997317, 8.998889, 9.999546, 10.999816, 11.999926, 12.999971, 13.999988, 14.999995, 0.000000, 0.731059, 1.761594, 2.857722}
267312
};
268313

269314
// This is a simple model with two tensors a and b
@@ -289,26 +334,20 @@ static void ggml_log_callback_default(ggml_log_level level, const char * text, v
289334
fflush(stderr);
290335
}
291336

337+
338+
// --- FLOAT COMPARATOR
292339
// Returns true when `a` (actual) and `b` (expected) are equal within tolerance.
//
//   * Bit-for-bit equal values always match. This shortcut is what makes two
//     identical infinities compare equal: without it, inf - inf yields NaN and
//     every ordered comparison against NaN is false.
//   * When BOTH magnitudes are below 1e-2, an absolute tolerance of 1e-6 is
//     used, because relative error is not meaningful that close to zero.
//   * Otherwise a relative tolerance of 1e-4 is applied, scaled by the larger
//     of the two magnitudes.
//
// NaN never matches anything (including another NaN), and an infinity never
// matches a finite value or the opposite-signed infinity.
static bool ggml_tsi_compare_two_float(float a, float b) {
    // Exact match: covers equal finite values, +/-0, and same-signed infinities.
    if (a == b) {
        return true;
    }

    const float small_value_limit = 1e-2f;  // below this magnitude, use absolute error
    const float absolute_tolerance = 1e-6f; // accepted difference for small values
    const float relative_tolerance = 1e-4f; // accepted fraction of the larger magnitude

    const float abs_a = fabsf(a);
    const float abs_b = fabsf(b);
    const float diff  = fabsf(a - b);

    // For very small values, use absolute error.
    if (abs_a < small_value_limit && abs_b < small_value_limit) {
        return diff < absolute_tolerance;
    }

    // For larger values, use relative error.
    return diff < relative_tolerance * fmaxf(abs_a, abs_b);
}
310350

311-
312351
static bool load_model(simple_model & model, float * a, float * b, enum ggml_type data_type, int elements_A, int elements_B) {
313352
ggml_log_set(ggml_log_callback_default, nullptr);
314353

@@ -436,6 +475,12 @@ static struct ggml_cgraph * build_graph(const simple_model& model, enum ggml_tsa
436475
case GGML_TSAVORITE_KERNEL_TYPE_SIN:
437476
result = ggml_sin(ctx0, model.a);
438477
break;
478+
case GGML_TSAVORITE_KERNEL_TYPE_SIGMOID:
479+
result = ggml_sigmoid(ctx0, model.a);
480+
break;
481+
case GGML_TSAVORITE_KERNEL_TYPE_SILU:
482+
result = ggml_silu(ctx0, model.a);
483+
break;
439484
default:
440485
ggml_free(ctx0);
441486
fprintf(stderr, "\n Non Supported Operation \n");
@@ -488,6 +533,10 @@ enum ggml_tsavorite_kernel_type convert_testcase_to_ops_type (const char *testCa
488533
return GGML_TSAVORITE_KERNEL_TYPE_ABS;
489534
else if (!strcmp(testCase,"sin"))
490535
return GGML_TSAVORITE_KERNEL_TYPE_SIN;
536+
else if (!strcmp(testCase,"sigmoid"))
537+
return GGML_TSAVORITE_KERNEL_TYPE_SIGMOID;
538+
else if (!strcmp(testCase,"silu"))
539+
return GGML_TSAVORITE_KERNEL_TYPE_SILU;
491540

492541
fprintf(stderr, "\n un-supported test case %s hence running default test case which is add operation \n", testCase);
493542
return GGML_TSAVORITE_KERNEL_TYPE_ADD;
@@ -523,6 +572,8 @@ const char* convert_ops_type_to_testcase(enum ggml_tsavorite_kernel_type ops_typ
523572
}
524573
}
525574

575+
// --- TEST HARNESS DEBUG BLOCK ---
576+
#define DEBUG_COMPARE 1
526577

527578
int main(int argc, char *argv[]) {
528579
ggml_time_init();
@@ -549,7 +600,9 @@ int main(int argc, char *argv[]) {
549600
ops_type == GGML_TSAVORITE_KERNEL_TYPE_SQR ||
550601
ops_type == GGML_TSAVORITE_KERNEL_TYPE_NEG ||
551602
ops_type == GGML_TSAVORITE_KERNEL_TYPE_ABS ||
552-
ops_type == GGML_TSAVORITE_KERNEL_TYPE_SIN)
603+
ops_type == GGML_TSAVORITE_KERNEL_TYPE_SIN ||
604+
ops_type == GGML_TSAVORITE_KERNEL_TYPE_SIGMOID ||
605+
ops_type == GGML_TSAVORITE_KERNEL_TYPE_SILU)
553606
num_of_input_tensors = NUM_INPUT_URINARY_TENSORS;
554607
else
555608
num_of_input_tensors = NUM_INPUT_TENSORS;
@@ -618,6 +671,13 @@ int main(int argc, char *argv[]) {
618671

619672
fprintf(stderr, "\n compute is also done \n");
620673
for (int i = 0; i < result->ne[0] /* cols */; i++) {
674+
675+
#if DEBUG_COMPARE
676+
uint32_t bits_expected, bits_actual;
677+
memcpy(&bits_expected, &result_data[ops_type][i], sizeof(float));
678+
memcpy(&bits_actual, &out_data[i], sizeof(float));
679+
fprintf(stderr, "Index %d: expected bits %08x, actual bits %08x\n", i, bits_expected, bits_actual);
680+
#endif
621681
if (ggml_tsi_compare_two_float(out_data[i], result_data[ops_type][i])) {
622682
continue;
623683
}

ggml-tsi-kernel

ggml/include/ggml-tsavorite.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
extern "C" {
5454
#endif
5555

56-
#define TSAVORITE_KERNEL_SIZE 64
5756
#define TSAVORITE_DEVICE_MAX_BUF_LEN 1024 * 1024 * 128
5857

5958
enum ggml_tsavorite_input_tensors_count {

0 commit comments

Comments
 (0)