@@ -236,7 +236,7 @@ int main(int argc, char** argv) {
236236 int n4 = useQ4_1 ? kVecSize / QK4_1 : kVecSize / QK4_0; n4 = 64 *((n4 + 63 )/64 );
237237 int n8 = kVecSize / QK8_0; n8 = 64 *((n8 + 63 )/64 );
238238
239- auto funcs = useQ4_1 ? ggml_internal_get_type_traits (GGML_TYPE_Q4_1) : ggml_internal_get_type_traits (GGML_TYPE_Q4_0);
239+ const auto * funcs = useQ4_1 ? ggml_internal_get_type_traits (GGML_TYPE_Q4_1) : ggml_internal_get_type_traits (GGML_TYPE_Q4_0);
240240
241241 std::vector<block_q4_0> q40;
242242 std::vector<block_q4_1> q41;
@@ -261,9 +261,9 @@ int main(int argc, char** argv) {
261261 // Note, we do not include this in the timing as in practical application
262262 // we already have the quantized model weights.
263263 if (useQ4_1) {
264- funcs. from_float (x1.data (), q41.data (), kVecSize );
264+ funcs-> from_float (x1.data (), q41.data (), kVecSize );
265265 } else {
266- funcs. from_float (x1.data (), q40.data (), kVecSize );
266+ funcs-> from_float (x1.data (), q40.data (), kVecSize );
267267 }
268268
269269 // Now measure time the dot product needs using the "scalar" version above
@@ -282,10 +282,10 @@ int main(int argc, char** argv) {
282282 dot_q4_q8 (kVecSize , &result, q40.data (), q8.data ());
283283 }
284284 else {
285- auto vdot = ggml_internal_get_type_traits (funcs. vec_dot_type );
286- vdot. from_float (y1.data (), q8.data (), kVecSize );
287- if (useQ4_1) funcs. vec_dot (kVecSize , &result, 0 , q41.data (), 0 , q8.data (), 0 , 1 );
288- else funcs. vec_dot (kVecSize , &result, 0 , q40.data (), 0 , q8.data (), 0 , 1 );
285+ const auto * vdot = ggml_internal_get_type_traits (funcs-> vec_dot_type );
286+ vdot-> from_float (y1.data (), q8.data (), kVecSize );
287+ if (useQ4_1) funcs-> vec_dot (kVecSize , &result, 0 , q41.data (), 0 , q8.data (), 0 , 1 );
288+ else funcs-> vec_dot (kVecSize , &result, 0 , q40.data (), 0 , q8.data (), 0 , 1 );
289289 }
290290 sumq += result;
291291 t2 = std::chrono::high_resolution_clock::now ();
0 commit comments