1 file changed, +9 -2 lines changed

@@ -17788,13 +17788,20 @@ static void llama_tensor_dequantize_internal(
         return;
     }
 
-    if (nthread < 2) {
+    if (nthread < 2 || (ggml_is_quantized(tensor->type) && qtype.row_meta_size > 0) ) {
         if (tensor->type == GGML_TYPE_F16) {
             ggml_fp16_to_fp32_row((ggml_fp16_t *)tensor->data, f32_output, nelements);
         } else if (tensor->type == GGML_TYPE_BF16) {
             ggml_bf16_to_fp32_row((ggml_bf16_t *)tensor->data, f32_output, nelements);
         } else if (ggml_is_quantized(tensor->type)) {
-            qtype.to_float(tensor->data, f32_output, nelements);
+            auto row_size = ggml_row_size(tensor->type, tensor->ne[0]);
+            int nrows = ggml_nrows(tensor);
+            auto qsrc = (const char *)tensor->data;
+            for (int row = 0; row < nrows; ++row) {
+                qtype.to_float(qsrc, f32_output, tensor->ne[0]);
+                qsrc += row_size;
+                f32_output += tensor->ne[0];
+            }
         } else {
             GGML_ABORT("fatal error"); // unreachable
         }
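For reference, the added branch boils down to the per-row dequantization pattern sketched below. This is a minimal, self-contained sketch rather than the project's code: the `to_float_t` typedef and the `dequantize_rows` helper are hypothetical stand-ins, and it only assumes the `(src, dst, n)` callback signature and the `ggml_row_size` / `ggml_nrows` semantics already used in the hunk. The point is that quantization types carrying per-row metadata (`row_meta_size > 0`) cannot be dequantized with a single `to_float` call over the whole buffer, so the tensor is walked one row at a time.

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical stand-in for the per-type dequantization callback (qtype.to_float in the hunk):
// converts n quantized values starting at src into n floats at dst.
typedef void (*to_float_t)(const void * src, float * dst, int64_t n);

// Sketch of the row-by-row loop added in the hunk.
//   qdata      - quantized tensor data               (tensor->data)
//   f32_output - destination buffer, nrows*ne0 floats
//   nrows      - number of rows                      (ggml_nrows(tensor))
//   ne0        - elements per row                    (tensor->ne[0])
//   row_size   - bytes per quantized row             (ggml_row_size(tensor->type, tensor->ne[0]))
static void dequantize_rows(const void * qdata, float * f32_output,
                            int64_t nrows, int64_t ne0, size_t row_size, to_float_t to_float) {
    const char * qsrc = (const char *) qdata;
    for (int64_t row = 0; row < nrows; ++row) {
        to_float(qsrc, f32_output, ne0); // each call sees exactly one row, including any per-row metadata
        qsrc       += row_size;          // advance by the quantized row stride in bytes
        f32_output += ne0;               // advance by one row of floats
    }
}
```

Routing these types through the single-threaded branch (the added `qtype.row_meta_size > 0` condition) presumably also keeps them out of the multi-threaded path below the hunk, which splits work by element chunks rather than by whole rows.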