Skip to content

Commit 1eb7c35

Browse files
committed
ggml-zdnn: code cleanup
Signed-off-by: Aaron Teo <[email protected]>
1 parent b7a77cf commit 1eb7c35

File tree

1 file changed

+15
-16
lines changed

1 file changed

+15
-16
lines changed

ggml/src/ggml-zdnn/ggml-zdnn-rewrite.cpp

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,20 @@ inline void ggml_zdnn_init_tensor(ggml_backend_zdnn_buffer * buffer, const ggml_
6767

6868
default:
6969
{
70+
// For 4D tensors, GGML uses NCHW layout. However, because zDNN
71+
// automatically transforms everything to NHWC, we will use it
72+
// directly to avoid the performance penalty of changing the
73+
// layout and reshaping the tensor.
7074
zdnn_init_pre_transformed_desc(
7175
ZDNN_NHWC,
7276
ggml_zdnn_type_mapping(tensor->type),
7377
&buffer->pre_tfm_desc,
7478
tensor->ne[3], tensor->ne[2], tensor->ne[1], tensor->ne[0]
7579
);
80+
81+
// TODO: Consider adding a ggml check.
82+
// TODO: If tensor = 4D, use ZDNN_NCHW by default.
83+
// TODO: If tensor = 2D, use ZDNN_NHWC by default.
7684
} break;
7785
}
7886

@@ -108,11 +116,8 @@ static void ggml_zdnn_mul_mat_op(ggml_backend_zdnn_context * ctx, const ggml_ten
108116
ggml_backend_zdnn_buffer * inputs_extra = (ggml_backend_zdnn_buffer *)inputs->extra;
109117
ggml_backend_zdnn_buffer * output_extra = (ggml_backend_zdnn_buffer *)output->extra;
110118

111-
zdnn_tensor_desc ptd_weights, td_weights;
112-
zdnn_tensor_desc ptd_inputs, td_inputs;
113-
zdnn_tensor_desc ptd_bias, td_bias;
114-
zdnn_tensor_desc ptd_output, td_output;
115-
zdnn_ztensor zt_weights, zt_inputs, zt_bias, zt_output;
119+
zdnn_tensor_desc ptd_bias, td_bias;
120+
zdnn_ztensor zt_bias;
116121

117122
const int64_t weights_rows = ne01;
118123
const int64_t weights_cols = ne00;
@@ -129,8 +134,7 @@ static void ggml_zdnn_mul_mat_op(ggml_backend_zdnn_context * ctx, const ggml_ten
129134
const int64_t bias_dim [GGML_MAX_DIMS] = { 1, 1, 1, output_cols };
130135
const int64_t output_dim[GGML_MAX_DIMS] = { 1, 1, output_cols, output_rows };
131136

132-
ggml_zdnn_create_tensor(ptd_bias, td_bias, zt_bias, output, bias_dim, ZDNN_1D);
133-
// ggml_zdnn_create_tensor(ptd_output, td_output, zt_output, output, output_dim, ZDNN_2D);
137+
ggml_zdnn_create_tensor(ptd_bias, td_bias, zt_bias, output, bias_dim, ZDNN_1D);
134138

135139
void * bias_data = (void *)calloc(ne0, ggml_element_size(output));
136140
if (weights_extra->ztensor.is_transformed == false) {
@@ -140,8 +144,7 @@ static void ggml_zdnn_mul_mat_op(ggml_backend_zdnn_context * ctx, const ggml_ten
140144
if (inputs_extra->ztensor.is_transformed == false) {
141145
ggml_zdnn_load_tensor(inputs_extra->ztensor, inputs->data);
142146
}
143-
ggml_zdnn_load_tensor(zt_bias, bias_data);
144-
// ggml_zdnn_load_tensor(output_extra->ztensor, output->data);
147+
ggml_zdnn_load_tensor(zt_bias, bias_data);
145148

146149
// GGML_LOG_INFO("%s: tensor '%s' tensor dimensions: [%ld, %ld, %ld, %ld] pre_tfm_desc dimensions: [%ld, %ld, %ld, %ld]\n",
147150
// __func__, weights_extra->name,
@@ -159,21 +162,17 @@ static void ggml_zdnn_mul_mat_op(ggml_backend_zdnn_context * ctx, const ggml_ten
159162
// inputs_extra->pre_tfm_desc.dim3,
160163
// inputs_extra->pre_tfm_desc.dim4);
161164

162-
// GGML_ASSERT(weights_extra->pre_tfm_desc.layout == ZDNN_2D && "weights_extra->pre_tfm_desc.layout must be ZDNN_2D");
163-
// GGML_ASSERT(inputs_extra->pre_tfm_desc.layout == ZDNN_2D && "inputs_extra->pre_tfm_desc.layout must be ZDNN_2D");
164165
GGML_ASSERT(weights_extra->pre_tfm_desc.dim1 == weights->ne[0] && "weights_extra->pre_tfm_desc.dim1 must match weights->ne[0]");
165166
GGML_ASSERT(weights_extra->pre_tfm_desc.dim2 == weights->ne[1] && "weights_extra->pre_tfm_desc.dim2 must match weights->ne[1]");
166-
GGML_ASSERT(inputs_extra->pre_tfm_desc.dim1 == inputs->ne[0] && "inputs_extra->pre_tfm_desc.dim1 must match inputs->ne[0]");
167-
GGML_ASSERT(inputs_extra->pre_tfm_desc.dim2 == inputs->ne[1] && "inputs_extra->pre_tfm_desc.dim2 must match inputs->ne[1]");
168-
169-
std::raise(SIGINT);
167+
GGML_ASSERT(inputs_extra->pre_tfm_desc.dim1 == inputs->ne[0] && "inputs_extra->pre_tfm_desc.dim1 must match inputs->ne[0]");
168+
GGML_ASSERT(inputs_extra->pre_tfm_desc.dim2 == inputs->ne[1] && "inputs_extra->pre_tfm_desc.dim2 must match inputs->ne[1]");
170169

171170
ZDNN_CHECK(zdnn_matmul_transpose_op(&inputs_extra->ztensor, &weights_extra->ztensor, &zt_bias,
172171
false, true, MATMUL_OP_ADDITION, &output_extra->ztensor));
172+
// TODO: Remove in the future. We currently convert DLF16 -> FP32 here, and the next op converts FP32 -> DLF16 again, which is inefficient.
173173
ZDNN_CHECK(zdnn_transform_origtensor(&output_extra->ztensor, output->data));
174174

175175
ZDNN_CHECK(zdnn_free_ztensor_buffer(&zt_bias));
176-
177176
free(bias_data);
178177
}
179178

0 commit comments

Comments
 (0)