
Commit 06b41e6

zdnn: initial matmul refactor
Signed-off-by: Aaron Teo <[email protected]>
1 parent 432cf43 commit 06b41e6

5 files changed: 100 additions & 78 deletions

ggml/src/ggml-zdnn/.gitignore

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+zdnn.h

ggml/src/ggml-zdnn/common.hpp

Lines changed: 10 additions & 0 deletions

@@ -0,0 +1,10 @@
+#ifndef GGML_ZDNN_COMMON_HPP
+#define GGML_ZDNN_COMMON_HPP
+
+#include "ggml.h"
+#include "ggml-impl.h"
+#include "ggml-zdnn-impl.h"
+
+#include "zdnn.h"
+
+#endif // GGML_ZDNN_COMMON_HPP

ggml/src/ggml-zdnn/ggml-zdnn.cpp

Lines changed: 5 additions & 78 deletions

@@ -1,10 +1,11 @@
-#include "zdnn.h"
-#include "ggml-zdnn.h"
 #include "ggml-zdnn-impl.h"
-
 #include "ggml-impl.h"
 #include "ggml-backend-impl.h"
 
+#include "ggml-zdnn/common.hpp"
+#include "ggml-zdnn/mmf.hpp"
+#include "ggml.h"
+
 #include <vector>
 #include <memory>
 #include <csignal>
@@ -88,80 +89,6 @@ inline void ggml_zdnn_init_tensor(ggml_backend_zdnn_buffer * buffer, const ggml_
     ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&buffer->pre_tfm_desc, &buffer->tfm_desc, &buffer->ztensor));
 }
 
-static void ggml_zdnn_mul_mat_op(ggml_backend_zdnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_TENSOR_BINARY_OP_LOCALS;
-
-    const enum ggml_type type = src0->type;
-
-    GGML_ASSERT(ne0 == ne01);
-    GGML_ASSERT(ne1 == ne11);
-    GGML_ASSERT(ne2 == ne12);
-    GGML_ASSERT(ne3 == ne13);
-
-    // we don't support permuted src0 or src1
-    GGML_ASSERT(nb00 == ggml_type_size(type));
-    GGML_ASSERT(nb10 == ggml_type_size(src1->type));
-
-    // dst cannot be transposed or permuted
-    GGML_ASSERT(nb0 == sizeof(float));
-    GGML_ASSERT(nb0 <= nb1);
-    GGML_ASSERT(nb1 <= nb2);
-    GGML_ASSERT(nb2 <= nb3);
-
-    const ggml_tensor * weights = src0;
-    const ggml_tensor * inputs  = src1;
-    ggml_tensor       * output  = dst;
-
-    ggml_backend_zdnn_buffer * weights_extra = (ggml_backend_zdnn_buffer *)weights->extra;
-    ggml_backend_zdnn_buffer * inputs_extra  = (ggml_backend_zdnn_buffer *)inputs->extra;
-    ggml_backend_zdnn_buffer * output_extra  = (ggml_backend_zdnn_buffer *)output->extra;
-    ggml_backend_zdnn_buffer * bias_extra    = (ggml_backend_zdnn_buffer *)output_extra->extra;
-
-    const int64_t weights_rows = ne01;
-    const int64_t weights_cols = ne00;
-    const int64_t inputs_rows  = ne11;
-    const int64_t inputs_cols  = ne10;
-
-    assert(inputs_cols == weights_cols);
-
-    const int64_t output_rows = ne1;
-    const int64_t output_cols = ne0;
-
-    // GGML_LOG_INFO("%s: tensor '%s' tensor dimensions: [%ld, %ld, %ld, %ld] pre_tfm_desc dimensions: [%ld, %ld, %ld, %ld]\n",
-    //               __func__, weights_extra->name,
-    //               weights->ne[3], weights->ne[2], weights->ne[1], weights->ne[0],
-    //               weights_extra->pre_tfm_desc.dim1,
-    //               weights_extra->pre_tfm_desc.dim2,
-    //               weights_extra->pre_tfm_desc.dim3,
-    //               weights_extra->pre_tfm_desc.dim4);
-
-    // GGML_LOG_INFO("%s: tensor '%s' tensor dimensions: [%ld, %ld, %ld, %ld] pre_tfm_desc dimensions: [%ld, %ld, %ld, %ld]\n",
-    //               __func__, inputs_extra->name,
-    //               inputs->ne[3], inputs->ne[2], inputs->ne[1], inputs->ne[0],
-    //               inputs_extra->pre_tfm_desc.dim1,
-    //               inputs_extra->pre_tfm_desc.dim2,
-    //               inputs_extra->pre_tfm_desc.dim3,
-    //               inputs_extra->pre_tfm_desc.dim4);
-
-    GGML_ASSERT(weights_extra->pre_tfm_desc.dim1 == weights->ne[0] && "weights_extra->pre_tfm_desc.dim1 must match weights->ne[0]");
-    GGML_ASSERT(weights_extra->pre_tfm_desc.dim2 == weights->ne[1] && "weights_extra->pre_tfm_desc.dim2 must match weights->ne[1]");
-    GGML_ASSERT(inputs_extra->pre_tfm_desc.dim1  == inputs->ne[0]  && "inputs_extra->pre_tfm_desc.dim1 must match inputs->ne[0]");
-    GGML_ASSERT(inputs_extra->pre_tfm_desc.dim2  == inputs->ne[1]  && "inputs_extra->pre_tfm_desc.dim2 must match inputs->ne[1]");
-
-    ZDNN_CHECK(zdnn_matmul_transpose_op(&inputs_extra->ztensor, &weights_extra->ztensor, &bias_extra->ztensor,
-                                        false, true, MATMUL_OP_ADDITION, &output_extra->ztensor));
-    // TODO: Remove in the future as we are currently DLF16 -> FP32 then in the next op, FP32 -> DLF16 again. Inefficient.
-    ZDNN_CHECK(zdnn_transform_origtensor(&output_extra->ztensor, output->data));
-
-    GGML_UNUSED(ctx);
-    GGML_UNUSED(weights_rows);
-    GGML_UNUSED(weights_cols);
-    GGML_UNUSED(inputs_rows);
-    GGML_UNUSED(inputs_cols);
-    GGML_UNUSED(output_rows);
-    GGML_UNUSED(output_cols);
-}
-
 static void ggml_zdnn_mul_mat_dispatch(ggml_backend_zdnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     // debug helpers
     // GGML_LOG_INFO("%s: use_mul_mat_vec = %d\n", __func__, use_mul_mat_vec);
@@ -174,7 +101,7 @@ static void ggml_zdnn_mul_mat_dispatch(ggml_backend_zdnn_context * ctx, const gg
     // GGML_LOG_INFO("%s: src0 is contiguous %d, transposed %d, type = %s, name = %s\n", __func__, ggml_is_contiguous(src0), ggml_is_transposed(src0), ggml_type_name(src0->type), src0->name);
     // GGML_LOG_INFO("%s: src1 is contiguous %d, transposed %d, type = %s, name = %s\n", __func__, ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name);
 
-    ggml_zdnn_mul_mat_op(ctx, src0, src1, dst);
+    ggml_zdnn_mul_mat_f(ctx, src0, src1, dst);
 }
 
 static bool ggml_zdnn_compute_forward(ggml_backend_zdnn_context * ctx, ggml_tensor * dst) {
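The dispatcher's contract with the relocated kernel is ggml's usual mul_mat shape convention: for src0 (weights) of shape [K, M] and src1 (inputs) of shape [K, N], dst comes out as [M, N], which is exactly what the ne0 == ne01 / ne1 == ne11 asserts in the kernel check. A minimal sketch of that contract through the public ggml API (a hypothetical standalone test, not part of this commit):

// Sketch only: builds the same mul_mat node that
// ggml_zdnn_mul_mat_dispatch would receive from the graph.
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // K = 64 shared dim, M = 32 weight rows, N = 8 input rows.
    struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 32); // src0
    struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64,  8); // src1

    // dst is [M, N]: ne0 == ne01 and ne1 == ne11, as the kernel asserts.
    struct ggml_tensor * y = ggml_mul_mat(ctx, w, x);
    GGML_ASSERT(y->ne[0] == w->ne[1]);
    GGML_ASSERT(y->ne[1] == x->ne[1]);

    ggml_free(ctx);
    return 0;
}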

ggml/src/ggml-zdnn/mmf.cpp

Lines changed: 76 additions & 0 deletions

@@ -0,0 +1,76 @@
+#include "ggml.h"
+#include "mmf.hpp"
+
+void ggml_zdnn_mul_mat_f(ggml_backend_zdnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    GGML_TENSOR_BINARY_OP_LOCALS;
+
+    const enum ggml_type type = src0->type;
+
+    GGML_ASSERT(ne0 == ne01);
+    GGML_ASSERT(ne1 == ne11);
+    GGML_ASSERT(ne2 == ne12);
+    GGML_ASSERT(ne3 == ne13);
+
+    // we don't support permuted src0 or src1
+    GGML_ASSERT(nb00 == ggml_type_size(type));
+    GGML_ASSERT(nb10 == ggml_type_size(src1->type));
+
+    // dst cannot be transposed or permuted
+    GGML_ASSERT(nb0 == sizeof(float));
+    GGML_ASSERT(nb0 <= nb1);
+    GGML_ASSERT(nb1 <= nb2);
+    GGML_ASSERT(nb2 <= nb3);
+
+    const ggml_tensor * weights = src0;
+    const ggml_tensor * inputs  = src1;
+    ggml_tensor       * output  = dst;
+
+    ggml_backend_zdnn_buffer * weights_extra = (ggml_backend_zdnn_buffer *)weights->extra;
+    ggml_backend_zdnn_buffer * inputs_extra  = (ggml_backend_zdnn_buffer *)inputs->extra;
+    ggml_backend_zdnn_buffer * output_extra  = (ggml_backend_zdnn_buffer *)output->extra;
+    ggml_backend_zdnn_buffer * bias_extra    = (ggml_backend_zdnn_buffer *)output_extra->extra;
+
+    const int64_t weights_rows = ne01;
+    const int64_t weights_cols = ne00;
+    const int64_t inputs_rows  = ne11;
+    const int64_t inputs_cols  = ne10;
+
+    assert(inputs_cols == weights_cols);
+
+    const int64_t output_rows = ne1;
+    const int64_t output_cols = ne0;
+
+    // GGML_LOG_INFO("%s: tensor '%s' tensor dimensions: [%ld, %ld, %ld, %ld] pre_tfm_desc dimensions: [%ld, %ld, %ld, %ld]\n",
+    //               __func__, weights_extra->name,
+    //               weights->ne[3], weights->ne[2], weights->ne[1], weights->ne[0],
+    //               weights_extra->pre_tfm_desc.dim1,
+    //               weights_extra->pre_tfm_desc.dim2,
+    //               weights_extra->pre_tfm_desc.dim3,
+    //               weights_extra->pre_tfm_desc.dim4);
+
+    // GGML_LOG_INFO("%s: tensor '%s' tensor dimensions: [%ld, %ld, %ld, %ld] pre_tfm_desc dimensions: [%ld, %ld, %ld, %ld]\n",
+    //               __func__, inputs_extra->name,
+    //               inputs->ne[3], inputs->ne[2], inputs->ne[1], inputs->ne[0],
+    //               inputs_extra->pre_tfm_desc.dim1,
+    //               inputs_extra->pre_tfm_desc.dim2,
+    //               inputs_extra->pre_tfm_desc.dim3,
+    //               inputs_extra->pre_tfm_desc.dim4);
+
+    GGML_ASSERT(weights_extra->pre_tfm_desc.dim1 == weights->ne[0] && "weights_extra->pre_tfm_desc.dim1 must match weights->ne[0]");
+    GGML_ASSERT(weights_extra->pre_tfm_desc.dim2 == weights->ne[1] && "weights_extra->pre_tfm_desc.dim2 must match weights->ne[1]");
+    GGML_ASSERT(inputs_extra->pre_tfm_desc.dim1  == inputs->ne[0]  && "inputs_extra->pre_tfm_desc.dim1 must match inputs->ne[0]");
+    GGML_ASSERT(inputs_extra->pre_tfm_desc.dim2  == inputs->ne[1]  && "inputs_extra->pre_tfm_desc.dim2 must match inputs->ne[1]");
+
+    ZDNN_CHECK(zdnn_matmul_transpose_op(&inputs_extra->ztensor, &weights_extra->ztensor, &bias_extra->ztensor,
+                                        false, true, MATMUL_OP_ADDITION, &output_extra->ztensor));
+    // TODO: Remove in the future as we are currently DLF16 -> FP32 then in the next op, FP32 -> DLF16 again. Inefficient.
+    ZDNN_CHECK(zdnn_transform_origtensor(&output_extra->ztensor, output->data));
+
+    GGML_UNUSED(ctx);
+    GGML_UNUSED(weights_rows);
+    GGML_UNUSED(weights_cols);
+    GGML_UNUSED(inputs_rows);
+    GGML_UNUSED(inputs_cols);
+    GGML_UNUSED(output_rows);
+    GGML_UNUSED(output_cols);
+}
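The heart of the new file is the single zDNN call near the end. Restated below with explanatory comments as a sketch: the argument roles follow the call site above, and the DLF16 detail comes from the TODO note.

// Annotated restatement of the core call in ggml_zdnn_mul_mat_f:
// output = inputs x weights^T + bias, computed in zDNN's internal
// DLF16 layout, then un-transformed back to FP32 for the next op.
ZDNN_CHECK(zdnn_matmul_transpose_op(
    &inputs_extra->ztensor,    // a: activations (src1)
    &weights_extra->ztensor,   // b: weights (src0)
    &bias_extra->ztensor,      // c: bias, stashed on the output buffer's extra
    false,                     // do not transpose a
    true,                      // transpose b, giving a x b^T
    MATMUL_OP_ADDITION,        // fuse "+ c" into the matmul
    &output_extra->ztensor));  // result ztensor, still in DLF16
ZDNN_CHECK(zdnn_transform_origtensor(&output_extra->ztensor, output->data));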

ggml/src/ggml-zdnn/mmf.hpp

Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+#ifndef GGML_ZDNN_MMF_HPP
+#define GGML_ZDNN_MMF_HPP
+
+#include "common.hpp"
+
+void ggml_zdnn_mul_mat_f(ggml_backend_zdnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+
+#endif // GGML_ZDNN_MMF_HPP
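The header is intentionally minimal so the pattern can be repeated as more ops move out of ggml-zdnn.cpp; a hypothetical sibling header (file and function names are illustrative only, not part of this commit) would follow the same shape:

// Hypothetical sibling op header following the mmf.hpp pattern;
// GGML_ZDNN_MMQ_HPP and ggml_zdnn_mul_mat_q are illustrative names.
#ifndef GGML_ZDNN_MMQ_HPP
#define GGML_ZDNN_MMQ_HPP

#include "common.hpp"

void ggml_zdnn_mul_mat_q(ggml_backend_zdnn_context * ctx,
                         const ggml_tensor * src0,
                         const ggml_tensor * src1,
                         ggml_tensor * dst);

#endif // GGML_ZDNN_MMQ_HPP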
