Skip to content

Commit bca9678

Browse files
committed
ggml-zdnn: move mulmat forward around
Signed-off-by: Aaron Teo <[email protected]>
1 parent 4cf39ae commit bca9678

File tree

5 files changed

+118
-92
lines changed

5 files changed

+118
-92
lines changed

ggml/src/ggml-zdnn/ggml-zdnn.cpp

Lines changed: 10 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -4,111 +4,31 @@
44

55
#include "ggml-zdnn/common.hpp"
66
#include "ggml-zdnn/mmf.hpp"
7+
#include "ggml-zdnn/utils.hpp"
78
#include "ggml.h"
89

910
#include <vector>
1011
#include <memory>
1112
#include <csignal> // raise(SIGTRAP)
1213
#include <unistd.h>
1314

14-
inline zdnn_data_types ggml_zdnn_type_mapping(ggml_type type) {
15-
switch (type) {
16-
case GGML_TYPE_F32:
17-
return FP32;
18-
case GGML_TYPE_F16:
19-
return FP16;
20-
case GGML_TYPE_BF16:
21-
return BFLOAT;
22-
case GGML_TYPE_I8:
23-
return INT8;
24-
case GGML_TYPE_I32:
25-
return INT32;
26-
case GGML_TYPE_Q8_0:
27-
return INT8;
28-
default:
29-
GGML_ABORT("%s: fatal: unable to determine zTensor data type",
30-
__func__);
31-
break;
32-
}
33-
}
34-
35-
inline void ggml_zdnn_create_tensor(zdnn_tensor_desc & pre_tfm_desc,
36-
zdnn_tensor_desc & tfm_desc,
37-
zdnn_ztensor & ztensor,
38-
const ggml_tensor * src,
39-
const int64_t * ne,
40-
const zdnn_data_layouts layout) {
41-
zdnn_init_pre_transformed_desc(
42-
layout,
43-
ggml_zdnn_type_mapping(src->type),
44-
&pre_tfm_desc,
45-
ne[3], ne[2], ne[1], ne[0]
46-
);
47-
48-
ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc, &tfm_desc));
49-
ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc, &tfm_desc, &ztensor));
50-
}
51-
52-
inline void ggml_zdnn_load_tensor(zdnn_ztensor & ztensor,
53-
void * buffer) {
54-
ZDNN_CHECK(zdnn_transform_ztensor(&ztensor, buffer));
55-
}
56-
57-
inline void ggml_zdnn_init_tensor(ggml_backend_zdnn_buffer * buffer, const ggml_tensor * tensor) {
58-
switch (tensor->op) {
59-
case GGML_OP_MUL_MAT:
60-
{
61-
zdnn_init_pre_transformed_desc(
62-
ZDNN_2D,
63-
ggml_zdnn_type_mapping(tensor->type),
64-
&buffer->pre_tfm_desc,
65-
tensor->ne[1], tensor->ne[0]
66-
);
67-
} break;
68-
69-
default:
70-
{
71-
// For 4D tensors, GGML uses NCHW layout. However, because zDNN
72-
// automatically transforms everything to NHWC, we will use it
73-
// directly to avoid the performance penalty changing the
74-
// layout and reshaping the tensor.
75-
zdnn_init_pre_transformed_desc(
76-
ZDNN_NHWC,
77-
ggml_zdnn_type_mapping(tensor->type),
78-
&buffer->pre_tfm_desc,
79-
tensor->ne[3], tensor->ne[2], tensor->ne[1], tensor->ne[0]
80-
);
81-
82-
// TODO: Consider adding a ggml check.
83-
// TODO: If tensor = 4D, use ZDNN_NCHW by default.
84-
// TODO: If tensor = 2D, use ZDNN_NHWC by default.
85-
} break;
86-
}
87-
88-
ZDNN_CHECK(zdnn_generate_transformed_desc(&buffer->pre_tfm_desc, &buffer->tfm_desc));
89-
ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&buffer->pre_tfm_desc, &buffer->tfm_desc, &buffer->ztensor));
90-
}
// Forward step for GGML_OP_MUL_MAT (added by this commit; the removed
// ggml_zdnn_mul_mat_dispatch lines it replaces are shown below as `-` lines).
// Reads the two operands from dst->src[0] and dst->src[1] and forwards them,
// together with dst, to ggml_zdnn_mul_mat_f. No other kernel is selected here;
// quantized types are still a TODO.
15+
static void ggml_zdnn_compute_forward_mul_mat(
16+
const ggml_backend_zdnn_context * ctx,
17+
ggml_tensor * dst) {
9118

92-
static void ggml_zdnn_mul_mat_dispatch(ggml_backend_zdnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
93-
// debug helpers
94-
// GGML_LOG_INFO("%s: use_mul_mat_vec = %d\n", __func__, use_mul_mat_vec);
95-
// GGML_LOG_INFO("%s: use_mul_mat_vec_q = %d\n", __func__, use_mul_mat_vec_q);
96-
// GGML_LOG_INFO("%s: use_mul_mat_q = %d\n", __func__, use_mul_mat_q);
97-
// GGML_LOG_INFO("%s: src0: %8d %8d %8d %8d\n", __func__, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3]);
98-
// GGML_LOG_INFO("%s: %8d %8d %8d %8d\n", __func__, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3]);
99-
// GGML_LOG_INFO("%s: src1: %8d %8d %8d %8d\n", __func__, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3]);
100-
// GGML_LOG_INFO("%s: %8d %8d %8d %8d\n", __func__, src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3]);
101-
// GGML_LOG_INFO("%s: src0 is contiguous %d, transposed %d, type = %s, name = %s\n", __func__, ggml_is_contiguous(src0), ggml_is_transposed(src0), ggml_type_name(src0->type), src0->name);
102-
// GGML_LOG_INFO("%s: src1 is contiguous %d, transposed %d, type = %s, name = %s\n", __func__, ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name);
19+
const ggml_tensor * src0 = dst->src[0]; // weights
20+
const ggml_tensor * src1 = dst->src[1]; // inputs
10321

22+
// TODO: implement support for quantized types
// Only path available for now: every MUL_MAT goes through ggml_zdnn_mul_mat_f.
10423
ggml_zdnn_mul_mat_f(ctx, src0, src1, dst);
10524
}
10625

10726
static bool ggml_zdnn_compute_forward(ggml_backend_zdnn_context * ctx, ggml_tensor * dst) {
10827
switch (dst->op) {
10928
case GGML_OP_MUL_MAT:
110-
ggml_zdnn_mul_mat_dispatch(ctx, dst->src[0], dst->src[1], dst);
111-
break;
29+
{
30+
ggml_zdnn_compute_forward_mul_mat(ctx, dst);
31+
} break;
11232

11333
default:
11434
return false;

ggml/src/ggml-zdnn/mmf.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
#include "ggml.h"
22
#include "mmf.hpp"
33

4-
void ggml_zdnn_mul_mat_f(ggml_backend_zdnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
4+
void ggml_zdnn_mul_mat_f(
5+
const ggml_backend_zdnn_context * ctx,
6+
const ggml_tensor * src0,
7+
const ggml_tensor * src1,
8+
ggml_tensor * dst) {
59
GGML_TENSOR_BINARY_OP_LOCALS;
610

711
const enum ggml_type type = src0->type;

ggml/src/ggml-zdnn/mmf.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33

44
#include "common.hpp"
55

6-
void ggml_zdnn_mul_mat_f(ggml_backend_zdnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
// Matrix-multiplication routine called from the GGML_OP_MUL_MAT forward path
// in ggml-zdnn.cpp, where src0 is labelled "weights" and src1 "inputs".
// This commit changes ctx to a pointer-to-const; src0/src1 are read-only and
// dst is the only mutable tensor argument.
// NOTE(review): presumably limited to non-quantized types (the caller carries
// a TODO for quantized support) - confirm against the body in mmf.cpp.
6+
void ggml_zdnn_mul_mat_f(
7+
const ggml_backend_zdnn_context * ctx,
8+
const ggml_tensor * src0,
9+
const ggml_tensor * src1,
10+
ggml_tensor * dst);
711

812
#endif // GGML_ZDNN_MMF_HPP

ggml/src/ggml-zdnn/utils.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#include "ggml.h"
2+
#include "utils.hpp"
3+
// Translates a ggml_type into the matching zdnn_data_types value:
//   F32 -> FP32, F16 -> FP16, BF16 -> BFLOAT, Q8_0 -> INT8, I8 -> INT8,
//   I32 -> INT32. Any other type hits GGML_ABORT.
// NOTE(review): this is defined `inline` inside utils.cpp while utils.hpp only
// carries an `inline` declaration. An inline function has to be defined in
// every translation unit that uses it, so callers in other .cpp files may fail
// to link - confirm, then either drop `inline` on both the declaration and the
// definition or move the body into the header.
4+
inline zdnn_data_types ggml_zdnn_type_mapping(ggml_type type) {
5+
switch (type) {
6+
case GGML_TYPE_F32:
7+
return FP32;
8+
case GGML_TYPE_F16:
9+
return FP16;
10+
case GGML_TYPE_BF16:
11+
return BFLOAT;
// Q8_0 shares the INT8 mapping with plain I8 below.
12+
case GGML_TYPE_Q8_0:
13+
return INT8;
14+
case GGML_TYPE_I8:
15+
return INT8;
16+
case GGML_TYPE_I32:
17+
return INT32;
18+
default:
// NOTE(review): there is no return statement on this path; this relies on
// GGML_ABORT never returning - confirm.
19+
GGML_ABORT("%s: fatal: unable to determine zTensor data type",
20+
__func__);
21+
break;
22+
}
23+
}
24+
// Sets up the zDNN descriptors and ztensor for a tensor with explicit dims.
//   pre_tfm_desc / tfm_desc / ztensor : outputs, written in that order
//   src    : only src->type is read (mapped through ggml_zdnn_type_mapping)
//   ne     : dimension array; elements 0..3 are read
//   layout : zDNN layout passed straight to zdnn_init_pre_transformed_desc
// Both follow-up zDNN calls are wrapped in ZDNN_CHECK.
// NOTE(review): zdnn_init_ztensor_with_malloc presumably allocates the ztensor
// buffer; nothing in this function releases it - confirm who frees it.
// NOTE(review): declared `inline` in utils.hpp but defined here in utils.cpp;
// other translation units will not see this body - confirm linkage.
25+
inline void ggml_zdnn_create_tensor(zdnn_tensor_desc & pre_tfm_desc,
26+
zdnn_tensor_desc & tfm_desc,
27+
zdnn_ztensor & ztensor,
28+
const ggml_tensor * src,
29+
const int64_t * ne,
30+
const zdnn_data_layouts layout) {
31+
zdnn_init_pre_transformed_desc(
32+
layout,
33+
ggml_zdnn_type_mapping(src->type),
34+
&pre_tfm_desc,
// Dimensions are handed over in reverse index order (ne[3] first, ne[0] last).
// Four values are always passed, whatever `layout` is.
35+
ne[3], ne[2], ne[1], ne[0]
36+
);
37+
38+
ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc, &tfm_desc));
39+
ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc, &tfm_desc, &ztensor));
40+
}
41+
// Thin wrapper: calls zdnn_transform_ztensor on `ztensor` with `buffer` and
// checks the result with ZDNN_CHECK.
// NOTE(review): presumably `buffer` holds the untransformed source data that
// gets loaded into the ztensor - confirm against the zDNN API reference.
// NOTE(review): declared `inline` in utils.hpp but defined here in utils.cpp;
// other translation units will not see this body - confirm linkage.
42+
inline void ggml_zdnn_load_tensor(zdnn_ztensor & ztensor, void * buffer) {
43+
ZDNN_CHECK(zdnn_transform_ztensor(&ztensor, buffer));
44+
}
45+
// Initializes the zDNN descriptors and ztensor stored in `buffer` for `tensor`.
// The pre-transformed descriptor depends on tensor->op:
//   GGML_OP_MUL_MAT -> ZDNN_2D   with (ne[1], ne[0])
//   anything else   -> ZDNN_NHWC with (ne[3], ne[2], ne[1], ne[0])
// Afterwards the transformed descriptor is generated and the ztensor is
// initialized via zdnn_init_ztensor_with_malloc, both under ZDNN_CHECK.
// NOTE(review): declared `inline` in utils.hpp but defined here in utils.cpp;
// other translation units will not see this body - confirm linkage.
46+
inline void ggml_zdnn_init_tensor(ggml_backend_zdnn_buffer * buffer, const ggml_tensor * tensor) {
47+
switch (tensor->op) {
48+
case GGML_OP_MUL_MAT:
49+
{
50+
zdnn_init_pre_transformed_desc(
51+
ZDNN_2D,
52+
ggml_zdnn_type_mapping(tensor->type),
53+
&buffer->pre_tfm_desc,
// Only the two lowest dimensions are used for the 2D case.
54+
tensor->ne[1], tensor->ne[0]
55+
);
56+
} break;
57+
58+
default:
59+
{
60+
// For 4D tensors, GGML uses NCHW layout. However, because zDNN
61+
// automatically transforms everything to NHWC, we will use it
62+
// directly to avoid the performance penalty changing the
63+
// layout and reshaping the tensor.
64+
zdnn_init_pre_transformed_desc(
65+
ZDNN_NHWC,
66+
ggml_zdnn_type_mapping(tensor->type),
67+
&buffer->pre_tfm_desc,
68+
tensor->ne[3], tensor->ne[2], tensor->ne[1], tensor->ne[0]
69+
);
70+
71+
// TODO: Consider adding a ggml check.
72+
// TODO: If tensor = 4D, use ZDNN_NCHW by default.
73+
// TODO: If tensor = 2D, use ZDNN_NHWC by default.
74+
} break;
75+
}
76+
// Common tail for both layouts: derive the transformed descriptor, then set up
// the ztensor held by the buffer.
77+
ZDNN_CHECK(zdnn_generate_transformed_desc(&buffer->pre_tfm_desc, &buffer->tfm_desc));
78+
ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&buffer->pre_tfm_desc, &buffer->tfm_desc, &buffer->ztensor));
79+
}

ggml/src/ggml-zdnn/utils.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#ifndef GGML_ZDNN_UTILITIES_HPP
2+
#define GGML_ZDNN_UTILITIES_HPP
3+
4+
#include "common.hpp"
// Declarations of the zDNN helper functions whose bodies are in utils.cpp.
// NOTE(review): all four are declared `inline`, yet this header provides no
// definitions. An inline function must be defined in every translation unit
// that uses it, so a file that includes only this header and calls one of
// these may fail to link - confirm, then either drop `inline` here and in
// utils.cpp or move the definitions into this header.
5+
// Maps a ggml_type to the corresponding zdnn_data_types value.
6+
inline zdnn_data_types ggml_zdnn_type_mapping(ggml_type type);
7+
// Fills both descriptors and initializes `ztensor` from src's type, the
// dimension array `ne` and the requested `layout`.
8+
inline void ggml_zdnn_create_tensor(zdnn_tensor_desc & pre_tfm_desc,
9+
zdnn_tensor_desc & tfm_desc,
10+
zdnn_ztensor & ztensor,
11+
const ggml_tensor * src,
12+
const int64_t * ne,
13+
const zdnn_data_layouts layout);
14+
// Runs zdnn_transform_ztensor on `ztensor` with `buffer`.
15+
inline void ggml_zdnn_load_tensor(zdnn_ztensor & ztensor, void * buffer);
16+
// Initializes the descriptors and ztensor held in `buffer` for `tensor`.
17+
inline void ggml_zdnn_init_tensor(ggml_backend_zdnn_buffer * buffer, const ggml_tensor * tensor);
18+
19+
#endif // GGML_ZDNN_UTILITIES_HPP

0 commit comments

Comments
 (0)