Commit b971d06

ggml : fix quantized cpy op
ggml-ci
1 parent e128a1b commit b971d06

2 files changed: +22 -9 lines changed


ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 13 additions & 9 deletions
@@ -3111,7 +3111,7 @@ static void ggml_compute_forward_dup_same_cont(
     const int nth = params->nth; // number of threads

     // parallelize by elements
-    const int ne = ggml_nelements(dst);
+    const int ne = ggml_nelements(src0)/ggml_blck_size(src0->type);
     const int dr = (ne + nth - 1) / nth;
     const int ie0 = dr * ith;
     const int ie1 = MIN(ie0 + dr, ne);
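
The changed line above splits the copy work over quantized blocks instead of logical elements. A minimal standalone sketch of that arithmetic, using an assumed Q8_0 tensor of 256*2*3*4 elements with 32 elements per block and 4 threads (illustrative values, not taken from this diff):

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
    const int nelements = 256*2*3*4; // ggml_nelements(src0): logical elements
    const int blck_size = 32;        // elements per Q8_0 block (assumed)
    const int nth       = 4;         // number of threads (assumed)

    const int ne = nelements/blck_size;  // work items are blocks, not elements
    const int dr = (ne + nth - 1)/nth;   // blocks per thread, rounded up

    for (int ith = 0; ith < nth; ith++) {
        const int ie0 = dr*ith;             // first block for this thread
        const int ie1 = MIN(ie0 + dr, ne);  // one past its last block
        printf("thread %d copies blocks [%d, %d)\n", ith, ie0, ie1);
    }
    return 0;
}
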
@@ -4055,7 +4055,6 @@ static void ggml_compute_forward_dup_f32(
 static void ggml_compute_forward_dup_bytes(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
-
     const struct ggml_tensor * src0 = dst->src[0];

     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
@@ -4069,10 +4068,10 @@ static void ggml_compute_forward_dup_bytes(
     }

     const size_t type_size = ggml_type_size(src0->type);
+
     const int ith = params->ith; // thread index
     const int nth = params->nth; // number of threads

-
     // parallelize by rows
     const int nr = ne01;
     // number of rows per thread
@@ -4082,10 +4081,10 @@ static void ggml_compute_forward_dup_bytes(
     const int ir1 = MIN(ir0 + dr, nr);

     if (src0->type == dst->type &&
-        ne00 == ne0 &&
+        ggml_are_same_shape(src0, dst) &&
         nb00 == type_size && nb0 == type_size) {
         // copy by rows
-        const size_t rs = ne00 * type_size;
+        const size_t rs = ggml_row_size(src0->type, ne00);
         for (int64_t i03 = 0; i03 < ne03; i03++) {
             for (int64_t i02 = 0; i02 < ne02; i02++) {
                 for (int64_t i01 = ir0; i01 < ir1; i01++) {
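
For quantized types, ggml_type_size() is the size of one block, so the old ne00 * type_size over-counted the row size by a factor of the block size; ggml_row_size() divides by the block size (and for types with blck_size == 1 the two expressions agree, which is why the old code was fine for F32/F16). A small worked example under assumed Q8_0 sizes (32 elements and 34 bytes per block):

#include <stdio.h>

int main(void) {
    const size_t ne00      = 256; // elements per row
    const size_t blck_size = 32;  // elements per Q8_0 block (assumed)
    const size_t type_size = 34;  // bytes per Q8_0 block (assumed)

    const size_t rs_old = ne00*type_size;           // old: 8704 bytes, wrong for quantized rows
    const size_t rs_new = ne00/blck_size*type_size; // what ggml_row_size() computes: 272 bytes

    printf("old row size: %zu bytes, correct row size: %zu bytes\n", rs_old, rs_new);
    return 0;
}
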
@@ -4146,9 +4145,12 @@ static void ggml_compute_forward_dup_bytes(
         int64_t i12 = 0;
         int64_t i13 = 0;

+        // number of blocks in a row
+        const int64_t nb = ne00/ggml_blck_size(src0->type);
+
         for (int64_t i03 = 0; i03 < ne03; i03++) {
             for (int64_t i02 = 0; i02 < ne02; i02++) {
-                i10 += ne00 * ir0;
+                i10 += nb * ir0;
                 while (i10 >= ne0) {
                     i10 -= ne0;
                     if (++i11 == ne1) {
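
The new nb counts blocks per row rather than elements, so the indexing below walks the row block by block. For the 256-element rows used by the tests in this commit and the usual ggml block sizes (assumed here: 32 elements for Q8_0/Q4_0, 256 for Q4_K), that gives:

#include <stdio.h>

int main(void) {
    const long long ne00 = 256;                          // elements per row
    printf("Q8_0: %lld blocks per row\n", ne00/32);      // 8
    printf("Q4_0: %lld blocks per row\n", ne00/32);      // 8
    printf("Q4_K: %lld blocks per row\n", ne00/256);     // 1
    return 0;
}
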
@@ -4162,7 +4164,7 @@ static void ggml_compute_forward_dup_bytes(
                     }
                 }
                 for (int64_t i01 = ir0; i01 < ir1; i01++) {
-                    for (int64_t i00 = 0; i00 < ne00; i00++) {
+                    for (int64_t i00 = 0; i00 < nb; i00++) {
                         const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
                         char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);

@@ -4182,7 +4184,7 @@ static void ggml_compute_forward_dup_bytes(
                         }
                     }
                 }
-                i10 += ne00 * (ne01 - ir1);
+                i10 += nb * (ne01 - ir1);
                 while (i10 >= ne0) {
                     i10 -= ne0;
                     if (++i11 == ne1) {
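
The two i10 += nb * ... hunks keep the flat destination index in block units: rows owned by other threads are skipped nb blocks at a time and the overflow is carried into the higher dst counters. A self-contained sketch of that bookkeeping, with illustrative shape values (nb = 8 blocks per row, as for a 256-element Q8_0 row; these are assumptions, not values from this diff):

#include <stdio.h>

int main(void) {
    const long long nb  = 8;                   // blocks per row (assumed)
    const long long ne0 = 8, ne1 = 2, ne2 = 3; // dst extents; ne0 is measured in blocks
    const long long ir0 = 3;                   // first row owned by this thread (assumed)

    long long i10 = 0, i11 = 0, i12 = 0;

    i10 += nb*ir0;        // skip the rows handled by lower-numbered threads
    while (i10 >= ne0) {  // carry the overflow into the higher counters
        i10 -= ne0;
        if (++i11 == ne1) {
            i11 = 0;
            if (++i12 == ne2) {
                i12 = 0;
            }
        }
    }
    printf("this thread starts at dst block (%lld, %lld, %lld)\n", i10, i11, i12);
    return 0;
}
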
@@ -14067,7 +14069,9 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
     }

     // extra_buffer op?
-    if (ggml_cpu_extra_compute_forward(params, tensor)) return;
+    if (ggml_cpu_extra_compute_forward(params, tensor)) {
+        return;
+    }

     switch (tensor->op) {
         case GGML_OP_DUP:

tests/test-backend-ops.cpp

Lines changed: 9 additions & 0 deletions
@@ -3929,6 +3929,15 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_set(GGML_TYPE_I32, GGML_TYPE_I32, {6, 5, 4, 3}, dim));
     }

+    test_cases.emplace_back(new test_cpy(GGML_TYPE_Q8_0, GGML_TYPE_Q8_0, {256, 2, 3, 4}, {0, 1, 2, 3}));
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_Q8_0, GGML_TYPE_Q8_0, {256, 2, 3, 4}, {0, 2, 1, 3}));
+
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_Q4_0, GGML_TYPE_Q4_0, {256, 2, 3, 4}, {0, 1, 2, 3}));
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_Q4_0, GGML_TYPE_Q4_0, {256, 2, 3, 4}, {0, 2, 3, 1}));
+
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_Q4_K, GGML_TYPE_Q4_K, {256, 2, 3, 4}, {0, 1, 2, 3}));
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_Q4_K, GGML_TYPE_Q4_K, {256, 2, 3, 4}, {0, 3, 1, 2}));
+
     for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
         for (ggml_type type_dst : all_types) {
             test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4}));
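
The new cases copy a quantized tensor into a same-type destination, both in the contiguous layout ({0, 1, 2, 3}) and through what reads as permuted views of the source, so they exercise both the by-rows path and the generic block-by-block path fixed above. As a rough sketch only (this is not how test-backend-ops builds its cases, and it assumes the standard ggml C API: ggml_new_tensor_4d, ggml_permute, ggml_cpy), the {256, 2, 3, 4} / {0, 2, 1, 3} case corresponds to a graph like:

#include "ggml.h"

// Copy a permuted Q8_0 view into a same-type destination (sketch, not part of this commit).
static struct ggml_tensor * quantized_cpy_example(struct ggml_context * ctx) {
    struct ggml_tensor * src = ggml_new_tensor_4d(ctx, GGML_TYPE_Q8_0, 256, 2, 3, 4);

    // swap axes 1 and 2 of the source view; the permuted view has shape {256, 3, 2, 4}
    struct ggml_tensor * src_perm = ggml_permute(ctx, src, 0, 2, 1, 3);
    struct ggml_tensor * dst      = ggml_new_tensor_4d(ctx, GGML_TYPE_Q8_0, 256, 3, 2, 4);

    return ggml_cpy(ctx, src_perm, dst);
}

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);
    quantized_cpy_example(ctx); // builds the op; evaluation is left to a backend/scheduler
    ggml_free(ctx);
    return 0;
}
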
