Skip to content

Commit 7013227

Browse files
committed
more clean up
1 parent a3b4d8d commit 7013227

File tree

3 files changed

+9
-35
lines changed

3 files changed

+9
-35
lines changed

ggml/src/ggml-cuda/cpy.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@ void ggml_cuda_cpy_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_des
138138
#endif
139139
}
140140

141-
template<typename src_t, typename dst_t, bool transpose = false>
141+
template<typename src_t, typename dst_t>
142142
static void ggml_cpy_flt_cuda(
143143
const char * cx, char * cdst, const int ne,
144144
const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
@@ -332,7 +332,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
332332
CUDA_CHECK(cudaMemcpyAsync(src1_ddc, src0_ddc, ggml_nbytes(src0), cudaMemcpyDeviceToDevice, main_stream));
333333
}
334334
} else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) {
335-
ggml_cpy_flt_cuda<float, float, false> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
335+
ggml_cpy_flt_cuda<float, float> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
336336
} else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_BF16) {
337337
ggml_cpy_flt_cuda<float, nv_bfloat16> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
338338
} else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F16) {
@@ -363,7 +363,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
363363
} else if (src0->type == GGML_TYPE_Q5_1 && src1->type == GGML_TYPE_F32) {
364364
ggml_cpy_q5_1_f32_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
365365
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16) {
366-
ggml_cpy_flt_cuda<half, half, false> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
366+
ggml_cpy_flt_cuda<half, half> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
367367
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_BF16) {
368368
ggml_cpy_flt_cuda<half, nv_bfloat16> (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream, dest_ptrs_d, graph_cpynode_index);
369369
} else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32) {

ggml/src/ggml.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4570,7 +4570,7 @@ struct ggml_tensor * ggml_conv_2d_direct(
45704570
return result;
45714571
}
45724572

4573-
// ggml_conv_3d
4573+
// ggml_conv_3d_direct
45744574

45754575
struct ggml_tensor * ggml_conv_3d_direct(
45764576
struct ggml_context * ctx,

tests/test-backend-ops.cpp

Lines changed: 5 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -2414,7 +2414,6 @@ struct test_cpy : public test_case {
24142414
const std::array<int64_t, 4> permute_dst;
24152415
bool _src_use_permute;
24162416
bool _dst_use_permute;
2417-
bool is_transpose;
24182417

24192418
std::string vars() override {
24202419
return VARS_TO_STR5(type_src, type_dst, ne, permute_src, permute_dst);
@@ -2431,12 +2430,10 @@ struct test_cpy : public test_case {
24312430
test_cpy(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
24322431
std::array<int64_t, 4> ne = {10, 10, 10, 1},
24332432
std::array<int64_t, 4> permute_src = {0, 0, 0, 0},
2434-
std::array<int64_t, 4> permute_dst = {0, 0, 0, 0},
2435-
bool transpose = false)
2433+
std::array<int64_t, 4> permute_dst = {0, 0, 0, 0})
24362434
: type_src(type_src), type_dst(type_dst), ne(ne), permute_src(permute_src), permute_dst(permute_dst),
24372435
_src_use_permute(permute_src[0] + permute_src[1] + permute_src[2] + permute_src[3] > 0),
2438-
_dst_use_permute(permute_dst[0] + permute_dst[1] + permute_dst[2] + permute_dst[3] > 0),
2439-
is_transpose(transpose) {}
2436+
_dst_use_permute(permute_dst[0] + permute_dst[1] + permute_dst[2] + permute_dst[3] > 0) {}
24402437

24412438
ggml_tensor * build_graph(ggml_context * ctx) override {
24422439
ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
@@ -2457,8 +2454,6 @@ struct test_cpy : public test_case {
24572454
}
24582455

24592456
ggml_tensor * out = ggml_cpy(ctx, src, dst);
2460-
if(is_transpose)
2461-
src->op_params[10] = 999;
24622457
ggml_set_name(out, "out");
24632458

24642459
return out;
@@ -6024,7 +6019,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
60246019
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_I32, {256, 2, 3, 4}, {1, 0, 2, 3}));
60256020
test_cases.emplace_back(new test_cpy(GGML_TYPE_I32, GGML_TYPE_F32, {256, 2, 3, 4}));
60266021
test_cases.emplace_back(new test_cpy(GGML_TYPE_I32, GGML_TYPE_F32, {256, 2, 3, 4}, {1, 0, 2, 3}));
6027-
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {48, 48, 1, 1}, {1, 0, 2, 3}, {0, 0, 0, 0}, true));
60286022

60296023
test_cases.emplace_back(new test_cont());
60306024
test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1}));
@@ -6685,32 +6679,12 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
66856679
GGML_TYPE_F32, 1, 1, p0, p1, 1, 1, false));
66866680
}
66876681

6688-
// for (auto act_case : cases_sd) {
6689-
// GGML_ASSERT(act_case[idx_sd["kw"]] == 3 || act_case[idx_sd["kw"]] == 1);
6690-
// GGML_ASSERT(act_case[idx_sd["kh"]] == 3 || act_case[idx_sd["kh"]] == 1);
6691-
6692-
// uint32_t p0 = act_case[idx_sd["kw"]] == 3 ? 1 : 0;
6693-
// uint32_t p1 = act_case[idx_sd["kh"]] == 3 ? 1 : 0;
6694-
6695-
// test_cases.emplace_back(new test_conv_2d_implicit(
6696-
// { act_case[idx_sd["iw"]], act_case[idx_sd["ih"]], act_case[idx_sd["Cin"]], act_case[idx_sd["B"]] },
6697-
// { act_case[idx_sd["kw"]], act_case[idx_sd["kh"]], act_case[idx_sd["Cin"]], act_case[idx_sd["Cout"]] },
6698-
// GGML_TYPE_F16, 1, 1, p0, p1, 1, 1, false));
6699-
// }
6700-
67016682
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 1, 1, 1}));
67026683
test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 512, 1, 1}));
67036684

6704-
// test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F16, {512, 3072, 1, 1}));
6705-
// test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {8192, 512, 2, 1}, {0, 2, 1, 3}));
6706-
// test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {3072, 512, 2, 1}, {0, 2, 1, 3}));
6707-
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {768*1024, 256, 1, 1}, {1, 0, 2, 3}, {0, 0, 0, 0}, true));
6708-
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {768*1024, 256, 1, 1}, {1, 0, 2, 3}, {0, 0, 0, 0}, false));
6709-
test_cases.emplace_back(new test_cpy(GGML_TYPE_F16, GGML_TYPE_F16, {768*1024, 256, 1, 1}, {1, 0, 2, 3}, {0, 0, 0, 0}, true));
6710-
test_cases.emplace_back(new test_cpy(GGML_TYPE_F16, GGML_TYPE_F16, {768*1024, 256, 1, 1}, {1, 0, 2, 3}, {0, 0, 0, 0}, false));
6711-
test_cases.emplace_back(new test_cpy(GGML_TYPE_F16, GGML_TYPE_F16, {768, 1024, 256, 1}, {1, 0, 2, 3}, {0, 0, 0, 0}, true));
6712-
test_cases.emplace_back(new test_cpy(GGML_TYPE_F16, GGML_TYPE_F16, {768, 1024, 256, 1}, {1, 0, 2, 3}, {0, 0, 0, 0}, false));
6713-
6685+
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F16, {512, 3072, 1, 1}));
6686+
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {8192, 512, 2, 1}, {0, 2, 1, 3}));
6687+
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {3072, 512, 2, 1}, {0, 2, 1, 3}));
67146688

67156689
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
67166690
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {12888, 256, 5, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));

0 commit comments

Comments (0)