Commit c625544

committed: minor updates
1 parent 53a2ccb commit c625544

2 files changed: 69 additions & 13 deletions

tests/test-backend-ops.cpp

Lines changed: 61 additions & 8 deletions
@@ -39,6 +39,7 @@
 #include <string_view>
 #include <thread>
 #include <vector>
+#include <map>
 
 static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
     size_t nels = ggml_nelements(tensor);
@@ -6725,14 +6726,66 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
         }
     }
 
-    for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
-        for (auto act_case : cases) {
-            // Direct CONV_2D
-            test_cases.emplace_back(new test_conv_2d_implicit(
-                { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
-                { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] },
-                kernel_type, 1, 1, 0, 0, 1, 1, false));
-        }
+    // for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
+    //     for (auto act_case : cases) {
+    //         // Direct CONV_2D
+    //         test_cases.emplace_back(new test_conv_2d_implicit(
+    //             { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
+    //             { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] },
+    //             kernel_type, 1, 1, 0, 0, 1, 1, false));
+    //     }
+    // }
+
+    // Stable-diffusion layers
+    std::map<std::string, uint32_t> idx_sd{
+        { "iw",   0 },
+        { "ih",   1 },
+        { "kw",   2 },
+        { "kh",   3 },
+        { "Cout", 4 },
+        { "Cin",  5 },
+        { "B",    6 },
+    };
+
+    // Input image size
+    uint32_t w = 768;
+    uint32_t h = 1024;
+
+    // Number of filters (base)
+    uint32_t Cout_b = 128;
+    uint32_t Cin_b  = 128;
+
+    std::vector<std::array<uint32_t, 7>> cases_sd = {
+        { w / 8, h / 8, 3, 3, Cout_b * 4, Cin_b * 4, 1 }, // x10 (called 10 times)
+        { w / 4, h / 4, 3, 3, Cout_b * 4, Cin_b * 4, 1 }, // x7
+        { w / 2, h / 2, 3, 3, Cout_b * 2, Cin_b * 2, 1 }, // x5
+        { w,     h,     3, 3, Cout_b,     Cin_b,     1 }, // x5
+        { w / 8, h / 8, 1, 1, Cout_b * 4, Cin_b * 4, 1 }, // x4
+        { w / 8, h / 8, 1, 1, 4,          4,         1 },
+        { w / 8, h / 8, 3, 3, Cout_b * 4, 4,         1 },
+
+        { w / 2, h / 2, 3, 3, Cout_b * 4, Cin_b * 4, 1 },
+        { w / 2, h / 2, 3, 3, Cout_b * 2, Cin_b * 4, 1 },
+        { w / 2, h / 2, 1, 1, Cout_b * 2, Cin_b * 4, 1 },
+
+        { w, h, 3, 3, Cout_b,     Cin_b * 2, 1 },
+        { w, h, 1, 1, Cout_b,     Cin_b * 2, 1 },
+        { w, h, 3, 3, Cout_b * 2, Cin_b * 2, 1 },
+
+        { w, h, 3, 3, 3, Cin_b, 1 },
+    };
+
+    for (auto act_case : cases_sd) {
+        GGML_ASSERT(act_case[idx_sd["kw"]] == 3 || act_case[idx_sd["kw"]] == 1);
+        GGML_ASSERT(act_case[idx_sd["kh"]] == 3 || act_case[idx_sd["kh"]] == 1);
+
+        uint32_t p0 = act_case[idx_sd["kw"]] == 3 ? 1 : 0;
+        uint32_t p1 = act_case[idx_sd["kh"]] == 3 ? 1 : 0;
+
+        test_cases.emplace_back(new test_conv_2d_implicit(
+            { act_case[idx_sd["iw"]], act_case[idx_sd["ih"]], act_case[idx_sd["Cin"]], act_case[idx_sd["B"]] },
+            { act_case[idx_sd["kw"]], act_case[idx_sd["kh"]], act_case[idx_sd["Cin"]], act_case[idx_sd["Cout"]] },
+            GGML_TYPE_F16, 1, 1, p0, p1, 1, 1, false));
     }
 
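A note for context, not part of the commit: with stride 1 and dilation 1, the p0/p1 choice above (padding 1 for a 3x3 kernel, 0 for a 1x1 kernel) makes every stable-diffusion case a "same" convolution, so each test preserves the input's spatial size. A minimal standalone sketch of that arithmetic; the helper name conv_out_size is ours, not from the test:

#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Standard conv2d output size: out = (in + 2*p - d*(k - 1) - 1) / s + 1.
// With s = 1, d = 1 and p = (k == 3 ? 1 : 0), out == in for k in {1, 3}.
static int64_t conv_out_size(int64_t in, int64_t k, int64_t s, int64_t p, int64_t d) {
    return (in + 2*p - d*(k - 1) - 1) / s + 1;
}

int main() {
    for (int64_t k : {1, 3}) {
        int64_t p = (k == 3) ? 1 : 0; // mirrors p0/p1 in the test above
        printf("k=%lld p=%lld -> out=%lld for in=96\n",
               (long long) k, (long long) p,
               (long long) conv_out_size(96, k, /*s=*/1, p, /*d=*/1));
    }
    return 0;
}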
tests/test-conv2d-implicit.cpp

Lines changed: 8 additions & 5 deletions
@@ -63,8 +63,8 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
 
     size_t buffer_size = 0;
     {
-        // buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F32); // tensor a
-        buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F16); // tensor a
+        buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F32); // tensor a
+        // buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F16); // tensor a
         buffer_size += IW * IH * IC * N * ggml_type_size(GGML_TYPE_F32); // tensor b
         buffer_size += 1024; // overhead
     }
@@ -112,7 +112,8 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
     model.ctx = ggml_init(params);
 
     // create tensors
-    model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F16, KW, KH, IC, OC);
+    // model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F16, KW, KH, IC, OC);
+    model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F32, KW, KH, IC, OC);
     model.b = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F32, IW, IH, IC, N);
 
     // create a allocator
@@ -123,9 +124,11 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
 
     // load data to buffer
     if(ggml_backend_is_cpu(model.backend)) {
-        memcpy(model.a->data, hadata.data(), ggml_nbytes(model.a));
+        // memcpy(model.a->data, hadata.data(), ggml_nbytes(model.a));
+        memcpy(model.a->data, adata.data(), ggml_nbytes(model.a));
     } else {
-        ggml_backend_tensor_set(model.a, hadata.data(), 0, ggml_nbytes(model.a));
+        // ggml_backend_tensor_set(model.a, hadata.data(), 0, ggml_nbytes(model.a));
+        ggml_backend_tensor_set(model.a, adata.data(), 0, ggml_nbytes(model.a));
     }
 
     // alloc memory
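Net effect of this file's changes, for context: the kernel tensor a is now sized, created, and filled as F32 (adata) instead of F16 (hadata), so all three places agree on one type again. A rough sketch of what the type switch means for the buffer accounting; the dimensions here are made up for illustration, and the byte sizes mirror ggml_type_size(GGML_TYPE_F32) = 4 and ggml_type_size(GGML_TYPE_F16) = 2:

#include <cstddef>
#include <cstdio>

int main() {
    // Hypothetical conv kernel dimensions (KW x KH x IC x OC), as in load_model.
    const size_t KW = 3, KH = 3, IC = 128, OC = 128;

    const size_t f32_size = 4; // ggml_type_size(GGML_TYPE_F32)
    const size_t f16_size = 2; // ggml_type_size(GGML_TYPE_F16)

    // buffer_size, ggml_new_tensor_4d, and the copy source (adata vs hadata)
    // must all use the same variant, which is what this commit restores.
    printf("tensor a as F32: %zu bytes\n", KW * KH * IC * OC * f32_size);
    printf("tensor a as F16: %zu bytes\n", KW * KH * IC * OC * f16_size);
    return 0;
}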