|
39 | 39 | #include <string_view> |
40 | 40 | #include <thread> |
41 | 41 | #include <vector> |
| 42 | +#include <map> |
42 | 43 |
|
43 | 44 | static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) { |
44 | 45 | size_t nels = ggml_nelements(tensor); |
@@ -6725,14 +6726,66 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() { |
6725 | 6726 | } |
6726 | 6727 | } |
6727 | 6728 |
|
6728 | | - for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) { |
6729 | | - for (auto act_case : cases) { |
6730 | | - // Direct CONV_2D |
6731 | | - test_cases.emplace_back(new test_conv_2d_implicit( |
6732 | | - { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] }, |
6733 | | - { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] }, |
6734 | | - kernel_type, 1, 1, 0, 0, 1, 1, false)); |
6735 | | - } |
| 6729 | + // for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) { |
| 6730 | + // for (auto act_case : cases) { |
| 6731 | + // // Direct CONV_2D |
| 6732 | + // test_cases.emplace_back(new test_conv_2d_implicit( |
| 6733 | + // { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] }, |
| 6734 | + // { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] }, |
| 6735 | + // kernel_type, 1, 1, 0, 0, 1, 1, false)); |
| 6736 | + // } |
| 6737 | + // } |
| 6738 | + |
| 6739 | + // Stable-diffusion conv layers; each case below is { iw, ih, kw, kh, Cout, Cin, B } |
| 6740 | + std::map<std::string, uint32_t> idx_sd{ |
| 6741 | + { "iw", 0 }, |
| 6742 | + { "ih", 1 }, |
| 6743 | + { "kw", 2 }, |
| 6744 | + { "kh", 3 }, |
| 6745 | + { "Cout", 4 }, |
| 6746 | + { "Cin", 5 }, |
| 6747 | + { "B", 6 }, |
| 6748 | + }; |
| 6749 | + |
| 6750 | + // Input image size |
| 6751 | + uint32_t w = 768; |
| 6752 | + uint32_t h = 1024; |
| 6753 | + |
| 6754 | + // Base output/input channel counts; individual cases below scale them by 2 or 4 |
| 6755 | + uint32_t Cout_b = 128; |
| 6756 | + uint32_t Cin_b = 128; |
| 6757 | + |
| 6758 | + std::vector<std::array<uint32_t, 7>> cases_sd = { |
| 6759 | + { w / 8, h / 8, 3, 3, Cout_b * 4, Cin_b * 4, 1 }, // x10 (called 10 times) |
| 6760 | + { w / 4, h / 4, 3, 3, Cout_b * 4, Cin_b * 4, 1 }, // x7 |
| 6761 | + { w / 2, h / 2, 3, 3, Cout_b * 2, Cin_b * 2, 1 }, // x5 |
| 6762 | + { w, h, 3, 3, Cout_b, Cin_b, 1 }, // x5 |
| 6763 | + { w / 8, h / 8, 1, 1, Cout_b * 4, Cin_b * 4, 1 }, // x4 |
| 6764 | + { w / 8, h / 8, 1, 1, 4, 4, 1 }, |
| 6765 | + { w / 8, h / 8, 3, 3, Cout_b * 4, 4, 1 }, |
| 6766 | + |
| 6767 | + { w / 2, h / 2, 3, 3, Cout_b * 4, Cin_b * 4, 1 }, |
| 6768 | + { w / 2, h / 2, 3, 3, Cout_b * 2, Cin_b * 4, 1 }, |
| 6769 | + { w / 2, h / 2, 1, 1, Cout_b * 2, Cin_b * 4, 1 }, |
| 6770 | + |
| 6771 | + { w, h, 3, 3, Cout_b, Cin_b * 2, 1 }, |
| 6772 | + { w, h, 1, 1, Cout_b, Cin_b * 2, 1 }, |
| 6773 | + { w, h, 3, 3, Cout_b * 2, Cin_b * 2, 1 }, |
| 6774 | + |
| 6775 | + { w, h, 3, 3, 3, Cin_b, 1 }, |
| 6776 | + }; |
| 6777 | + |
| 6778 | + for (auto act_case : cases_sd) { |
| 6779 | + GGML_ASSERT(act_case[idx_sd["kw"]] == 3 || act_case[idx_sd["kw"]] == 1); |
| 6780 | + GGML_ASSERT(act_case[idx_sd["kh"]] == 3 || act_case[idx_sd["kh"]] == 1); |
| 6781 | + |
| 6782 | + uint32_t p0 = act_case[idx_sd["kw"]] == 3 ? 1 : 0; |
| 6783 | + uint32_t p1 = act_case[idx_sd["kh"]] == 3 ? 1 : 0; |
| 6784 | + |
| 6785 | + test_cases.emplace_back(new test_conv_2d_implicit( |
| 6786 | + { act_case[idx_sd["iw"]], act_case[idx_sd["ih"]], act_case[idx_sd["Cin"]], act_case[idx_sd["B"]] }, |
| 6787 | + { act_case[idx_sd["kw"]], act_case[idx_sd["kh"]], act_case[idx_sd["Cin"]], act_case[idx_sd["Cout"]] }, |
| 6788 | + GGML_TYPE_F16, 1, 1, p0, p1, 1, 1, false)); |
6736 | 6789 | } |
6737 | 6790 |
|
6738 | 6791 |
|
|
0 commit comments