@@ -2246,6 +2246,51 @@ struct test_im2col : public test_case {
22462246 }
22472247};
22482248
2249+ // GGML_Conv2D
2250+ struct test_conv2d : public test_case {
2251+ const ggml_type type_input;
2252+ const ggml_type type_kernel;
2253+ const ggml_type dst_type;
2254+ const std::array<int64_t , 4 > ne_input;
2255+ const std::array<int64_t , 4 > ne_kernel;
2256+ // stride
2257+ const int s0;
2258+ const int s1;
2259+ // padding
2260+ const int p0;
2261+ const int p1;
2262+ // dilation
2263+ const int d0;
2264+ const int d1;
2265+ // mode
2266+
2267+ std::string vars () override {
2268+ return VARS_TO_STR11 (type_input, type_kernel, dst_type, ne_input, ne_kernel, s0, s1, p0, p1, d0, d1);
2269+ }
2270+
2271+ test_conv2d (ggml_type type_input = GGML_TYPE_F32, ggml_type type_kernel = GGML_TYPE_F16, ggml_type dst_type = GGML_TYPE_F32,
2272+ std::array<int64_t , 4 > ne_input = {10 , 10 , 3 , 1 }, // [input_width, input_height, input_channels, 1]
2273+ std::array<int64_t , 4 > ne_kernel = {3 , 3 , 3 , 1 }, // [kernel_width, kernel_height, input_channels, 1]
2274+ int s0 = 1 , int s1 = 1 ,
2275+ int p0 = 1 , int p1 = 1 ,
2276+ int d0 = 1 , int d1 = 1 )
2277+ : type_input(type_input), type_kernel(type_kernel), dst_type(dst_type), ne_input(ne_input), ne_kernel(ne_kernel), s0(s0), s1(s1), p0(p0), p1(p1), d0(d0), d1(d1)
2278+ {}
2279+
2280+ ggml_tensor * build_graph (ggml_context * ctx) override {
2281+ ggml_tensor * input = ggml_new_tensor (ctx, type_input, 4 , ne_input.data ());
2282+ ggml_set_name (input, " input" );
2283+
2284+ ggml_tensor * kernel = ggml_new_tensor (ctx, type_kernel, 4 , ne_kernel.data ());
2285+ ggml_set_name (kernel, " kernel" );
2286+
2287+ ggml_tensor * out = ggml_conv_2d_3x3 (ctx, kernel, input);
2288+ ggml_set_name (out, " out" );
2289+
2290+ return out;
2291+ }
2292+ };
2293+
22492294// GGML_OP_CONCAT
22502295struct test_concat : public test_case {
22512296 const ggml_type type;
@@ -3252,6 +3297,10 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
32523297 test_cases.emplace_back (new test_im2col (GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {3000 , 128 , 1 , 1 }, {3 , 128 , 1280 , 1 }, 1 , 0 , 1 , 0 , 1 , 0 , false ));
32533298 test_cases.emplace_back (new test_im2col (GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {3000 , 128 , 1 , 1 }, {3 , 128 , 1280 , 1 }, 1 , 0 , 1 , 0 , 1 , 0 , false ));
32543299
3300+ test_cases.emplace_back (new test_conv2d (GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {56 , 80 , 640 , 1 }, {3 , 3 , 640 , 960 }, 1 , 1 , 1 , 1 , 1 , 1 ));
3301+ test_cases.emplace_back (new test_conv2d (GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {56 , 80 , 1280 , 1 }, {3 , 3 , 1280 , 1280 }, 1 , 1 , 1 , 1 , 1 , 1 ));
3302+ test_cases.emplace_back (new test_conv2d (GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {56 , 80 , 1280 , 1 }, {3 , 3 , 1280 , 2560 }, 1 , 1 , 1 , 1 , 1 , 1 ));
3303+
32553304 // sycl backend will limit task global_range < MAX_INT
32563305 // test cases for 2D im2col with large input W and H (occurs in stable-diffusion)
32573306 // however these cases need to alloc more memory which may fail in some devices (Intel Arc770, etc.)
0 commit comments