@@ -3325,13 +3325,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
33253325 test_cases.emplace_back (new test_im2col (GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12 , 12 , 2 , 2048 }, {3 , 3 , 2 , 2048 }, 1 , 1 , 1 , 1 , 1 , 1 , true ));
33263326 test_cases.emplace_back (new test_im2col (GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12 , 12 , 1 , 2560 }, {3 , 3 , 1 , 2560 }, 1 , 1 , 1 , 1 , 1 , 1 , true ));
33273327 test_cases.emplace_back (new test_im2col (GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12 , 12 , 2 , 2560 }, {3 , 3 , 2 , 2560 }, 1 , 1 , 1 , 1 , 1 , 1 , true ));
3328-
3328+
33293329 // The SYCL backend limits a task's global_range to less than MAX_INT.
33303330 // Test cases for 2D im2col with large input W and H (as occurs in stable-diffusion).
33313331 // However, these cases need to allocate more memory, which may fail on some devices (Intel Arc770, etc.).
33323332 // These cases have been verified to pass on Intel(R) Data Center GPU Max 1100 (SYCL backend) and NV A30 (CUDA backend).
3333- // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
3334- // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
3333+ // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
3334+ // test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
33353335
33363336 test_cases.emplace_back (new test_conv_transpose_1d ());
33373337 test_cases.emplace_back (new test_conv_transpose_1d ({3 ,2 ,1 ,1 }, {2 ,3 ,2 ,1 }, 3 , 0 , 1 ));
0 commit comments