@@ -1463,11 +1463,13 @@ struct test_cpy : public test_case {
14631463    const  ggml_type type_src;
14641464    const  ggml_type type_dst;
14651465    const  std::array<int64_t , 4 > ne;
1466-     const  std::array<int64_t , 4 > permute;
1466+     const  std::array<int64_t , 4 > permute_src;
1467+     const  std::array<int64_t , 4 > permute_dst;
14671468    bool  _src_use_permute;
1469+     bool  _dst_use_permute;
14681470
14691471    std::string vars () override  {
        // Diff note: the "-" line is the old body, which listed a single `permute` member;
        // the "+" line replaces it and passes both permute_src and permute_dst to VARS_TO_STR5.
        // NOTE(review): VARS_TO_STR5 is defined outside this view - presumably it formats the
        // listed members into the test case's parameter string; confirm against the macro.
1470-         return  VARS_TO_STR4 (type_src, type_dst, ne, permute );
1472+         return  VARS_TO_STR5 (type_src, type_dst, ne, permute_src, permute_dst );
14711473    }
14721474
14731475    double  max_nmse_err () override  {
@@ -1480,23 +1482,30 @@ struct test_cpy : public test_case {
14801482
14811483    test_cpy (ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
14821484            std::array<int64_t , 4 > ne = {10 , 10 , 10 , 1 },
            // Diff note: the "-" lines are the old signature with one `permute` argument; the "+"
            // lines replace it with independent permute_src / permute_dst arguments, both of
            // which default to all zeros.
1483-             std::array<int64_t , 4 > permute = {0 , 0 , 0 , 0 })
1484-         : type_src(type_src), type_dst(type_dst), ne(ne), permute(permute),
1485-           _src_use_permute (permute[0 ] + permute[1 ] + permute[2 ] + permute[3 ] > 0 ) {}
1485+             std::array<int64_t , 4 > permute_src = {0 , 0 , 0 , 0 },
1486+             std::array<int64_t , 4 > permute_dst = {0 , 0 , 0 , 0 })
1487+         : type_src(type_src), type_dst(type_dst), ne(ne), permute_src(permute_src), permute_dst(permute_dst),
            // Each *_use_permute flag is true only when the sum of the four entries of the
            // matching array is > 0, so the all-zero default leaves that tensor unpermuted
            // (build_graph() calls ggml_permute only when the flag is set).
            // Inside these initializers the names permute_src / permute_dst resolve to the
            // constructor parameters, which shadow the members of the same name.
1488+           _src_use_permute (permute_src[0 ] + permute_src[1 ] + permute_src[2 ] + permute_src[3 ] > 0 ),
1489+           _dst_use_permute(permute_dst[0 ] + permute_dst[1 ] + permute_dst[2 ] + permute_dst[3 ] > 0 ) {}
14861490
14871491    ggml_tensor * build_graph (ggml_context * ctx) override  {
14881492        ggml_tensor * src = ggml_new_tensor (ctx, type_src, 4 , ne.data ());
14891493        ggml_set_param (ctx, src);
14901494        ggml_set_name (src, " src"  );
14911495
14921496        if  (_src_use_permute) {
1493-             src = ggml_permute (ctx, src, permute [0 ], permute [1 ], permute [2 ], permute [3 ]);
1497+             src = ggml_permute (ctx, src, permute_src [0 ], permute_src [1 ], permute_src [2 ], permute_src [3 ]);
14941498            ggml_set_name (src, " src_permuted"  );
14951499        }
14961500
1497-         ggml_tensor* dst = ggml_new_tensor (ctx, type_dst, 4 , src->ne );
1501+         ggml_tensor  * dst = ggml_new_tensor (ctx, type_dst, 4 , src->ne );
14981502        ggml_set_name (dst, " dst"  );
14991503
1504+         if  (_dst_use_permute) {
1505+             dst = ggml_permute (ctx, dst, permute_dst[0 ], permute_dst[1 ], permute_dst[2 ], permute_dst[3 ]);
1506+             ggml_set_name (dst, " dst_permuted"  );
1507+         }
1508+ 
15001509        ggml_tensor * out = ggml_cpy (ctx, src, dst);
15011510        ggml_set_name (out, " out"  );
15021511
@@ -4004,14 +4013,25 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
40044013        test_cases.emplace_back (new  test_set (GGML_TYPE_I32, GGML_TYPE_I32, {6 , 5 , 4 , 3 }, dim));
40054014    }
40064015
4007-     for  (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
4016+     //  same-type copy
4017+     for  (ggml_type type : all_types) {
4018+         const  auto  nk = ggml_blck_size (type);
4019+ 
4020+         for  (int  k = 1 ; k < 4 ; ++k) {
4021+             test_cases.emplace_back (new  test_cpy (type, type, {k*nk, 2 , 3 , 4 }));
4022+             test_cases.emplace_back (new  test_cpy (type, type, {k*nk, 2 , 3 , 4 }, {0 , 2 , 1 , 3 }));
4023+             test_cases.emplace_back (new  test_cpy (type, type, {k*nk, 2 , 3 , 4 }, {0 , 3 , 1 , 2 }, {0 , 2 , 1 , 3 }));
4024+         }
4025+     }
4026+ 
4027+     for  (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_F32}) {
40084028        for  (ggml_type type_dst : all_types) {
40094029            test_cases.emplace_back (new  test_cpy (type_src, type_dst, {256 , 4 , 4 , 4 }));
40104030            test_cases.emplace_back (new  test_cpy (type_src, type_dst, {256 , 2 , 3 , 4 }, {0 , 2 , 1 , 3 })); //  cpy by rows
40114031        }
40124032    }
4013-     for  (ggml_type type_dst  : {GGML_TYPE_F32} ) {
4014-         for  (ggml_type type_src  : all_types ) {
4033+     for  (ggml_type type_src  : all_types ) {
4034+         for  (ggml_type type_dst  : {GGML_TYPE_F32} ) {
40154035            test_cases.emplace_back (new  test_cpy (type_src, type_dst, {256 , 4 , 4 , 4 }));
40164036            test_cases.emplace_back (new  test_cpy (type_src, type_dst, {256 , 2 , 3 , 4 }, {0 , 2 , 1 , 3 })); //  cpy by rows
40174037        }
0 commit comments