@@ -625,6 +625,95 @@ TEST_CASE("Test tensor operation with first touch: unary (zero, relu, copy) & ma
625625 * =================================================================================================
626626 */
627627
628+ TEST_CASE (" Test tensor operation with outer loop with main kernel: unary (zero, relu, copy)" , " [tensor_operation][unary][correctness]" )
629+ {
630+ using namespace mini_jit ;
631+
632+ auto type = GENERATE (TensorOperation::prim_t ::zero, TensorOperation::prim_t ::relu, TensorOperation::prim_t ::copy);
633+
634+ CAPTURE (type);
635+
636+ constexpr TensorOperation::dim_t dim_types[]{TensorOperation::dim_t ::n, TensorOperation::dim_t ::k, TensorOperation::dim_t ::c,
637+ TensorOperation::dim_t ::m, TensorOperation::dim_t ::k, TensorOperation::dim_t ::m,
638+ TensorOperation::dim_t ::m, TensorOperation::dim_t ::n};
639+ constexpr TensorOperation::exec_t exec_types[]{TensorOperation::exec_t ::seq, TensorOperation::exec_t ::seq, TensorOperation::exec_t ::seq,
640+ TensorOperation::exec_t ::seq, TensorOperation::exec_t ::seq, TensorOperation::exec_t ::seq,
641+ TensorOperation::exec_t ::prim, TensorOperation::exec_t ::prim};
642+ constexpr int64_t dim_sizes[]{2 , 3 , 5 , 8 , 13 , 21 , 16 , 16 };
643+ constexpr int64_t strides_in0[]{16 * 16 * 1 * 13 * 8 * 1 * 3 ,
644+ 0 , // k-dim
645+ 16 * 16 * 1 * 13 * 8 ,
646+ 16 * 16 * 1 * 13 ,
647+ 0 , // k-dim
648+ 16 * 16 ,
649+ 1 ,
650+ 16 };
651+ constexpr int64_t strides_in1[]{0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 };
652+ constexpr int64_t strides_out[]{16 * 16 * 1 * 13 * 8 * 1 * 3 ,
653+ 0 , // k-dim
654+ 16 * 16 * 1 * 13 * 8 ,
655+ 16 * 16 * 1 * 13 ,
656+ 0 , // k-dim
657+ 16 * 16 ,
658+ 1 ,
659+ 16 };
660+
661+ GenerationTest test (16 , 16 , 16 , 1 , 16 * 16 * 21 * 13 * 8 * 5 * 3 * 2 , 0 , 16 * 16 * 21 * 13 * 8 * 5 * 3 * 2 );
662+ test.SetUp (TestInfill::Random);
663+
664+ mini_jit::TensorOperation tensor_op;
665+ TensorOperation::error_t err = tensor_op.setup (
666+ TensorOperation::dtype_t ::fp32, TensorOperation::prim_t ::none, type, TensorOperation::prim_t ::none, std::span{dim_types},
667+ std::span{exec_types}, std::span{dim_sizes}, std::span{strides_in0}, std::span{strides_in1}, std::span{strides_out});
668+
669+ REQUIRE (err == TensorOperation::error_t ::success);
670+
671+ tensor_op.execute (test.matrix_a .data (), nullptr , test.matrix_c .data ());
672+
673+ UnaryType test_type = UnaryType::None;
674+ switch (type)
675+ {
676+ case TensorOperation::prim_t ::zero:
677+ test_type = UnaryType::Zero;
678+ break ;
679+ case TensorOperation::prim_t ::copy:
680+ test_type = UnaryType::Identity;
681+ break ;
682+ case TensorOperation::prim_t ::relu:
683+ test_type = UnaryType::ReLu;
684+ break ;
685+ default :
686+ FAIL (" Could not parse the unary type!" );
687+ break ;
688+ }
689+
690+ for (size_t i0 = 0 ; i0 < dim_sizes[0 ]; i0++)
691+ {
692+ for (size_t i1 = 0 ; i1 < dim_sizes[1 ]; i1++)
693+ {
694+ for (size_t i2 = 0 ; i2 < dim_sizes[2 ]; i2++)
695+ {
696+ for (size_t i3 = 0 ; i3 < dim_sizes[3 ]; i3++)
697+ {
698+ for (size_t i4 = 0 ; i4 < dim_sizes[4 ]; i4++)
699+ {
700+ for (size_t i5 = 0 ; i5 < dim_sizes[5 ]; i5++)
701+ {
702+ uint64_t offset_a = i0 * strides_in0[0 ] + i1 * strides_in0[1 ] + i2 * strides_in0[2 ] + i3 * strides_in0[3 ] +
703+ i4 * strides_in0[4 ] + i5 * strides_in0[5 ];
704+ uint64_t offset_c = i0 * strides_out[0 ] + i1 * strides_out[1 ] + i2 * strides_out[2 ] + i3 * strides_out[3 ] +
705+ i4 * strides_out[4 ] + i5 * strides_out[5 ];
706+ test.naive_unary_M_N (test.matrix_a .data () + offset_a, test.matrix_c_verify .data () + offset_c, 16 , 16 , false , test_type);
707+ }
708+ }
709+ }
710+ }
711+ }
712+ }
713+
714+ test.verify_matmul (test.matrix_c_verify .data (), test.matrix_c .data (), test.matrix_c .size ());
715+ }
716+
628717TEST_CASE (" Test tensor operation with outer loop with main kernel: gemm" , " [tensor_operation][gemm][correctness]" )
629718{
630719 using namespace mini_jit ;
@@ -1187,6 +1276,96 @@ TEST_CASE("Test tensor operation with outer loop with first touch: unary (zero,
11871276 * #################################################################################################
11881277 */
11891278
1279+ TEST_CASE (" Test parallel tensor operation with outer loop with main kernel: unary (zero, relu, copy)" ,
1280+ " [tensor_operation][unary][correctness]" )
1281+ {
1282+ using namespace mini_jit ;
1283+
1284+ auto type = GENERATE (TensorOperation::prim_t ::zero, TensorOperation::prim_t ::relu, TensorOperation::prim_t ::copy);
1285+
1286+ CAPTURE (type);
1287+
1288+ constexpr TensorOperation::dim_t dim_types[]{TensorOperation::dim_t ::n, TensorOperation::dim_t ::m, TensorOperation::dim_t ::c,
1289+ TensorOperation::dim_t ::m, TensorOperation::dim_t ::k, TensorOperation::dim_t ::m,
1290+ TensorOperation::dim_t ::m, TensorOperation::dim_t ::n};
1291+ constexpr TensorOperation::exec_t exec_types[]{
1292+ TensorOperation::exec_t ::shared, TensorOperation::exec_t ::shared, TensorOperation::exec_t ::shared, TensorOperation::exec_t ::seq,
1293+ TensorOperation::exec_t ::seq, TensorOperation::exec_t ::seq, TensorOperation::exec_t ::prim, TensorOperation::exec_t ::prim};
1294+ constexpr int64_t dim_sizes[]{2 , 3 , 5 , 8 , 13 , 21 , 16 , 16 };
1295+ constexpr int64_t strides_in0[]{16 * 16 * 1 * 13 * 8 * 1 * 3 ,
1296+ 16 * 16 * 1 * 13 * 8 * 1 , // m-dim
1297+ 16 * 16 * 1 * 13 * 8 ,
1298+ 16 * 16 * 1 * 13 ,
1299+ 0 , // k-dim
1300+ 16 * 16 ,
1301+ 1 ,
1302+ 16 };
1303+ constexpr int64_t strides_in1[]{0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 };
1304+ constexpr int64_t strides_out[]{16 * 16 * 1 * 13 * 8 * 1 * 3 ,
1305+ 16 * 16 * 1 * 13 * 8 * 1 , // m-dim
1306+ 16 * 16 * 1 * 13 * 8 ,
1307+ 16 * 16 * 1 * 13 ,
1308+ 0 , // k-dim
1309+ 16 * 16 ,
1310+ 1 ,
1311+ 16 };
1312+
1313+ GenerationTest test (16 , 16 , 16 , 1 , 16 * 16 * 21 * 13 * 8 * 5 * 3 * 2 , 0 , 16 * 16 * 21 * 13 * 8 * 5 * 3 * 2 );
1314+ test.SetUp (TestInfill::Random);
1315+
1316+ mini_jit::TensorOperation tensor_op;
1317+ TensorOperation::error_t err = tensor_op.setup (
1318+ TensorOperation::dtype_t ::fp32, TensorOperation::prim_t ::none, type, TensorOperation::prim_t ::none, std::span{dim_types},
1319+ std::span{exec_types}, std::span{dim_sizes}, std::span{strides_in0}, std::span{strides_in1}, std::span{strides_out});
1320+
1321+ REQUIRE (err == TensorOperation::error_t ::success);
1322+
1323+ tensor_op.execute (test.matrix_a .data (), nullptr , test.matrix_c .data ());
1324+
1325+ UnaryType test_type = UnaryType::None;
1326+ switch (type)
1327+ {
1328+ case TensorOperation::prim_t ::zero:
1329+ test_type = UnaryType::Zero;
1330+ break ;
1331+ case TensorOperation::prim_t ::copy:
1332+ test_type = UnaryType::Identity;
1333+ break ;
1334+ case TensorOperation::prim_t ::relu:
1335+ test_type = UnaryType::ReLu;
1336+ break ;
1337+ default :
1338+ FAIL (" Could not parse the unary type!" );
1339+ break ;
1340+ }
1341+
1342+ for (size_t i0 = 0 ; i0 < dim_sizes[0 ]; i0++)
1343+ {
1344+ for (size_t i1 = 0 ; i1 < dim_sizes[1 ]; i1++)
1345+ {
1346+ for (size_t i2 = 0 ; i2 < dim_sizes[2 ]; i2++)
1347+ {
1348+ for (size_t i3 = 0 ; i3 < dim_sizes[3 ]; i3++)
1349+ {
1350+ for (size_t i4 = 0 ; i4 < dim_sizes[4 ]; i4++)
1351+ {
1352+ for (size_t i5 = 0 ; i5 < dim_sizes[5 ]; i5++)
1353+ {
1354+ uint64_t offset_a = i0 * strides_in0[0 ] + i1 * strides_in0[1 ] + i2 * strides_in0[2 ] + i3 * strides_in0[3 ] +
1355+ i4 * strides_in0[4 ] + i5 * strides_in0[5 ];
1356+ uint64_t offset_c = i0 * strides_out[0 ] + i1 * strides_out[1 ] + i2 * strides_out[2 ] + i3 * strides_out[3 ] +
1357+ i4 * strides_out[4 ] + i5 * strides_out[5 ];
1358+ test.naive_unary_M_N (test.matrix_a .data () + offset_a, test.matrix_c_verify .data () + offset_c, 16 , 16 , false , test_type);
1359+ }
1360+ }
1361+ }
1362+ }
1363+ }
1364+ }
1365+
1366+ test.verify_matmul (test.matrix_c_verify .data (), test.matrix_c .data (), test.matrix_c .size ());
1367+ }
1368+
11901369TEST_CASE (" Test parallel tensor operation with outer loop with main kernel: gemm" , " [tensor_operation][gemm][correctness]" )
11911370{
11921371 using namespace mini_jit ;
@@ -1606,9 +1785,9 @@ TEST_CASE(
16061785 TensorOperation::dim_t ::m, TensorOperation::dim_t ::k, TensorOperation::dim_t ::m, TensorOperation::dim_t ::n, TensorOperation::dim_t ::k};
16071786
16081787 constexpr TensorOperation::exec_t exec_types[]{
1609- TensorOperation::exec_t ::seq, TensorOperation::exec_t ::seq , TensorOperation::exec_t ::seq, TensorOperation::exec_t ::seq,
1610- TensorOperation::exec_t ::seq, TensorOperation::exec_t ::seq, TensorOperation::exec_t ::prim, TensorOperation::exec_t ::prim,
1611- TensorOperation::exec_t ::prim, TensorOperation::exec_t ::prim};
1788+ TensorOperation::exec_t ::shared, TensorOperation::exec_t ::shared , TensorOperation::exec_t ::shared, TensorOperation::exec_t ::seq,
1789+ TensorOperation::exec_t ::seq, TensorOperation::exec_t ::seq, TensorOperation::exec_t ::prim, TensorOperation::exec_t ::prim,
1790+ TensorOperation::exec_t ::prim, TensorOperation::exec_t ::prim};
16121791
16131792 constexpr int64_t dim_sizes[]{2 , 3 , 5 , 8 , 13 , 21 , 3 , 16 , 16 , 16 };
16141793 constexpr int64_t strides_in0[]{0 , // n-dim
0 commit comments