@@ -9,13 +9,13 @@ namespace f = torch::nn::functional;
99namespace pmpp ::test::ops
1010{
1111
12- TEST_F (OpTest, VecAdd )
12+ TEST_F (OpTest, VecAddv0 )
1313{
1414
1515 const YAML::Node& configs = getConfigs ()[" OpTest" ][" VecAdd" ];
1616
1717 static auto custom_op = torch::Dispatcher::singleton ()
18- .findSchemaOrThrow (" pmpp::vector_add " , " " )
18+ .findSchemaOrThrow (" pmpp::vector_add_v0 " , " " )
1919 .typed <torch::Tensor (const torch::Tensor&,
2020 const torch::Tensor&)>();
2121
@@ -36,7 +36,76 @@ TEST_F(OpTest, VecAdd)
3636 f::cosine_similarity (matCh.flatten (), matCd2h.flatten (),
3737 f::CosineSimilarityFuncOptions ().dim (0 ));
3838
39+ EXPECT_TRUE (matCh.allclose (matCd2h));
3940 EXPECT_GE (cosSim.item <fp32_t >(), 0.99 );
4041 }
4142}
43+
44+ TEST_F (OpTest, VecAddv1)
45+ {
46+
47+ const YAML::Node& configs = getConfigs ()[" OpTest" ][" VecAdd" ];
48+
49+ static auto custom_op = torch::Dispatcher::singleton ()
50+ .findSchemaOrThrow (" pmpp::vector_add_v1" , " " )
51+ .typed <torch::Tensor (const torch::Tensor&,
52+ const torch::Tensor&)>();
53+
54+ for (const auto & cfg : configs) {
55+
56+ auto nElems = cfg[" nElems" ].as <pmpp::size_t >();
57+
58+ torch::Tensor matAh = torch::rand (nElems, torch::kF32 );
59+ torch::Tensor matBh = torch::rand (nElems, torch::kF32 );
60+ torch::Tensor matCh = custom_op.call (matAh, matBh);
61+
62+ ASSERT_TRUE (torch::cuda::is_available ());
63+ torch::Tensor matAd = matAh.to (torch::kCUDA );
64+ torch::Tensor matBd = matBh.to (matAd.device ());
65+ torch::Tensor matCd2h = custom_op.call (matAd, matBd).to (torch::kCPU );
66+
67+ Tensor cosSim =
68+ f::cosine_similarity (matCh.flatten (), matCd2h.flatten (),
69+ f::CosineSimilarityFuncOptions ().dim (0 ));
70+
71+ EXPECT_TRUE (matCh.allclose (matCd2h));
72+ EXPECT_GE (cosSim.item <fp32_t >(), 0.99 );
73+ }
74+ }
75+
TEST_F(OpTest, VecAddv2)
{
    const YAML::Node& configs = getConfigs()["OpTest"]["VecAdd"];

    // Resolve the registered v2 vector-add operator once from the dispatcher.
    static auto custom_op =
        torch::Dispatcher::singleton()
            .findSchemaOrThrow("pmpp::vector_add_v2", "")
            .typed<torch::Tensor(const torch::Tensor&, const torch::Tensor&)>();

    for (const auto& cfg : configs) {
        const auto nElems = cfg["nElems"].as<pmpp::size_t>();

        // CPU reference result.
        torch::Tensor hostA = torch::rand(nElems, torch::kF32);
        torch::Tensor hostB = torch::rand(nElems, torch::kF32);
        torch::Tensor hostC = custom_op.call(hostA, hostB);

        // CUDA result, copied back to host for comparison.
        ASSERT_TRUE(torch::cuda::is_available());
        torch::Tensor devA = hostA.to(torch::kCUDA);
        torch::Tensor devB = hostB.to(devA.device());
        torch::Tensor devC2h = custom_op.call(devA, devB).to(torch::kCPU);

        // Flattened cosine similarity between CPU and GPU outputs,
        // reported for inspection only (see NOTE below).
        Tensor cosSim =
            f::cosine_similarity(hostC.flatten(), devC2h.flatten(),
                                 f::CosineSimilarityFuncOptions().dim(0));

        std::cout << std::format("nElems: {}, cosSim: {}\n", nElems,
                                 cosSim.item<fp32_t>());

        // // [NOTE] This won't pass because the kernel is deliberately wrong
        // EXPECT_TRUE(matCh.allclose(matCd2h));
        // EXPECT_GE(cosSim.item<fp32_t>(), 0.99);
    }
}
42111} // namespace pmpp::test::ops
0 commit comments