Skip to content

Commit c3cf646

Browse files
authored
Fix bad optimized kernel for add: resize the output to the broadcast target size of a and b instead of a's sizes.
Differential Revision: D80914321 Pull Request resolved: #13633
1 parent 732aff9 commit c3cf646

File tree

2 files changed

+13
-2
lines changed

2 files changed

+13
-2
lines changed

kernels/optimized/cpu/op_add_sub_impl.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,9 @@ Tensor& opt_add_sub_out_impl(
116116

117117
if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) {
118118
// Resize for dynamic shape
119-
auto error = resize_tensor(out, a.sizes());
120119
ET_KERNEL_CHECK_MSG(
121120
ctx,
122-
error == Error::Ok,
121+
resize_to_broadcast_target_size(a, b, out) == Error::Ok,
123122
InvalidArgument,
124123
out,
125124
"Failed to resize output tensor.");

kernels/test/op_add_test.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,18 @@ TEST_F(OpAddOutKernelTest, BroadcastNDTest) {
591591
test_broadcast_last_dim<ScalarType::BFloat16>();
592592
}
593593

594+
// Adds a rank-2 tensor `a` ({1, 3}) to a rank-3 tensor `b` ({1, 1, 3}) and
// checks that the result has b's shape ({1, 1, 3}), i.e. the output shape is
// not simply a's shape.
// NOTE(review): presumably regression coverage for the kTreatAs1d path in
// op_add_sub_impl.h that used to resize `out` to a.sizes() -- confirm that
// this input actually selects that path.
TEST_F(OpAddOutKernelTest, BroadcastBToA) {
595+
TensorFactory<ScalarType::Float> tf_a;
596+
Tensor a = tf_a.make({1, 3}, /*data=*/{1, 2, 3});
597+
Tensor b = tf_a.make({1, 1, 3}, /*data=*/{3.2, 1.3, 5.5});
598+
// Destination for output of add; pre-sized to b's shape {1, 1, 3} rather
// than a's shape {1, 3}.
599+
Tensor out = tf_a.zeros({1, 1, 3});
600+
601+
// Check that it matches the expected output: the element-wise sums
// 1 + 3.2, 2 + 1.3, 3 + 5.5 (the 1.0 argument is presumably the alpha
// multiplier applied to b -- TODO confirm against op_add_out's signature).
602+
Tensor expected = tf_a.make({1, 1, 3}, /*data=*/{4.2, 3.3, 8.5});
603+
EXPECT_TENSOR_CLOSE(op_add_out(a, b, 1.0, out), expected);
604+
}
605+
594606
//
595607
// Death Tests
596608
//

0 commit comments

Comments
 (0)