
Commit ecb639a

Use a dtype-agnostic implementation for non-optimized op_permute_copy
Differential Revision: D80280179
Pull Request resolved: #13438
1 parent 4915f9a commit ecb639a

4 files changed (+258, -18 lines)

backends/cadence/hifi/operators/op_permute_copy.cpp

Lines changed: 16 additions & 14 deletions
@@ -70,8 +70,6 @@ Tensor& permute_copy_out(
       out);
 
   const auto in_type = out.scalar_type();
-
-  constexpr auto name = "permute_copy.out";
   constexpr int kNnlibMaxDim = 16;
 
   bool optimized = false;
@@ -150,23 +148,27 @@ Tensor& permute_copy_out(
   size_t trailing_dims_memo[kTensorDimensionLimit];
   executorch::runtime::memoizeTrailingDims(in, trailing_dims_memo);
 
-  // in and out must be the same dtype
-  ET_SWITCH_ALL_TYPES(in_type, ctx, name, CTYPE, [&] {
-    const CTYPE* const in_data = in.const_data_ptr<CTYPE>();
-    CTYPE* const out_data = out.mutable_data_ptr<CTYPE>();
+  const char* const in_data = static_cast<const char*>(in.const_data_ptr());
+  char* const out_data = static_cast<char*>(out.mutable_data_ptr());
+  const size_t element_size = out.element_size();
 
-    for (size_t i = 0; i < out.numel(); ++i) {
-      out_data[i] =
-          in_data[executorch::runtime::coordinateToIndexWithTrailingDimsMemo(
-              in, in_coord, trailing_dims_memo)];
-      increment_coordinate_permuted(in, in_coord, dims);
-    }
-  });
+  for (size_t i = 0; i < out.numel(); ++i) {
+    const size_t in_index =
+        executorch::runtime::coordinateToIndexWithTrailingDimsMemo(
+            in, in_coord, trailing_dims_memo);
+
+    std::memcpy(
+        out_data + i * element_size,
+        in_data + in_index * element_size,
+        element_size);
+
+    increment_coordinate_permuted(in, in_coord, dims);
+  }
 
   return out;
 }
 
 } // namespace native
 } // namespace HiFi
 } // namespace impl
-} // namespace cadence
+} // namespace cadence
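The new loop above is dtype-agnostic: instead of dispatching through ET_SWITCH_ALL_TYPES to a typed copy, it views both tensors as raw byte buffers and moves out.element_size() bytes per element with std::memcpy, so a single code path serves every scalar type. A minimal standalone sketch of the same idea, using plain buffers and a precomputed source-index table instead of the ExecuTorch tensor and coordinate helpers (names here are illustrative only, not from the repo):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Dtype-agnostic permute-copy: for each output element i, copy element_size
// bytes from the matching input element. The scalar type is never named;
// only its size matters.
void permute_copy_bytes(
    const void* in,
    void* out,
    size_t numel,
    size_t element_size,
    const std::vector<size_t>& src_index) { // source flat index per output element
  const char* const in_bytes = static_cast<const char*>(in);
  char* const out_bytes = static_cast<char*>(out);
  for (size_t i = 0; i < numel; ++i) {
    std::memcpy(
        out_bytes + i * element_size,
        in_bytes + src_index[i] * element_size,
        element_size);
  }
}

int main() {
  // Transpose a {2, 3} row-major buffer to {3, 2} (dims = {1, 0}):
  // output element i reads input flat index (i % 2) * 3 + i / 2.
  const int32_t in[6] = {1, 2, 3, 4, 5, 6};
  int32_t out[6] = {};
  const std::vector<size_t> src = {0, 3, 1, 4, 2, 5};
  permute_copy_bytes(in, out, 6, sizeof(int32_t), src);
  // out is now {1, 4, 2, 5, 3, 6}, matching the 2-D cases in the new tests.
  return 0;
}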

backends/cadence/hifi/operators/operators.h

Lines changed: 6 additions & 0 deletions
@@ -128,6 +128,12 @@ ::executorch::aten::Tensor& cat_out(
     int64_t dim,
     ::executorch::aten::Tensor& out);
 
+::executorch::aten::Tensor& permute_copy_out(
+    ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& in,
+    ::executorch::aten::IntArrayRef dims,
+    ::executorch::aten::Tensor& out);
+
 } // namespace native
 } // namespace HiFi
 } // namespace impl
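With this declaration exposed in operators.h, the HiFi kernel can be called directly from backend code and tests. Based on the new test fixture below, a call looks roughly like this (tensor construction omitted):

executorch::runtime::KernelRuntimeContext ctx;
std::vector<int64_t> dims = {1, 0};
::cadence::impl::HiFi::native::permute_copy_out(
    ctx, in, ::executorch::aten::IntArrayRef(dims.data(), dims.size()), out);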
Lines changed: 232 additions & 0 deletions
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <gtest/gtest.h>
+#include <sys/times.h>
+#include <xtensa/sim.h>
+
+#include <executorch/kernels/test/TestUtil.h>
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
+#include <executorch/runtime/platform/runtime.h>
+
+#include <executorch/backends/cadence/hifi/operators/operators.h>
+
+namespace cadence {
+namespace impl {
+namespace HiFi {
+namespace native {
+namespace {
+
+using ::executorch::aten::IntArrayRef;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::aten::TensorImpl;
+using ::executorch::runtime::Error;
+using ::executorch::runtime::KernelRuntimeContext;
+using ::executorch::runtime::runtime_init;
+using ::executorch::runtime::testing::TensorFactory;
+
+class HiFiPermuteCopyTest : public OperatorTest {
+ public:
+ protected:
+  Tensor& permute_copy_out(const Tensor& in, IntArrayRef dims, Tensor& out) {
+    return ::cadence::impl::HiFi::native::permute_copy_out(
+        context_, in, dims, out);
+  }
+};
+
+TEST_F(HiFiPermuteCopyTest, FloatPermute2DTest) {
+  TensorFactory<ScalarType::Float> tf;
+  Tensor in = tf.make({2, 3}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0});
+  Tensor expected = tf.make({3, 2}, {1.0, 4.0, 2.0, 5.0, 3.0, 6.0});
+
+  Tensor out = tf.zeros({3, 2});
+  std::vector<int64_t> dims = {1, 0};
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(HiFiPermuteCopyTest, IntPermute2DTest) {
+  TensorFactory<ScalarType::Int> tf;
+  Tensor in = tf.make({2, 3}, {1, 2, 3, 4, 5, 6});
+  Tensor expected = tf.make({3, 2}, {1, 4, 2, 5, 3, 6});
+
+  Tensor out = tf.zeros({3, 2});
+  std::vector<int64_t> dims = {1, 0};
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(HiFiPermuteCopyTest, Int8Permute2DTest) {
+  TensorFactory<ScalarType::Char> tf;
+  Tensor in = tf.make({2, 3}, {1, 2, 3, 4, 5, 6});
+  Tensor expected = tf.make({3, 2}, {1, 4, 2, 5, 3, 6});
+
+  Tensor out = tf.zeros({3, 2});
+  std::vector<int64_t> dims = {1, 0};
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(HiFiPermuteCopyTest, UInt8Permute2DTest) {
+  TensorFactory<ScalarType::Byte> tf;
+  Tensor in = tf.make({2, 3}, {1, 2, 3, 4, 5, 6});
+  Tensor expected = tf.make({3, 2}, {1, 4, 2, 5, 3, 6});
+
+  Tensor out = tf.zeros({3, 2});
+  std::vector<int64_t> dims = {1, 0};
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(HiFiPermuteCopyTest, DoublePermute2DTest) {
+  TensorFactory<ScalarType::Double> tf;
+  Tensor in = tf.make({2, 3}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0});
+  Tensor expected = tf.make({3, 2}, {1.0, 4.0, 2.0, 5.0, 3.0, 6.0});
+
+  Tensor out = tf.zeros({3, 2});
+  std::vector<int64_t> dims = {1, 0};
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(HiFiPermuteCopyTest, Long8Permute2DTest) {
+  TensorFactory<ScalarType::Long> tf;
+  Tensor in = tf.make({2, 3}, {1, 2, 3, 4, 5, 6});
+  Tensor expected = tf.make({3, 2}, {1, 4, 2, 5, 3, 6});
+
+  Tensor out = tf.zeros({3, 2});
+  std::vector<int64_t> dims = {1, 0};
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(HiFiPermuteCopyTest, BoolPermute2DTest) {
+  TensorFactory<ScalarType::Bool> tf;
+  Tensor in = tf.make({2, 3}, {true, false, true, false, true, false});
+  Tensor expected = tf.make({3, 2}, {true, false, false, true, true, false});
+
+  Tensor out = tf.zeros({3, 2});
+  std::vector<int64_t> dims = {1, 0};
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(HiFiPermuteCopyTest, Float3DPermuteTest) {
+  TensorFactory<ScalarType::Float> tf;
+  Tensor in = tf.make({2, 2, 2}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0});
+  Tensor expected =
+      tf.make({2, 2, 2}, {1.0, 3.0, 5.0, 7.0, 2.0, 4.0, 6.0, 8.0});
+
+  Tensor out = tf.zeros({2, 2, 2});
+  std::vector<int64_t> dims = {2, 0, 1};
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(HiFiPermuteCopyTest, Float4DPermuteTest) {
+  TensorFactory<ScalarType::Float> tf;
+  Tensor in = tf.make({1, 2, 2, 2}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0});
+  Tensor expected =
+      tf.make({2, 1, 2, 2}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0});
+
+  Tensor out = tf.zeros({2, 1, 2, 2});
+  std::vector<int64_t> dims = {1, 0, 2, 3};
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(HiFiPermuteCopyTest, IdentityPermuteTest) {
+  TensorFactory<ScalarType::Float> tf;
+  Tensor in = tf.make({2, 3}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0});
+  Tensor expected = tf.make({2, 3}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0});
+
+  Tensor out = tf.zeros({2, 3});
+  std::vector<int64_t> dims = {0, 1};
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(HiFiPermuteCopyTest, LargeTensorPermuteTest) {
+  TensorFactory<ScalarType::Float> tf;
+  std::vector<float> input_data;
+  for (int i = 0; i < 60; ++i) {
+    input_data.push_back(static_cast<float>(i + 1));
+  }
+  Tensor in = tf.make({3, 4, 5}, input_data);
+
+  // Permute: [3, 4, 5] -> [5, 3, 4] with dims [2, 0, 1]
+  std::vector<float> expected_data(60);
+  for (int i = 0; i < 3; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      for (int k = 0; k < 5; ++k) {
+        int old_idx = i * 20 + j * 5 + k;
+        int new_idx = k * 12 + i * 4 + j;
+        expected_data[new_idx] = static_cast<float>(old_idx + 1);
+      }
+    }
+  }
+
+  Tensor expected = tf.make({5, 3, 4}, expected_data);
+  Tensor out = tf.zeros({5, 3, 4});
+  std::vector<int64_t> dims = {2, 0, 1};
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(HiFiPermuteCopyTest, HighDimPermuteTest) {
+  TensorFactory<ScalarType::Double> tf;
+  std::vector<int32_t> shape = {2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2};
+  std::vector<double> input_data = {1.0, 2.0, 3.0, 4.0};
+  Tensor in = tf.make(shape, input_data);
+
+  // Simple transpose: swap first and last dimension
+  std::vector<int64_t> dims(16);
+  for (int i = 0; i < 16; ++i) {
+    dims[i] = i;
+  }
+  std::swap(dims[0], dims[15]);
+  Tensor out = tf.zeros(shape);
+
+  permute_copy_out(in, IntArrayRef(dims.data(), dims.size()), out);
+  EXPECT_DOUBLE_EQ(out.const_data_ptr<double>()[0], 1.0);
+  EXPECT_DOUBLE_EQ(out.const_data_ptr<double>()[1], 3.0);
+  EXPECT_DOUBLE_EQ(out.const_data_ptr<double>()[2], 2.0);
+  EXPECT_DOUBLE_EQ(out.const_data_ptr<double>()[3], 4.0);
+}
+
+TEST_F(HiFiPermuteCopyTest, MixedDataTypesTest) {
+  TensorFactory<ScalarType::Short> tf_short;
+  Tensor in_short = tf_short.make({2, 2}, {1, 2, 3, 4});
+  Tensor expected_short = tf_short.make({2, 2}, {1, 3, 2, 4});
+  Tensor out_short = tf_short.zeros({2, 2});
+  std::vector<int64_t> dims = {1, 0};
+
+  permute_copy_out(in_short, IntArrayRef(dims.data(), dims.size()), out_short);
+  EXPECT_TENSOR_EQ(out_short, expected_short);
+}
+
+} // namespace
+} // namespace native
+} // namespace HiFi
+} // namespace impl
+} // namespace cadence
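The index arithmetic in LargeTensorPermuteTest follows from row-major strides: the {3, 4, 5} input has strides {20, 5, 1}, and the permuted {5, 3, 4} output (dims {2, 0, 1}) has strides {12, 4, 1}, so the element at input coordinate (i, j, k) moves from flat index i*20 + j*5 + k to k*12 + i*4 + j. For example, (i, j, k) = (1, 2, 3) holds value 34.0 at old index 1*20 + 2*5 + 3 = 33 and lands at new index 3*12 + 1*4 + 2 = 42.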

backends/cadence/hifi/operators/tests/test_op_quantize_per_tensor.cpp

Lines changed: 4 additions & 4 deletions
@@ -118,8 +118,8 @@ TEST_F(HiFiQuantizePerTensorTest, CheckSingleElementIntQuantize) {
   constexpr int64_t kQuantMin = std::numeric_limits<int32_t>::min();
   constexpr int64_t kQuantMax = std::numeric_limits<int32_t>::max();
   constexpr float kInputValue = 100.0f;
-  constexpr int32_t kExpectedOutputValue =
-      static_cast<int32_t>(kInputValue / kScale + kZeroPoint);
+  constexpr int32_t kExpectedOutputValue = static_cast<int32_t>(
+      static_cast<double>(kInputValue) / kScale + kZeroPoint);
 
   quantize_per_tensor_out(
       tf.make(sizes, {kInputValue}),
@@ -144,8 +144,8 @@ TEST_F(HiFiQuantizePerTensorTest, CheckSingleElementUInt16Quantize) {
   constexpr int64_t kQuantMin = std::numeric_limits<uint16_t>::min();
   constexpr int64_t kQuantMax = std::numeric_limits<uint16_t>::max();
   constexpr float kInputValue = 100.0f;
-  constexpr uint16_t kExpectedOutputValue =
-      static_cast<uint16_t>(kInputValue / kScale + kZeroPoint);
+  constexpr uint16_t kExpectedOutputValue = static_cast<uint16_t>(
+      static_cast<double>(kInputValue) / kScale + kZeroPoint);
 
   quantize_per_tensor_out(
       tf.make(sizes, {kInputValue}),
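The quantize test change computes the expected value with the division done in double rather than float, so the truncating cast no longer depends on a single-precision rounding step (which can shift the result by one for some scale values) and better matches kernel arithmetic carried out in double precision. A minimal sketch of the before/after pattern, using hypothetical kScale and kZeroPoint values (the real constants are defined earlier in the test and are not shown in this diff):

constexpr float kInputValue = 100.0f;
constexpr float kScale = 0.0003f;   // hypothetical value for illustration
constexpr int32_t kZeroPoint = 0;   // hypothetical value for illustration

// Before: the division happens in float, then the sum is truncated.
constexpr int32_t kOldExpected =
    static_cast<int32_t>(kInputValue / kScale + kZeroPoint);

// After: promote to double first, so the expectation uses double-precision
// division before the truncating cast.
constexpr int32_t kNewExpected = static_cast<int32_t>(
    static_cast<double>(kInputValue) / kScale + kZeroPoint);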
