Skip to content

Commit 978bfca

Browse files
authored
Fix CPU EP Tile 0D overvalidation (microsoft#25821)
### Description
Fixes microsoft#11523. Scalars (0-D tensors) should simply be no-ops for Tile. Removing the over-validation of the input rank lets this case work.

### Motivation and Context
Conformance with expected behavior.
1 parent 88b63d0 commit 978bfca

File tree

2 files changed

+16
-8
lines changed

2 files changed

+16
-8
lines changed

onnxruntime/core/providers/cpu/tensor/tile.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,6 @@ Status Tile::Compute(OpKernelContext* ctx) const {
185185
tensor_pointer = ctx->Input<Tensor>(1);
186186
if (tensor_pointer == nullptr) return Status(common::ONNXRUNTIME, common::FAIL, "Input count of Tile OP mismatch, the second one is empty");
187187
const Tensor& repeats_tensor = *tensor_pointer;
188-
if (input_rank < 1)
189-
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "the tensor to be tiled using Tile OP must be atleast 1 dimensional");
190188
if (repeats_tensor.Shape().NumDimensions() != 1)
191189
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "'repeat' input tensor must be 1 dimensional");
192190
if (size_t(repeats_tensor.Shape().Size()) != input_rank)

onnxruntime/test/providers/cpu/tensor/tile_op_test.cc

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,14 @@ void RunTest(const std::vector<int64_t>& input_dims,
5353
std::vector<T> output_data(output_size);
5454
std::vector<int64_t> input_strides(rank);
5555
std::vector<int64_t> output_strides(rank);
56-
input_strides[rank - 1] = output_strides[rank - 1] = 1;
57-
if (rank > 1) {
58-
for (size_t i = rank - 2;; --i) {
59-
input_strides[i] = input_dims[i + 1] * input_strides[i + 1];
60-
output_strides[i] = output_dims[i + 1] * output_strides[i + 1];
61-
if (i == 0) break;
56+
if (rank >= 1) {
57+
input_strides[rank - 1] = output_strides[rank - 1] = 1;
58+
if (rank > 1) {
59+
for (size_t i = rank - 2;; --i) {
60+
input_strides[i] = input_dims[i + 1] * input_strides[i + 1];
61+
output_strides[i] = output_dims[i + 1] * output_strides[i + 1];
62+
if (i == 0) break;
63+
}
6264
}
6365
}
6466
for (size_t i = 0; i < output_size; ++i) {
@@ -142,6 +144,14 @@ void RunTestWrapper() {
142144
RunTest<T>({2, 1, 3}, {2, 2, 1});
143145
RunTest<T>({2, 1, 3}, {2, 2, 1}, true);
144146

147+
// The WebGPU EP is not currently prepared for this possibility:
148+
// onnxruntime/core/providers/webgpu/program.cc:46
149+
// ProgramUniformVariableValue(...) length > 0 was false. number of element of uniform variable must be greater than 0.
150+
#if !defined(USE_WEBGPU)
151+
// Tile0D (nop)
152+
RunTest<T>({}, {});
153+
#endif
154+
145155
#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_WEBGPU)
146156
// _TileMemcpyKernelFromInput, vectorized 4
147157
RunTest<T>({256, 512}, {3, 1});

0 commit comments

Comments
 (0)