Skip to content

Commit 197b6ea

Browse files
committed
revert: "fix: Fill the padding area with zeros in CpuIm2ColKernel"
This reverts commit 179518a. Reason for revert: Failing tests. Change-Id: I49eb0afb8fa6b5d1d7c720900209ff21e5e6b27a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14423 Reviewed-by: Gunes Bayir <[email protected]> Tested-by: Gunes Bayir <[email protected]>
1 parent 179518a commit 197b6ea

File tree

8 files changed

+168
-439
lines changed

8 files changed

+168
-439
lines changed

arm_compute/core/utils/misc/ShapeCalculator.h

Lines changed: 11 additions & 11 deletions
Original file line number · Diff line number · Diff line change
@@ -575,14 +575,14 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned i
575575

576576
/** Calculate the im2col output shape of a tensor
577577
*
578-
* @param[in] input Input tensor info
579-
* @param[in] kernel_dims The kernel dimensions (width and height).
580-
* @param[in] conv_info Contains padding and stride information
581-
* @param[in] has_bias In case biases are provided expands the matrix with 1
582-
* @param[in] dilation Dilation, in elements, across x and y
583-
* @param[in] batch_size_on_z True if batch size is on z axis
584-
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
585-
* @param[in] channel_pad_right (Optional) Amount of padding applied to the channel dimension
578+
* @param[in] input Input tensor info
579+
* @param[in] kernel_dims The kernel dimensions (width and height).
580+
* @param[in] conv_info Contains padding and stride information
581+
* @param[in] has_bias In case biases are provided expands the matrix with 1
582+
* @param[in] dilation Dilation, in elements, across x and y
583+
* @param[in] batch_size_on_z True if batch size is on z axis
584+
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
585+
* @param[in] input_pad_right (Optional) When fast-math is selected, per element padding for the im2col matrix may be necessary
586586
*
587587
* @return the calculated shape
588588
*/
@@ -592,8 +592,8 @@ inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input,
592592
bool has_bias,
593593
const Size2D &dilation,
594594
bool batch_size_on_z,
595-
unsigned int num_groups = 1,
596-
unsigned int channel_pad_right = 0)
595+
unsigned int num_groups = 1,
596+
unsigned int input_pad_right = 0)
597597
{
598598
// The output shape will be the 3D shape [ out_channels * kernel_area, num_elems_per_out_channel, batches ] if batch_size_on_z == true
599599
// or the 4D shape [ out_channels * kernel_area / num_groups, num_elems_per_out_channel, num_groups, batches ] if batch_size_on_z == false
@@ -611,7 +611,7 @@ inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input,
611611

612612
std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(
613613
output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation);
614-
output_shape.set(0, ((output_shape[channel_idx] + channel_pad_right) / num_groups * kernel_dims.area() +
614+
output_shape.set(0, ((output_shape[channel_idx] + input_pad_right) / num_groups * kernel_dims.area() +
615615
(has_bias ? 1 : 0))); // NOLINT
616616
output_shape.set(1, (out_dims.first * out_dims.second));
617617
if (batch_size_on_z && output_shape.num_dimensions() >= 3)

src/cpu/kernels/CpuIm2ColKernel.cpp

Lines changed: 48 additions & 49 deletions
Large diffs are not rendered by default.

src/cpu/kernels/CpuIm2ColKernel.h

Lines changed: 20 additions & 20 deletions
Original file line number · Diff line number · Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2023, 2025 Arm Limited.
2+
* Copyright (c) 2017-2023 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -67,26 +67,26 @@ class CpuIm2ColKernel : public ICpuKernel<CpuIm2ColKernel>
6767
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuIm2ColKernel);
6868
/** Set the input and output of the kernel.
6969
*
70-
* @param[in] src The input tensor info to convert. 3 lower dimensions represent a single input [width, height, IFM],
71-
* while every optional dimension from 4 and above represent a batch of inputs.
72-
* Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
73-
* Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false
74-
* @param[out] dst The output tensor info. Data types supported: Same as @p input
75-
* @param[in] kernel_dims The kernel dimensions (width and height).
76-
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
77-
* @param[in] has_bias In case biases are provided expands the matrix with 1.
78-
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
79-
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
80-
* @param[in] channel_pad_right (Optional) Amount of padding applied to the channel dimension.
70+
* @param[in] src The input tensor info to convert. 3 lower dimensions represent a single input [width, height, IFM],
71+
* while every optional dimension from 4 and above represent a batch of inputs.
72+
* Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32
73+
* Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false
74+
* @param[out] dst The output tensor info. Data types supported: Same as @p input
75+
* @param[in] kernel_dims The kernel dimensions (width and height).
76+
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
77+
* @param[in] has_bias In case biases are provided expands the matrix with 1.
78+
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
79+
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
80+
* @param[in] input_pad_right (Optional) When fast-math is selected, per element padding for the im2col matrix may be necessary
8181
*/
8282
void configure(const ITensorInfo *src,
8383
ITensorInfo *dst,
8484
const Size2D &kernel_dims,
8585
const PadStrideInfo &conv_info,
8686
bool has_bias,
87-
const Size2D &dilation = Size2D(1U, 1U),
88-
unsigned int num_groups = 1,
89-
unsigned int channel_pad_right = 0);
87+
const Size2D &dilation = Size2D(1U, 1U),
88+
unsigned int num_groups = 1,
89+
unsigned int input_pad_right = 0);
9090
/** Static function to check if given info will lead to a valid configuration
9191
*
9292
* Similar to CpuIm2ColKernel::configure()
@@ -98,9 +98,9 @@ class CpuIm2ColKernel : public ICpuKernel<CpuIm2ColKernel>
9898
const Size2D &kernel_dims,
9999
const PadStrideInfo &conv_info,
100100
bool has_bias,
101-
const Size2D &dilation = Size2D(1U, 1U),
102-
unsigned int num_groups = 1,
103-
unsigned int channel_pad_right = 0);
101+
const Size2D &dilation = Size2D(1U, 1U),
102+
unsigned int num_groups = 1,
103+
unsigned int input_pad_right = 0);
104104

105105
// Inherited methods overridden:
106106
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
@@ -127,15 +127,15 @@ class CpuIm2ColKernel : public ICpuKernel<CpuIm2ColKernel>
127127
std::pair<unsigned int, unsigned int> convolved_dims,
128128
const Size2D &kernel_dims,
129129
const Size2D &dilation,
130-
uint32_t channel_pad_right,
130+
uint32_t input_pad_right,
131131
bool has_bias);
132132

133133
Im2ColFunctionPtr _func{nullptr};
134134
std::pair<unsigned int, unsigned int> _convolved_dims{};
135135
PadStrideInfo _conv_info{};
136136
unsigned int _kernel_width{0};
137137
unsigned int _kernel_height{0};
138-
unsigned int _channel_pad_right{0};
138+
unsigned int _input_pad_right{0};
139139
bool _has_bias{false};
140140
Size2D _dilation{1U, 1U};
141141
DataLayout _data_layout{DataLayout::UNKNOWN};

src/cpu/kernels/directconv2d/impl.h

Lines changed: 15 additions & 19 deletions
Original file line number · Diff line number · Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022-2023, 2025 Arm Limited.
2+
* Copyright (c) 2022-2023 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -61,13 +61,13 @@ void linearize_volume_nchw(const uint8_t *const in_ptr,
6161
int dilation_x,
6262
int dilation_y)
6363
{
64-
const int kernel_area = kernel_width * kernel_height;
65-
const int x_e = top_left_x + kernel_width * dilation_x;
66-
const int y_e = top_left_y + kernel_height * dilation_y;
64+
const int kernel_size2 = kernel_width * kernel_height;
65+
const int x_e = top_left_x + kernel_width * dilation_x;
66+
const int y_e = top_left_y + kernel_height * dilation_y;
6767

6868
// Linearize volume
6969
int d = 0;
70-
// This for loop linearizes a volume with 3 slices. This allows:
70+
// This for loop linearize a volume with 3 slices. This allows:
7171
// 1) to reduce the iterations of the outer for loop "d"
7272
// 2) to have an optimized im2col for the first convolution layer where usually we have 3 IFMs
7373
for (; d <= (kernel_depth - 3); d += 3)
@@ -79,9 +79,9 @@ void linearize_volume_nchw(const uint8_t *const in_ptr,
7979
// All the values will be the offset (will be zeros when not quantized)
8080
for (int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr)
8181
{
82-
*(out_ptr + 0 * kernel_area) = pad_value;
83-
*(out_ptr + 1 * kernel_area) = pad_value;
84-
*(out_ptr + 2 * kernel_area) = pad_value;
82+
*(out_ptr + 0 * kernel_size2) = pad_value;
83+
*(out_ptr + 1 * kernel_size2) = pad_value;
84+
*(out_ptr + 2 * kernel_size2) = pad_value;
8585
}
8686
}
8787
else
@@ -90,23 +90,23 @@ void linearize_volume_nchw(const uint8_t *const in_ptr,
9090
{
9191
if ((x < 0 || x >= input_w) && has_pads)
9292
{
93-
*(out_ptr + 0 * kernel_area) = pad_value;
94-
*(out_ptr + 1 * kernel_area) = pad_value;
95-
*(out_ptr + 2 * kernel_area) = pad_value;
93+
*(out_ptr + 0 * kernel_size2) = pad_value;
94+
*(out_ptr + 1 * kernel_size2) = pad_value;
95+
*(out_ptr + 2 * kernel_size2) = pad_value;
9696
}
9797
else
9898
{
99-
*(out_ptr + 0 * kernel_area) = *(reinterpret_cast<const T *>(
99+
*(out_ptr + 0 * kernel_size2) = *(reinterpret_cast<const T *>(
100100
in_ptr + ((d + 0) * input_stride_z + y * input_stride_y + x * input_stride_x)));
101-
*(out_ptr + 1 * kernel_area) = *(reinterpret_cast<const T *>(
101+
*(out_ptr + 1 * kernel_size2) = *(reinterpret_cast<const T *>(
102102
in_ptr + ((d + 1) * input_stride_z + y * input_stride_y + x * input_stride_x)));
103-
*(out_ptr + 2 * kernel_area) = *(reinterpret_cast<const T *>(
103+
*(out_ptr + 2 * kernel_size2) = *(reinterpret_cast<const T *>(
104104
in_ptr + ((d + 2) * input_stride_z + y * input_stride_y + x * input_stride_x)));
105105
}
106106
}
107107
}
108108
}
109-
out_ptr += 2 * kernel_area;
109+
out_ptr += 2 * kernel_size2;
110110
}
111111

112112
// Left over
@@ -252,7 +252,6 @@ void linearize_volume_nhwc(const uint8_t *const in_ptr,
252252
for (int e = 0; e < kernel_width; e++)
253253
{
254254
memcpy(out_ptr, reinterpret_cast<const T *>(offset_ptr + e * channel_chunk_size), channel_chunk_size);
255-
memset(static_cast<void *>(out_ptr + input_c), pad_value, pad_right * element_size);
256255
out_ptr += input_c + pad_right;
257256
}
258257
}
@@ -279,7 +278,6 @@ void linearize_volume_nhwc(const uint8_t *const in_ptr,
279278
{
280279
memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)),
281280
channel_chunk_size);
282-
memset(static_cast<void *>(out_ptr + input_c), pad_value, pad_right * element_size);
283281
out_ptr += input_c + pad_right;
284282
}
285283
}
@@ -291,7 +289,6 @@ void linearize_volume_nhwc(const uint8_t *const in_ptr,
291289
{
292290
memcpy(out_ptr, reinterpret_cast<const T *>(offset_ptr + e * channel_chunk_size),
293291
channel_chunk_size);
294-
memset(static_cast<void *>(out_ptr + input_c), pad_value, pad_right * element_size);
295292
out_ptr += input_c + pad_right;
296293
}
297294
}
@@ -362,7 +359,6 @@ void run_im2col(const ITensor *src,
362359
// Linearize volume
363360
if (is_nchw)
364361
{
365-
ARM_COMPUTE_ERROR_ON(input_pad_right > 0);
366362
linearize_volume_nchw<T, has_pads>(
367363
input_ptr, output_ptr, has_bias, start_w, start_h, kernel_width, kernel_height, input_c, input_w,
368364
input_h, input_stride_x, input_stride_y, input_stride_z, pad_value, dilation.x(), dilation.y());

0 commit comments

Comments (0)