Skip to content

Commit 846ecad

Browse files
authored
[onert] Remove internal Tensor from DepthwiseConvolutionLayer (#15337)
This commit removes internal Tensor from DepthwiseConvolutionLayer.
- Delete prepareF32() and its references in CPU op and train op
- Remove `_padded_filter`, `_filter_buffers` members and related includes
- Simplify convFloat32() to always call DepthwiseConv without extra buffers

ONE-DCO-1.0-Signed-off-by: ragmani <ragmani0216@gmail.com>
1 parent b1b820e commit 846ecad

File tree

4 files changed

+43
-64
lines changed

4 files changed

+43
-64
lines changed

runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc

Lines changed: 18 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -22,43 +22,6 @@
2222
namespace onert::backend::cpu::ops
2323
{
2424

25-
void DepthwiseConvolutionLayer::prepareF32()
26-
{
27-
if (_dilationWidth != 1 || _dilationHeight != 1 || _strideWidth != _strideHeight)
28-
return;
29-
30-
// DepthwiseConvOp cpu kernel needs additional memory to perform with multi-
31-
// threads. So, we allocate it here and pass it to the kernel.
32-
const int64_t k_packet_size = nnfw::cker::eigen_support::kPacketSize<float>();
33-
34-
const auto out_shape = getShape(_output);
35-
const auto filter_shape = getShape(_kernel);
36-
const int batch = out_shape.Dims(0);
37-
const int out_depth = out_shape.Dims(3);
38-
const int filter_rows = filter_shape.Dims(1);
39-
const int filter_cols = filter_shape.Dims(2);
40-
41-
const int filter_spatial_size = filter_rows * filter_cols;
42-
const int padded_filter_inner_dim_size =
43-
((out_depth + k_packet_size - 1) / k_packet_size) * k_packet_size;
44-
45-
_use_padded_filter = (out_depth % k_packet_size) == 0 ? false : true;
46-
47-
// prepare padded_filter buffer for cker
48-
auto padded_filter_info = ir::OperandInfo(_kernel->get_info());
49-
padded_filter_info.shape({batch, filter_spatial_size, padded_filter_inner_dim_size});
50-
_padded_filter = std::make_unique<Tensor>(padded_filter_info, nullptr);
51-
_padded_filter->setBuffer(std::make_shared<basic::Allocator>(_padded_filter->total_size()));
52-
53-
// prepare out_bprop and in_bprop buffer for cker
54-
const int thread_count = nnfw::cker::eigen_support::getThreadCount() + 1;
55-
56-
auto filter_buffers_info = ir::OperandInfo(_kernel->get_info());
57-
filter_buffers_info.shape({thread_count, filter_spatial_size, padded_filter_inner_dim_size});
58-
_filter_buffers = std::make_unique<Tensor>(filter_buffers_info, nullptr);
59-
_filter_buffers->setBuffer(std::make_shared<basic::Allocator>(_filter_buffers->total_size()));
60-
}
61-
6225
void DepthwiseConvolutionLayer::convFloat32()
6326
{
6427
float output_activation_min = 0, output_activation_max = 0;
@@ -75,23 +38,24 @@ void DepthwiseConvolutionLayer::convFloat32()
7538
op_params.float_activation_min = output_activation_min;
7639
op_params.float_activation_max = output_activation_max;
7740

78-
// Since DepthwiseConvOp does not support dilation and different W/H stride yet,
79-
// it uses the existing kernel in this case.
80-
if (_dilationWidth == 1 && _dilationHeight == 1 && _strideWidth == _strideHeight)
81-
{
82-
nnfw::cker::DepthwiseConvOp(op_params, getShape(_input), getBuffer<float>(_input),
83-
getShape(_kernel), getBuffer<float>(_kernel), getShape(_bias),
84-
getBuffer<float>(_bias), getBuffer<float>(_padded_filter.get()),
85-
_use_padded_filter, getBuffer<float>(_filter_buffers.get()),
86-
getShape(_output), getBuffer<float>(_output));
87-
}
88-
else
89-
{
90-
nnfw::cker::DepthwiseConv<float, float>(
91-
op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
92-
getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
93-
getBuffer<float>(_output), _external_context->ruy_context());
94-
}
41+
// TODO: Use the following call if TensorBuilder manages padded_filter_data
42+
// and filter_buffers_data:
43+
//
44+
// void DepthwiseConvOp(
45+
// const DepthwiseConvParams &params,
46+
// const Shape &input_shape, const float *input_data,
47+
// const Shape &filter_shape, const float *filter_data,
48+
// const Shape &bias_shape, const float *bias_data,
49+
// float *padded_filter_data, bool pad_filter,
50+
// float *filter_buffers_data,
51+
// const Shape &output_shape, float *output_data
52+
// );
53+
//
54+
// See https://github.com/Samsung/ONE/pull/13669 for an example of using DepthwiseConvOp
55+
nnfw::cker::DepthwiseConv<float, float>(
56+
op_params, getShape(_input), getBuffer<float>(_input), getShape(_kernel),
57+
getBuffer<float>(_kernel), getShape(_bias), getBuffer<float>(_bias), getShape(_output),
58+
getBuffer<float>(_output), _external_context->ruy_context());
9559
}
9660

9761
void DepthwiseConvolutionLayer::convQ8uPerTensor()
@@ -309,10 +273,6 @@ void DepthwiseConvolutionLayer::configure(
309273
prepareQ8iHybridPerChannel();
310274
_prepared = true;
311275
}
312-
else if (_input->data_type() == OperandType::FLOAT32)
313-
{
314-
prepareF32();
315-
}
316276
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
317277
{
318278
if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())

runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include <backend/IPortableTensor.h>
2121
#include "OperationUtils.h"
2222
#include "../ExternalContext.h"
23-
#include "../Tensor.h"
2423

2524
#include <exec/IFunction.h>
2625

@@ -52,7 +51,6 @@ class DepthwiseConvolutionLayer : public ::onert::exec::IFunction
5251
void run() override;
5352

5453
private:
55-
void prepareF32();
5654
void prepareQ8i();
5755
void prepareQ8uPerChannel();
5856
void prepareQ8iHybridPerChannel();
@@ -79,10 +77,6 @@ class DepthwiseConvolutionLayer : public ::onert::exec::IFunction
7977

8078
ir::Activation _activation{ir::Activation::NONE};
8179

82-
bool _use_padded_filter{false};
83-
std::unique_ptr<Tensor> _padded_filter{nullptr};
84-
std::unique_ptr<Tensor> _filter_buffers{nullptr};
85-
8680
private:
8781
std::shared_ptr<ExternalContext> _external_context;
8882

runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.cc

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ namespace onert::backend::train::ops
2828
DepthwiseConvolutionLayer::DepthwiseConvolutionLayer()
2929
: cpu::ops::DepthwiseConvolutionLayer(), _grad_weights{nullptr}, _grad_bias{nullptr},
3030
_back_prop_input{nullptr}, _back_prop_output{nullptr}, _act_back_prop_output{nullptr},
31+
_use_padded_filter{false}, _padded_filter{nullptr}, _filter_buffers{nullptr},
3132
_filter_dim_buffers{nullptr}
3233
{
3334
// DO NOTHING
@@ -83,6 +84,26 @@ void DepthwiseConvolutionLayer::configureBackward(IPortableTensor *back_prop_inp
8384
_filter_dim_buffers = std::make_unique<Tensor>(filter_dim_buffers_info);
8485
_filter_dim_buffers->setBuffer(
8586
std::make_shared<basic::Allocator>(_filter_dim_buffers->total_size()));
87+
88+
_use_padded_filter = (out_depth % k_packet_size) == 0 ? false : true;
89+
90+
const auto filter_shape = getShape(_kernel);
91+
const int batch = incoming_shape.Dims(0);
92+
93+
const int filter_rows = filter_shape.Dims(1);
94+
const int filter_cols = filter_shape.Dims(2);
95+
const int filter_spatial_size = filter_rows * filter_cols;
96+
97+
// prepare padded_filter buffer for cker
98+
auto padded_filter_info = ir::OperandInfo(_kernel->get_info());
99+
padded_filter_info.shape({batch, filter_spatial_size, padded_filter_inner_dim_size});
100+
_padded_filter = std::make_unique<Tensor>(padded_filter_info);
101+
_padded_filter->setBuffer(std::make_shared<basic::Allocator>(_padded_filter->total_size()));
102+
103+
auto filter_buffers_info = ir::OperandInfo(_kernel->get_info());
104+
filter_buffers_info.shape({thread_count, filter_spatial_size, padded_filter_inner_dim_size});
105+
_filter_buffers = std::make_unique<Tensor>(filter_buffers_info);
106+
_filter_buffers->setBuffer(std::make_shared<basic::Allocator>(_filter_buffers->total_size()));
86107
}
87108

88109
void DepthwiseConvolutionLayer::forward(bool) { cpu::ops::DepthwiseConvolutionLayer::run(); }

runtime/onert/backend/train/ops/DepthwiseConvolutionLayer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ class DepthwiseConvolutionLayer : public ::onert::exec::train::ITrainableFunctio
5050

5151
// TODO Consider if these tensors should be built in TensorBuilder
5252
std::unique_ptr<BackPropTensor> _act_back_prop_output;
53+
54+
bool _use_padded_filter;
55+
std::unique_ptr<Tensor> _padded_filter;
56+
std::unique_ptr<Tensor> _filter_buffers;
5357
std::unique_ptr<Tensor> _filter_dim_buffers;
5458
};
5559

0 commit comments

Comments
 (0)