2222namespace onert ::backend::cpu::ops
2323{
2424
25- void DepthwiseConvolutionLayer::prepareF32 ()
26- {
27- if (_dilationWidth != 1 || _dilationHeight != 1 || _strideWidth != _strideHeight)
28- return ;
29-
30- // DepthwiseConvOp cpu kernel needs additional memory to perform with multi-
31- // threads. So, we allocate it here and pass it to the kernel.
32- const int64_t k_packet_size = nnfw::cker::eigen_support::kPacketSize <float >();
33-
34- const auto out_shape = getShape (_output);
35- const auto filter_shape = getShape (_kernel);
36- const int batch = out_shape.Dims (0 );
37- const int out_depth = out_shape.Dims (3 );
38- const int filter_rows = filter_shape.Dims (1 );
39- const int filter_cols = filter_shape.Dims (2 );
40-
41- const int filter_spatial_size = filter_rows * filter_cols;
42- const int padded_filter_inner_dim_size =
43- ((out_depth + k_packet_size - 1 ) / k_packet_size) * k_packet_size;
44-
45- _use_padded_filter = (out_depth % k_packet_size) == 0 ? false : true ;
46-
47- // prepare padded_filter buffer for cker
48- auto padded_filter_info = ir::OperandInfo (_kernel->get_info ());
49- padded_filter_info.shape ({batch, filter_spatial_size, padded_filter_inner_dim_size});
50- _padded_filter = std::make_unique<Tensor>(padded_filter_info, nullptr );
51- _padded_filter->setBuffer (std::make_shared<basic::Allocator>(_padded_filter->total_size ()));
52-
53- // prepare out_bprop and in_bprop buffer for cker
54- const int thread_count = nnfw::cker::eigen_support::getThreadCount () + 1 ;
55-
56- auto filter_buffers_info = ir::OperandInfo (_kernel->get_info ());
57- filter_buffers_info.shape ({thread_count, filter_spatial_size, padded_filter_inner_dim_size});
58- _filter_buffers = std::make_unique<Tensor>(filter_buffers_info, nullptr );
59- _filter_buffers->setBuffer (std::make_shared<basic::Allocator>(_filter_buffers->total_size ()));
60- }
61-
// NOTE(review): this span is a rendered unified diff, not compilable C++ —
// old/new line numbers and '-'/'+' markers are fused into each line and a
// '@@' hunk header elides the middle of the function body (the op_params
// setup is not visible here). Restore the plain source from version control.
//
// What the visible hunk shows: convFloat32() previously dispatched between
// the multithreaded cker::DepthwiseConvOp (taken when there is no dilation
// and W/H strides are equal, using the _padded_filter/_filter_buffers
// scratch tensors) and the generic cker::DepthwiseConv. The '+' side drops
// that dispatch and always calls cker::DepthwiseConv with the ruy context,
// keeping the DepthwiseConvOp call shape only as a TODO comment.
6225void DepthwiseConvolutionLayer::convFloat32 ()
6326{
6427 float output_activation_min = 0 , output_activation_max = 0 ;
@@ -75,23 +38,24 @@ void DepthwiseConvolutionLayer::convFloat32()
7538 op_params.float_activation_min = output_activation_min;
7639 op_params.float_activation_max = output_activation_max;
7740
78- // Since DepthwiseConvOp does not support dilation and different W/H stride yet,
79- // it uses the existing kernel in this case.
80- if (_dilationWidth == 1 && _dilationHeight == 1 && _strideWidth == _strideHeight)
81- {
82- nnfw::cker::DepthwiseConvOp (op_params, getShape (_input), getBuffer<float >(_input),
83- getShape (_kernel), getBuffer<float >(_kernel), getShape (_bias),
84- getBuffer<float >(_bias), getBuffer<float >(_padded_filter.get ()),
85- _use_padded_filter, getBuffer<float >(_filter_buffers.get ()),
86- getShape (_output), getBuffer<float >(_output));
87- }
88- else
89- {
90- nnfw::cker::DepthwiseConv<float , float >(
91- op_params, getShape (_input), getBuffer<float >(_input), getShape (_kernel),
92- getBuffer<float >(_kernel), getShape (_bias), getBuffer<float >(_bias), getShape (_output),
93- getBuffer<float >(_output), _external_context->ruy_context ());
94- }
41+ // TODO: Use the following call if TensorBuilder manages padded_filter_data
42+ // and filter_buffers_data:
43+ //
44+ // void DepthwiseConvOp(
45+ // const DepthwiseConvParams &params,
46+ // const Shape &input_shape, const float *input_data,
47+ // const Shape &filter_shape, const float *filter_data,
48+ // const Shape &bias_shape, const float *bias_data,
49+ // float *padded_filter_data, bool pad_filter,
50+ // float *filter_buffers_data,
51+ // const Shape &output_shape, float *output_data
52+ // );
53+ //
54+ // See https://github.com/Samsung/ONE/pull/13669 for an example of using DepthwiseConvOp
55+ nnfw::cker::DepthwiseConv<float , float >(
56+ op_params, getShape (_input), getBuffer<float >(_input), getShape (_kernel),
57+ getBuffer<float >(_kernel), getShape (_bias), getBuffer<float >(_bias), getShape (_output),
58+ getBuffer<float >(_output), _external_context->ruy_context ());
9559}
9660
9761void DepthwiseConvolutionLayer::convQ8uPerTensor ()
@@ -309,10 +273,6 @@ void DepthwiseConvolutionLayer::configure(
309273 prepareQ8iHybridPerChannel ();
310274 _prepared = true ;
311275 }
312- else if (_input->data_type () == OperandType::FLOAT32)
313- {
314- prepareF32 ();
315- }
316276 else if (_input->data_type () == OperandType::QUANT_INT8_ASYMM)
317277 {
318278 if (_kernel->is_constant () && !_input->is_dynamic () && !_output->is_dynamic ())
0 commit comments