Skip to content

Commit 937286e

Browse files
Sqvidannop-w
andcommitted
feat: add stateless wrapper for CpuFullyConnected
To maintain thread-safety guarantees, the stateless wrapper is restricted to fixed-format inputs only. Resolves: MLINFSW-1504 Change-Id: I2673f3c8e502e85aba867bfbe31f9e64d9119797 Co-authored-by: Annop Wongwathanarat <[email protected]> Signed-off-by: Siddhartha Menon <[email protected]> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14686 Benchmark: Arm Jenkins <[email protected]> Comments-Addressed: Arm Jenkins <[email protected]> Tested-by: Arm Jenkins <[email protected]> Reviewed-by: Gunes Bayir <[email protected]>
1 parent f0800e1 commit 937286e

File tree

8 files changed

+724
-0
lines changed

8 files changed

+724
-0
lines changed

Android.bp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,6 +1000,7 @@ cc_library_static {
10001000
"src/runtime/experimental/operators/CpuDepthwiseConv2d.cpp",
10011001
"src/runtime/experimental/operators/CpuDequantize.cpp",
10021002
"src/runtime/experimental/operators/CpuElementwise.cpp",
1003+
"src/runtime/experimental/operators/CpuFullyConnected.cpp",
10031004
"src/runtime/experimental/operators/CpuGEMMLowp.cpp",
10041005
"src/runtime/experimental/operators/CpuGemm.cpp",
10051006
"src/runtime/experimental/operators/CpuGemmConv2d.cpp",
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/*
2+
* Copyright (c) 2021-2023, 2025 Arm Limited.
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to
8+
* deal in the Software without restriction, including without limitation the
9+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
* sell copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
#ifndef ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUFULLYCONNECTED_H
25+
#define ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUFULLYCONNECTED_H
26+
27+
#include "arm_compute/core/TensorInfo.h"
28+
#include "arm_compute/function_info/FullyConnectedLayerInfo.h"
29+
#include "arm_compute/runtime/NEON/INEOperator.h"
30+
31+
#include <memory>
32+
33+
namespace arm_compute
34+
{
35+
namespace experimental
36+
{
37+
namespace op
38+
{
39+
/** Stateless wrapper for cpu::CpuFullyConnected
40+
*
41+
* @note weights must be fixed-format
42+
* @note fc_info.transpose_weights must be false: transposing the weights is not supported
* @note only a fully connected layer that follows another fully connected layer is supported;
*       validate() rejects the configuration used after a Convolution Layer
* @note only all-F16 or all-F32 configurations are accepted by validate()
43+
*/
44+
class CpuFullyConnected : public INEOperator
45+
{
46+
public:
47+
/** Constructor */
48+
CpuFullyConnected();
49+
/** Destructor */
50+
~CpuFullyConnected();
51+
/** Set the input and output tensors.
52+
*
* @note This function forwards its arguments to the wrapped cpu::CpuFullyConnected without calling
*       validate(); the restrictions listed on validate() are only enforced there.
*
53+
* Valid data layouts:
54+
* - NHWC
55+
* - NCHW
56+
*
57+
* Valid data type configurations:
58+
* |src0 |src1 |src2 |dst |
59+
* |:--------------|:------------------|:------|:--------------|
60+
* |F16 |F16 |F16 |F16 |
61+
* |F32 |F32 |F32 |F32 |
62+
*
63+
* @param[in] src Source tensor info. Data type supported: F16/F32.
64+
* @param[in] weights Weights tensor info. The weights must be 2 dimensional.
65+
* If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
66+
* If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
67+
* Data type supported: Same as @p src.
68+
* @param[in] biases Bias tensor info. Can be nullptr. Data type supported: Same as @p weights.
69+
* @param[out] dst Destination tensor info. Its shape should be equal to the output of a matrix multiplication between:
70+
* - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
71+
* - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
72+
* Data type supported: Same as @p src.
73+
* @param[in] fc_info (Optional) Fully connected layer additional info
74+
* @param[in] weights_info (Optional) Stores necessary compute information when weights are already reshaped
75+
*/
76+
void configure(const ITensorInfo *src,
77+
const ITensorInfo *weights,
78+
const ITensorInfo *biases,
79+
ITensorInfo *dst,
80+
FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(),
81+
const WeightsInfo &weights_info = WeightsInfo());
82+
/** Static function to check if given info will lead to a valid configuration of @ref CpuFullyConnected
83+
*
84+
* Similar to @ref CpuFullyConnected::configure()
85+
*
* Before delegating to cpu::CpuFullyConnected::validate(), this wrapper returns an error status when:
* - src, weights, (optional) biases and dst are not all F32 or all F16
* - @p weights_info does not describe a fixed-format weight layout
* - @p fc_info requests weight transposition (transpose_weights is set)
* - the tensor shapes describe a fully connected layer that follows a Convolution Layer
*
86+
* @return a status
87+
*/
88+
static Status validate(const ITensorInfo *src,
89+
const ITensorInfo *weights,
90+
const ITensorInfo *biases,
91+
const ITensorInfo *dst,
92+
FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(),
93+
const WeightsInfo &weights_info = WeightsInfo());
94+
95+
/** Static function that queries whether there exists fixed-format kernel and if it exists it will return in the first argument in what format
96+
* weights are expected to be reshaped as defined by WeightFormat class. Apart from the first argument the rest of the arguments are the same
97+
* as in cpu::CpuFullyConnected::validate() except that all arguments are required.
98+
*
* @note The query is forwarded unchanged to cpu::CpuFullyConnected::has_opt_impl(); the extra
*       restrictions applied by @ref CpuFullyConnected::validate() are not checked here.
*
99+
* @return a status
100+
*/
101+
static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
102+
const ITensorInfo *src,
103+
const ITensorInfo *weights,
104+
const ITensorInfo *biases,
105+
const ITensorInfo *dst,
106+
FullyConnectedLayerInfo fc_info,
107+
WeightsInfo weights_info);
108+
109+
// Inherited methods overridden; each one forwards to the wrapped cpu::CpuFullyConnected instance.
110+
void run(ITensorPack &tensors) override;
111+
void prepare(ITensorPack &tensors) override;
112+
experimental::MemoryRequirements workspace() const override;
113+
114+
private:
115+
// Pimpl: Impl is only forward-declared here and is defined in the .cpp file.
struct Impl;
116+
std::unique_ptr<Impl> _impl;
117+
};
118+
} // namespace op
119+
} // namespace experimental
120+
} // namespace arm_compute
121+
#endif // ACL_ARM_COMPUTE_RUNTIME_EXPERIMENTAL_OPERATORS_CPUFULLYCONNECTED_H

filelist.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1618,6 +1618,7 @@
16181618
"src/runtime/experimental/operators/CpuDepthwiseConv2d.cpp",
16191619
"src/runtime/experimental/operators/CpuDequantize.cpp",
16201620
"src/runtime/experimental/operators/CpuElementwise.cpp",
1621+
"src/runtime/experimental/operators/CpuFullyConnected.cpp",
16211622
"src/runtime/experimental/operators/CpuGEMMLowp.cpp",
16221623
"src/runtime/experimental/operators/CpuGemm.cpp",
16231624
"src/runtime/experimental/operators/CpuGemmConv2d.cpp",

src/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,7 @@ filegroup(
989989
"runtime/experimental/operators/CpuDepthwiseConv2d.cpp",
990990
"runtime/experimental/operators/CpuDequantize.cpp",
991991
"runtime/experimental/operators/CpuElementwise.cpp",
992+
"runtime/experimental/operators/CpuFullyConnected.cpp",
992993
"runtime/experimental/operators/CpuGEMMLowp.cpp",
993994
"runtime/experimental/operators/CpuGemm.cpp",
994995
"runtime/experimental/operators/CpuGemmConv2d.cpp",

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,7 @@ target_sources(
982982
runtime/experimental/operators/CpuDepthwiseConv2d.cpp
983983
runtime/experimental/operators/CpuDequantize.cpp
984984
runtime/experimental/operators/CpuElementwise.cpp
985+
runtime/experimental/operators/CpuFullyConnected.cpp
985986
runtime/experimental/operators/CpuGEMMLowp.cpp
986987
runtime/experimental/operators/CpuGemm.cpp
987988
runtime/experimental/operators/CpuGemmConv2d.cpp
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
* Copyright (c) 2025 Arm Limited.
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to
8+
* deal in the Software without restriction, including without limitation the
9+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
* sell copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
#include "src/cpu/operators/CpuFullyConnected.h"
25+
26+
#include "arm_compute/runtime/experimental/operators/CpuFullyConnected.h"
27+
28+
namespace arm_compute
29+
{
30+
namespace experimental
31+
{
32+
namespace op
33+
{
34+
using namespace arm_compute::experimental;
35+
36+
struct CpuFullyConnected::Impl
37+
{
38+
std::unique_ptr<cpu::CpuFullyConnected> op{nullptr};
39+
};
40+
41+
CpuFullyConnected::CpuFullyConnected() : _impl(std::make_unique<Impl>())
42+
{
43+
_impl->op = std::make_unique<cpu::CpuFullyConnected>();
44+
}
45+
46+
// Defaulted here, in the translation unit that defines Impl, because the header only
// forward-declares Impl and std::unique_ptr<Impl> needs the complete type to destroy it.
CpuFullyConnected::~CpuFullyConnected() = default;
47+
48+
void CpuFullyConnected::configure(const ITensorInfo *src,
49+
const ITensorInfo *weights,
50+
const ITensorInfo *biases,
51+
ITensorInfo *dst,
52+
FullyConnectedLayerInfo fc_info,
53+
const WeightsInfo &weights_info)
54+
{
55+
_impl->op->configure(src, weights, biases, dst, fc_info, weights_info);
56+
}
57+
/** Check whether the given configuration is supported by the stateless wrapper.
 *
 * The wrapper-specific restrictions are checked first; if they all pass, the result of
 * cpu::CpuFullyConnected::validate() is returned.
 *
 * @param[in] src          Source tensor info. Must not be nullptr.
 * @param[in] weights      Weights tensor info. Must not be nullptr.
 * @param[in] biases       Bias tensor info. Can be nullptr.
 * @param[in] dst          Destination tensor info. Must not be nullptr.
 * @param[in] fc_info      Fully connected layer additional info; transpose_weights must be false.
 * @param[in] weights_info Weights info; its weight format must be a fixed format.
 *
 * @return a status; an error status with a descriptive message if the configuration is rejected.
 */
Status CpuFullyConnected::validate(const ITensorInfo      *src,
                                   const ITensorInfo      *weights,
                                   const ITensorInfo      *biases,
                                   const ITensorInfo      *dst,
                                   FullyConnectedLayerInfo fc_info,
                                   const WeightsInfo      &weights_info)
{
    // src, weights and dst are dereferenced unconditionally below, so reject nullptr up front
    // instead of crashing.
    if (src == nullptr || weights == nullptr || dst == nullptr)
    {
        return Status(ErrorCode::RUNTIME_ERROR, "src, weights and dst must not be nullptr");
    }

    // True when every provided tensor has data type `expected`.
    // biases is optional: the nullptr test must come BEFORE the dereference so that
    // short-circuit evaluation protects biases->data_type().
    const auto all_tensors_are = [&](DataType expected)
    {
        return src->data_type() == expected && weights->data_type() == expected &&
               (biases == nullptr || biases->data_type() == expected) && dst->data_type() == expected;
    };

    if (!(all_tensors_are(DataType::F32) || all_tensors_are(DataType::F16)))
    {
        return Status(ErrorCode::RUNTIME_ERROR, "datatype is not supported");
    }
    if (!arm_compute::is_fixed_format(weights_info.weight_format()))
    {
        return Status(ErrorCode::RUNTIME_ERROR, "only support fixed format weight");
    }
    if (fc_info.transpose_weights)
    {
        return Status(ErrorCode::RUNTIME_ERROR, "transpose weight is not supported");
    }

    // Classify the layer from the tensor shapes:
    //  - batched output (dst dimension 1 > 1): "after convolution" when the src dimensions from
    //    index 3 onwards match the dst dimensions from index 1 onwards;
    //  - non-batched output: "after convolution" when src has more than one dimension.
    const bool is_batched_fc_layer = dst->dimension(1) > 1;
    const bool is_fc_after_conv =
        is_batched_fc_layer
            ? ((TensorShape::num_max_dimensions >= 4) &&
               std::equal(src->tensor_shape().cbegin() + 3, src->tensor_shape().cend(),
                          dst->tensor_shape().cbegin() + 1))
            : (src->num_dimensions() > 1);
    if (is_fc_after_conv)
    {
        return Status(ErrorCode::RUNTIME_ERROR, "only support fully connected layer after fully connected layer");
    }

    return cpu::CpuFullyConnected::validate(src, weights, biases, dst, fc_info, weights_info);
}
98+
/** Query the wrapped operator for a fixed-format kernel.
 *
 * Pure pass-through to cpu::CpuFullyConnected::has_opt_impl(); the extra checks performed by this
 * wrapper's validate() are not applied here.
 */
Status CpuFullyConnected::has_opt_impl(arm_compute::WeightFormat &expected_weight_format,
                                       const ITensorInfo         *src,
                                       const ITensorInfo         *weights,
                                       const ITensorInfo         *biases,
                                       const ITensorInfo         *dst,
                                       FullyConnectedLayerInfo    fc_info,
                                       WeightsInfo                weights_info)
{
    Status query_result =
        cpu::CpuFullyConnected::has_opt_impl(expected_weight_format, src, weights, biases, dst, fc_info, weights_info);
    return query_result;
}
109+
110+
/** Run the wrapped operator on the supplied tensor pack. */
void CpuFullyConnected::run(ITensorPack &tensors)
{
    auto &wrapped_op = *_impl->op;
    wrapped_op.run(tensors);
}
114+
115+
/** Forward the prepare step to the wrapped operator with the supplied tensor pack. */
void CpuFullyConnected::prepare(ITensorPack &tensors)
{
    auto &wrapped_op = *_impl->op;
    wrapped_op.prepare(tensors);
}
119+
120+
/** Return the memory requirements reported by the wrapped operator. */
experimental::MemoryRequirements CpuFullyConnected::workspace() const
{
    const auto &wrapped_op = *_impl->op;
    return wrapped_op.workspace();
}
124+
125+
} // namespace op
126+
} // namespace experimental
127+
} // namespace arm_compute

0 commit comments

Comments
 (0)