Skip to content

Commit aca26c7

Browse files
committed
feat: Updates to operator CpuGEMMLowp for static quantization, and associated tests.
Added function to update quantization parameters used in the kernel. Updated existing tests to cover static quant data types. Added new quantization-specific tests. Added multithreading test.

Resolves: MLINFSW-1814
Signed-off-by: Anna Mayne <[email protected]>
Change-Id: I027b50a49d742db6d1b1a8d529238ba7fda1b813
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/14986
Comments-Addressed: Arm Jenkins <[email protected]>
Benchmark: Arm Jenkins <[email protected]>
Tested-by: Arm Jenkins <[email protected]>
Reviewed-by: Andreas Flöjt <[email protected]>
1 parent 34a2ef2 commit aca26c7

File tree

4 files changed

+291
-72
lines changed

4 files changed

+291
-72
lines changed

arm_compute/runtime/experimental/operators/CpuGEMMLowp.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,17 @@ class CpuGEMMLowp : public INEOperator
8484
const ITensorInfo *output,
8585
const GEMMInfo &gemm_info = GEMMInfo());
8686

87+
/** Update of quantization information at the run stage so that the quantization multipliers can be properly calculated.
88+
*
89+
* Please have a look at NEGEMMConvolutionLayer.h for a more in-depth explanation and example.
90+
*/
91+
void update_quantization_parameters(const QuantizationInfo &a,
92+
const QuantizationInfo &b,
93+
const QuantizationInfo &c,
94+
const DataType data_type,
95+
const bool is_prepared,
96+
const bool negated_offsets);
97+
8798
// Inherited methods overridden
8899
void run(ITensorPack &tensors) override;
89100
void prepare(ITensorPack &tensors) override;

src/runtime/experimental/operators/CpuGEMMLowp.cpp

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021, 2023-2024 Arm Limited.
2+
* Copyright (c) 2017-2021, 2023-2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -23,11 +23,14 @@
2323
*/
2424
#include "arm_compute/runtime/experimental/operators/CpuGEMMLowp.h"
2525

26+
#include "arm_compute/core/Utils.h"
2627
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
2728

2829
#include "src/core/utils/quantization/AsymmHelpers.h"
2930
#include "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.h"
3031

32+
#include <set>
33+
3134
namespace arm_compute
3235
{
3336
namespace experimental
@@ -37,6 +40,7 @@ namespace op
3740
struct CpuGEMMLowp::Impl
3841
{
3942
std::unique_ptr<arm_compute::cpu::CpuGemmLowpMatrixMultiplyCore> op{nullptr};
43+
ActivationLayerInfo act_info{};
4044
bool is_prepared{false};
4145
};
4246

@@ -63,6 +67,7 @@ void CpuGEMMLowp::configure(
6367
b_info_to_use->set_are_values_constant(false);
6468
}
6569

70+
_impl->act_info = gemm_info.activation_info();
6671
_impl->is_prepared = false;
6772
_impl->op->configure(a, b_info_to_use.get(), (c != nullptr ? c : nullptr), output, gemm_info);
6873
}
@@ -83,6 +88,44 @@ Status CpuGEMMLowp::validate(const ITensorInfo *a,
8388
return cpu::CpuGemmLowpMatrixMultiplyCore::validate(a, b_info_to_use.get(), c, output, gemm_info);
8489
}
8590

91+
void CpuGEMMLowp::update_quantization_parameters(const QuantizationInfo &a,
92+
const QuantizationInfo &b,
93+
const QuantizationInfo &c,
94+
const DataType data_type,
95+
const bool is_prepared,
96+
const bool negated_offsets)
97+
{
98+
// Supported activations in GEMM
99+
const std::set<ActivationLayerInfo::ActivationFunction> supported_acts = {
100+
ActivationLayerInfo::ActivationFunction::RELU, ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
101+
ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU};
102+
103+
PixelValue type_min{};
104+
PixelValue type_max{};
105+
std::tie(type_min, type_max) = get_min_max(data_type);
106+
int32_t min_activation = type_min.get<int32_t>();
107+
int32_t max_activation = type_max.get<int32_t>();
108+
109+
const UniformQuantizationInfo uoqinfo = c.uniform();
110+
if (supported_acts.find(_impl->act_info.activation()) != supported_acts.end())
111+
{
112+
std::tie(min_activation, max_activation) =
113+
get_quantized_activation_min_max(_impl->act_info, data_type, uoqinfo);
114+
}
115+
116+
GEMMLowpOutputStageInfo output_info;
117+
output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
118+
output_info.gemmlowp_offset = uoqinfo.offset;
119+
output_info.gemmlowp_min_bound = min_activation;
120+
output_info.gemmlowp_max_bound = max_activation;
121+
output_info.is_quantized_per_channel = false;
122+
output_info.output_data_type = data_type;
123+
const Status status = quantization::calculate_quantized_multipliers(a, b, c, output_info);
124+
ARM_COMPUTE_ERROR_ON(!bool(status));
125+
126+
_impl->op->update_quantization_parameters(output_info, a, b, is_prepared, negated_offsets);
127+
}
128+
86129
void CpuGEMMLowp::run(ITensorPack &tensors)
87130
{
88131
prepare(tensors);

0 commit comments

Comments (0)