// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_conv.hpp"

#include <arm_compute/core/CoreTypes.h>
#include <arm_compute/core/Error.h>
#include <arm_compute/core/QuantizationInfo.h>
#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>

#include <any>
#include <cmath>
#include <memory>

#include "acl_utils.hpp"
#include "cpu_shape.h"
#include "memory_desc/cpu_memory_desc.h"
#include "nodes/common/cpu_convert.h"
#include "nodes/executors/acl/acl_common_executor.hpp"
#include "nodes/executors/convolution_config.hpp"
#include "nodes/executors/debug_messages.hpp"
#include "nodes/executors/executor.hpp"
#include "nodes/executors/memory_arguments.hpp"
#include "openvino/core/except.hpp"
#include "openvino/core/type/element_type.hpp"
#include "post_ops.hpp"
#include "utils/general_utils.h"

namespace ov::intel_cpu {

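// Prepares the ACL convolution descriptors (WeightsInfo, PadStrideInfo, dilation) up front
// and captures any fused post-op parameters from the convolution attributes.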
ACLConvolutionExecutor::ACLConvolutionExecutor(const ConvAttrs& attrs,
                                               const MemoryArgs& memory,
                                               [[maybe_unused]] const ExecutorContext::CPtr& context)
    : weightScale(attrs.dqScales) {
    MemoryDescPtr srcMemPtr = memory.at(ARG_SRC_0)->getDescPtr();
    MemoryDescPtr weiMemPtr = memory.at(ARG_WEI)->getDescPtr();
    MemoryDescPtr dstMemPtr = memory.at(ARG_DST)->getDescPtr();

    Shape weiShape = weiMemPtr->getShape();
    Shape srcShape = srcMemPtr->getShape();
    Shape dstShape = dstMemPtr->getShape();

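    // Grouped weights carry an extra leading group dimension, so offset the kernel
    // dimension indices by one when the weights rank exceeds the source rank.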
    const auto with_groups = static_cast<int>(weiShape.getRank() == srcShape.getRank() + 1);
    const int kh = weiShape.getDims()[with_groups + srcShape.getRank() - 2];
    const int kw = weiShape.getDims()[with_groups + srcShape.getRank() - 1];
    const int oc = dstShape.getDims()[1];

    weightsInfo = arm_compute::WeightsInfo(false, kw, kh, oc, false, arm_compute::WeightFormat::UNSPECIFIED);
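    // The padding vectors are ordered [top, left] / [bottom, right] for 2D spatial dims;
    // for a 1D convolution only the horizontal padding is present.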
    auto paddingLeft = (attrs.paddingL.size() >= 2U) ? attrs.paddingL[1] : attrs.paddingL[0];
    auto paddingRight = (attrs.paddingR.size() >= 2U) ? attrs.paddingR[1] : attrs.paddingR[0];
    auto paddingTop = (attrs.paddingL.size() >= 2U) ? attrs.paddingL[0] : 0;
    auto paddingBottom = (attrs.paddingR.size() >= 2U) ? attrs.paddingR[0] : 0;
    padStrideInfo = arm_compute::PadStrideInfo(attrs.stride[0],
                                               attrs.stride[1],
                                               paddingLeft,
                                               paddingRight,
                                               paddingTop,
                                               paddingBottom,
                                               arm_compute::DimensionRoundingType::FLOOR);
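    // attrs.dilation is stored as (dilation - 1), while ACL expects the actual dilation,
    // hence the +1; Size2D takes (width, height), so the spatial axes are swapped.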
    dilation = arm_compute::Size2D(attrs.dilation[1] + 1, attrs.dilation[0] + 1);

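    // ACL can fuse at most one post op: either an activation (mapped to
    // ActivationLayerInfo) or a requantizing FakeQuantize.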
    if (attrs.postOps.size() == 1) {
        if (const auto* const activation = std::any_cast<ActivationPostOp>(attrs.postOps.data())) {
            activationLayerInfo = getActivationLayerInfo(convertToEltwiseAlgorithm(activation->type()),
                                                         activation->alpha(),
                                                         activation->beta(),
                                                         activation->gamma());
        } else if (const auto* const fq = std::any_cast<FakeQuantizePostOp>(attrs.postOps.data())) {
            fqInputScale = fq->inputScale();
            fqInputShift = fq->inputShift();
            fqOutputScale = fq->outputScale();
            fqOutputShift = fq->outputShift();
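            // When the output scale is exactly 1 and the output shift is integral, fold the
            // output shift into the input shift so the whole FakeQuantize collapses into the
            // destination quantization info.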
            if (fqOutputScale.size() == 1 && fqOutputScale[0] == 1.0F && fqOutputShift.size() == 1 &&
                fqOutputShift[0] == std::trunc(fqOutputShift[0])) {
                for (auto& v : fqInputShift) {
                    v += fqOutputShift[0];
                }
                fqOutputShift.clear();
            }
        } else {
            OPENVINO_THROW("ACLConvolutionExecutor: the executor supports FakeQuantize and Activation post ops only");
        }
    } else if (attrs.postOps.size() > 1) {
        OPENVINO_THROW("ACLConvolutionExecutor: ACL does not support more than 1 post op");
    }
}

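// This executor handles quantized convolutions only: u8/i8 activations with i8 weights
// and at most one fused post op.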
bool ACLConvolutionExecutor::supports(const ConvConfig& config) {
    bool isQuantized = any_of(config.descs.at(ARG_SRC)->getPrecision(), ov::element::u8, ov::element::i8) &&
                       config.descs.at(ARG_WEI)->getPrecision() == ov::element::i8;

    VERIFY(isQuantized, UNSUPPORTED_SRC_PRECISIONS);
    VERIFY(config.attrs.postOps.size() <= 1U, UNSUPPORTED_BY_EXECUTOR);

    return true;
}

arm_compute::Status ACLConvolutionExecutor::validateTensorsInfo(const ACLInfos& aclMemoryInfos) {
    // Note: LPT propagates the dequantization scales from src and weights to the conv output, and the
    // resulting scale is applied as the weight scale. So the quantization configuration is formed as follows:
    // - src: quantization info is always trivial
    // - weights: scale is equal to the resulting dequantization scale after Convolution propagated by LPT;
    //            shift is not supported
    // - destination: scale is formed from the requantizing FakeQuantize parameters: scale = 1.0 / input scale,
    //                shift = input shift
    aclMemoryInfos[ACLArgs::ACL_SRC_0]->set_quantization_info(arm_compute::QuantizationInfo(1.0));
    aclMemoryInfos[ACLArgs::ACL_WEI]->set_quantization_info(
        arm_compute::QuantizationInfo(weightScale.empty() ? 1.0F : weightScale[0]));
    aclMemoryInfos[ACLArgs::ACL_DST]->set_quantization_info(
        arm_compute::QuantizationInfo(fqInputScale.empty() ? 1.0F : 1.0F / fqInputScale[0],
                                      fqInputShift.empty() ? 0 : fqInputShift[0]));

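    // Delegate the final check to ACL so unsupported configurations are rejected
    // before the function is configured.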
    return arm_compute::NEConvolutionLayer::validate(aclMemoryInfos[ACLArgs::ACL_SRC_0].get(),
                                                     aclMemoryInfos[ACLArgs::ACL_WEI].get(),
                                                     aclMemoryInfos[ACLArgs::ACL_BIAS].get(),
                                                     aclMemoryInfos[ACLArgs::ACL_DST].get(),
                                                     padStrideInfo,
                                                     weightsInfo,
                                                     dilation,
                                                     activationLayerInfo);
}

| 126 | + |
| 127 | +ACLFunction ACLConvolutionExecutor::configureFunction(const ACLTensors& aclMemoryTensors) { |
| 128 | + auto neConv = std::make_unique<arm_compute::NEConvolutionLayer>(); |
| 129 | + |
| 130 | + neConv->configure(aclMemoryTensors[ACLArgs::ACL_SRC_0].get(), |
| 131 | + aclMemoryTensors[ACLArgs::ACL_WEI].get(), |
| 132 | + aclMemoryTensors[ACLArgs::ACL_BIAS].get(), |
| 133 | + aclMemoryTensors[ACLArgs::ACL_DST].get(), |
| 134 | + padStrideInfo, |
| 135 | + weightsInfo, |
| 136 | + dilation, |
| 137 | + activationLayerInfo); |
| 138 | + return neConv; |
| 139 | +} |
| 140 | + |
std::shared_ptr<arm_compute::TensorInfo> ACLConvolutionExecutor::initTensorInfo(
    const arm_compute::TensorShape& tensorShape,
    const arm_compute::DataType& dataType,
    const arm_compute::DataLayout& dataLayout) {
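    // ACL implements quantized convolution for the QASYMM8/QASYMM8_SIGNED data types,
    // so remap plain U8/S8 tensors before the base class builds the TensorInfo.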
    arm_compute::DataType result = arm_compute::DataType::UNKNOWN;
    switch (dataType) {
    case arm_compute::DataType::S8: {
        result = arm_compute::DataType::QASYMM8_SIGNED;
        break;
    }
    case arm_compute::DataType::U8: {
        result = arm_compute::DataType::QASYMM8;
        break;
    }
    default: {
        result = dataType;
        break;
    }
    }

    return ACLCommonExecutor::initTensorInfo(tensorShape, result, dataLayout);
}

} // namespace ov::intel_cpu