Skip to content

Commit 18f668d

Browse files
authored
[Cherry-pick][NNAdapter][QualcommQNN] Supports ERNIE nano fully quantized model (#9618) (#9645)
1 parent 9219383 commit 18f668d

25 files changed

+509
-385
lines changed

lite/backends/nnadapter/nnadapter/include/nnadapter/optimizer/pattern_matcher.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ class PatternMatcher {
9898
int index = -1);
9999
Pattern* IsOperationOutputOperand(NNAdapterOperationType type,
100100
int index = -1);
101+
Pattern* IsModelInputOperand();
102+
Pattern* IsModelOutputOperand();
103+
Pattern* IsNotModelInputOperand();
104+
Pattern* IsNotModelOutputOperand();
101105
Pattern* CheckInputCount(int num);
102106
Pattern* CheckOutputCount(int num);
103107
// Mark the pattern matched node to be deleted, so its inlinks and outlinks

lite/backends/nnadapter/nnadapter/include/nnadapter/utility/modeling.h

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -186,10 +186,14 @@ core::Operand* InsertReshapeOperation(
186186
const NNAdapterOperandDimensionType& input_dimensions,
187187
std::vector<int32_t> shape = {});
188188
// Append or insert a dummy add operation, set the addend to a zero operand
189-
core::Operand* AppendDummyOperation(core::Model* model,
190-
core::Operand* input_operand);
191-
core::Operand* InsertDummyOperation(core::Model* model,
192-
core::Operand* output_operand);
189+
core::Operand* AppendDummyAddOperation(core::Model* model,
190+
core::Operand* input_operand);
191+
core::Operand* InsertDummyAddOperation(core::Model* model,
192+
core::Operand* output_operand);
193+
core::Operand* AppendDummySubOperation(core::Model* model,
194+
core::Operand* input_operand);
195+
core::Operand* InsertDummySubOperation(core::Model* model,
196+
core::Operand* output_operand);
193197
// Append or insert a unary activiation or other operation which has only one
194198
// input and output operand
195199
core::Operand* AppendUnaryOperation(core::Model* model,
@@ -206,6 +210,13 @@ core::Operand* AppendRequantOperation(core::Model* model,
206210
core::Operand* InsertRequantOperation(core::Model* model,
207211
core::Operand* output_operand,
208212
void* input_quant_params);
213+
// Append or insert a softmax operation
214+
core::Operand* AppendSoftmaxOperation(core::Model* model,
215+
core::Operand* input_operand,
216+
int32_t axis);
217+
core::Operand* InsertSoftmaxOperation(core::Model* model,
218+
core::Operand* output_operand,
219+
int32_t axis);
209220

210221
// Sort the operations of the specified model in topological order
211222
std::vector<const core::Operation*> SortOperationsInTopologicalOrder(

lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/optimizer/fix_multiple_outputs_ops.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ void FixMultipleOutputsOps(core::Model* model) {
4141
auto output_operand = output_operands[i];
4242
if (IsModelOutputOperand(output_operand)) {
4343
auto dummy_output_operand =
44-
InsertDummyOperation(model, output_operand);
44+
InsertDummyAddOperation(model, output_operand);
4545
UpdateOperationOutputOperands(
4646
operation, output_operand, dummy_output_operand);
4747
}

lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/optimizer/fix_no_inputs_ops.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ void FixNoInputsOps(core::Model* model) {
3333
auto output_operand = model->output_operands[i];
3434
if (IsModelOutputOperand(output_operand)) {
3535
output_operand->type.lifetime = NNADAPTER_CONSTANT_COPY;
36-
auto dummy_output_operand = AppendDummyOperation(model, output_operand);
36+
auto dummy_output_operand =
37+
AppendDummyAddOperation(model, output_operand);
3738
UpdateModelOutputOperands(model, output_operand, dummy_output_operand);
3839
}
3940
}

lite/backends/nnadapter/nnadapter/src/driver/huawei_ascend_npu/optimizer/fix_reduce_ops_scalar_output.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ static void ReduceOpsAddDummyOperation(core::Model* model,
3636
auto reduce_all =
3737
axes_size == static_cast<int>(input_operand->type.dimensions.count);
3838
if (!keep_dim && reduce_all && IsModelOutputOperand(output_operand)) {
39-
auto dummy_output_operand = InsertDummyOperation(model, output_operand);
39+
auto dummy_output_operand = InsertDummyAddOperation(model, output_operand);
4040
UpdateOperationOutputOperands(
4141
operation, output_operand, dummy_output_operand);
4242
}

lite/backends/nnadapter/nnadapter/src/driver/huawei_kirin_npu/optimizer/fix_multiple_outputs_ops.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ void FixMultipleOutputsOps(core::Model* model) {
3737
auto output_operand = output_operands[i];
3838
if (IsModelOutputOperand(output_operand)) {
3939
auto dummy_output_operand =
40-
InsertDummyOperation(model, output_operand);
40+
InsertDummyAddOperation(model, output_operand);
4141
UpdateOperationOutputOperands(
4242
operation, output_operand, dummy_output_operand);
4343
}

lite/backends/nnadapter/nnadapter/src/driver/rockchip_npu/optimizer/fix_ops.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ static void FixRELUDepthwiseConv2D(core::Model* model,
4747
bool is_depthwise_mode = group != 1 && input_channel_size == group &&
4848
output_channel_size % input_channel_size == 0;
4949
if (is_depthwise_mode) {
50-
auto dummy_output_operand = InsertDummyOperation(model, output_operand);
50+
auto dummy_output_operand =
51+
InsertDummyAddOperation(model, output_operand);
5152
UpdateOperationOutputOperands(
5253
operation, output_operand, dummy_output_operand);
5354
break;

lite/backends/nnadapter/nnadapter/src/optimizer/pattern_matcher.cc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,42 @@ PatternMatcher::Pattern::IsOperationOutputOperand(NNAdapterOperationType type,
179179
return this;
180180
}
181181

182+
NNADAPTER_EXPORT PatternMatcher::Pattern *
183+
PatternMatcher::Pattern::IsModelInputOperand() {
184+
IsOperand();
185+
conditions.emplace_back([=](const Node *node) {
186+
return nnadapter::IsModelInputOperand(node->operand);
187+
});
188+
return this;
189+
}
190+
191+
NNADAPTER_EXPORT PatternMatcher::Pattern *
192+
PatternMatcher::Pattern::IsModelOutputOperand() {
193+
IsOperand();
194+
conditions.emplace_back([=](const Node *node) {
195+
return nnadapter::IsModelOutputOperand(node->operand);
196+
});
197+
return this;
198+
}
199+
200+
NNADAPTER_EXPORT PatternMatcher::Pattern *
201+
PatternMatcher::Pattern::IsNotModelInputOperand() {
202+
IsOperand();
203+
conditions.emplace_back([=](const Node *node) {
204+
return !nnadapter::IsModelInputOperand(node->operand);
205+
});
206+
return this;
207+
}
208+
209+
NNADAPTER_EXPORT PatternMatcher::Pattern *
210+
PatternMatcher::Pattern::IsNotModelOutputOperand() {
211+
IsOperand();
212+
conditions.emplace_back([=](const Node *node) {
213+
return !nnadapter::IsModelOutputOperand(node->operand);
214+
});
215+
return this;
216+
}
217+
182218
NNADAPTER_EXPORT PatternMatcher::Pattern *PatternMatcher::Pattern::IsOperation(
183219
NNAdapterOperationType type) {
184220
conditions.emplace_back([type](const Node *node) {

lite/backends/nnadapter/nnadapter/src/utility/modeling.cc

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,7 @@ NNADAPTER_EXPORT core::Operand* InsertReshapeOperation(
732732

733733
core::Operand* AddDummyOperation(core::Model* model,
734734
core::Operand* reference_operand,
735+
NNAdapterOperationType operation_type,
735736
bool after = true) {
736737
auto target_operand = AddOperand(model);
737738
CopyOperandType(&target_operand->type, reference_operand->type);
@@ -754,7 +755,7 @@ core::Operand* AddDummyOperation(core::Model* model,
754755
auto fuse_code_operand = AddInt32ConstantOperand(model, 0);
755756
// Insert a new ADD operation
756757
auto dummy_add_operation = AddOperation(model);
757-
dummy_add_operation->type = NNADAPTER_ADD;
758+
dummy_add_operation->type = operation_type;
758759
dummy_add_operation->input_operands = {
759760
after ? reference_operand : target_operand,
760761
zero_operand,
@@ -764,14 +765,24 @@ core::Operand* AddDummyOperation(core::Model* model,
764765
return target_operand;
765766
}
766767

767-
NNADAPTER_EXPORT core::Operand* AppendDummyOperation(
768+
NNADAPTER_EXPORT core::Operand* AppendDummyAddOperation(
768769
core::Model* model, core::Operand* input_operand) {
769-
return AddDummyOperation(model, input_operand, true);
770+
return AddDummyOperation(model, input_operand, NNADAPTER_ADD, true);
770771
}
771772

772-
NNADAPTER_EXPORT core::Operand* InsertDummyOperation(
773+
NNADAPTER_EXPORT core::Operand* InsertDummyAddOperation(
773774
core::Model* model, core::Operand* output_operand) {
774-
return AddDummyOperation(model, output_operand, false);
775+
return AddDummyOperation(model, output_operand, NNADAPTER_ADD, false);
776+
}
777+
778+
NNADAPTER_EXPORT core::Operand* AppendDummySubOperation(
779+
core::Model* model, core::Operand* input_operand) {
780+
return AddDummyOperation(model, input_operand, NNADAPTER_SUB, true);
781+
}
782+
783+
NNADAPTER_EXPORT core::Operand* InsertDummySubOperation(
784+
core::Model* model, core::Operand* output_operand) {
785+
return AddDummyOperation(model, output_operand, NNADAPTER_SUB, false);
775786
}
776787

777788
core::Operand* AddUnaryOperation(core::Model* model,
@@ -884,6 +895,35 @@ NNADAPTER_EXPORT core::Operand* InsertRequantOperation(
884895
return AddRequantOperation(model, output_operand, input_quant_params, false);
885896
}
886897

898+
core::Operand* AddSoftmaxOperation(core::Model* model,
899+
core::Operand* reference_operand,
900+
int32_t axis = -1,
901+
bool after = true) {
902+
auto target_operand = AddOperand(model);
903+
CopyOperandType(&target_operand->type, reference_operand->type);
904+
if (!IsTemporaryShapeOperand(reference_operand)) {
905+
target_operand->type.lifetime = NNADAPTER_TEMPORARY_VARIABLE;
906+
}
907+
auto softmax_operation = AddOperation(model);
908+
softmax_operation->type = NNADAPTER_SOFTMAX;
909+
auto axis_operand = AddInt32ConstantOperand(model, axis);
910+
softmax_operation->input_operands = {
911+
after ? reference_operand : target_operand, axis_operand};
912+
softmax_operation->output_operands = {after ? target_operand
913+
: reference_operand};
914+
return target_operand;
915+
}
916+
917+
NNADAPTER_EXPORT core::Operand* AppendSoftmaxOperation(
918+
core::Model* model, core::Operand* input_operand, int32_t axis) {
919+
return AddSoftmaxOperation(model, input_operand, axis, true);
920+
}
921+
922+
NNADAPTER_EXPORT core::Operand* InsertSoftmaxOperation(
923+
core::Model* model, core::Operand* output_operand, int32_t axis) {
924+
return AddSoftmaxOperation(model, output_operand, axis, false);
925+
}
926+
887927
#define SORT_OPERATIONS_IN_TOPOLOGICAL_ORDER(T) \
888928
NNADAPTER_EXPORT std::vector<T core::Operation*> \
889929
SortOperationsInTopologicalOrder(T core::Model* model) { \

lite/core/optimizer/mir/elimination/fill_constant_calc_offline_pass.cc

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ void FillConstantCalcOfflinePass::RemoveFillConstantPattern(
5656
}
5757
}
5858
if (has_extra_producers) {
59-
LOG(WARNING)
60-
<< "Unsupported for op output var containing multiple producers";
59+
VLOG(5) << "WARNING: Unsupported for op output var containing multiple "
60+
"producers";
6161
continue;
6262
}
6363
std::set<const Node*> nodes2rm_;
@@ -68,24 +68,24 @@ void FillConstantCalcOfflinePass::RemoveFillConstantPattern(
6868
!op_desc->Input("ValueTensor").empty()) ||
6969
(op_desc->HasInput("str_value") &&
7070
!op_desc->GetAttr<std::string>("str_value").empty())) {
71-
LOG(WARNING) << "Unsupported ValueTensor input or str_value input for "
72-
"fill_contant op.";
71+
VLOG(5) << "WARNING: Unsupported ValueTensor input or str_value input "
72+
"for fill_contant op.";
7373
continue;
7474
} else if (!op_desc->HasAttr("value")) {
75-
LOG(WARNING)
76-
<< "One of ValueTensor, str_value(attr) or value(attr) must be set.";
75+
VLOG(5) << "WARNING: One of ValueTensor, str_value(attr) or value(attr) "
76+
"must be set.";
7777
continue;
7878
}
7979
if ((op_desc->HasInput("ShapeTensor") &&
8080
!op_desc->Input("ShapeTensor").empty()) ||
8181
(op_desc->HasInput("ShapeTensorList") &&
8282
!op_desc->Input("ShapeTensorList").empty())) {
83-
LOG(WARNING) << "Unsupported ShapeTensor or ShapeTensorList input for "
84-
"fill_contant op.";
83+
VLOG(5) << "WARNING: Unsupported ShapeTensor or ShapeTensorList input "
84+
"for fill_contant op.";
8585
continue;
8686
} else if (!op_desc->HasAttr("shape")) {
87-
LOG(WARNING)
88-
<< "One of ShapeTensor, ShapeTensorList or shape(attr) must be set.";
87+
VLOG(5) << "WARNING: One of ShapeTensor, ShapeTensorList or shape(attr) "
88+
"must be set.";
8989
continue;
9090
}
9191
// Get fill_constant's attr
@@ -110,7 +110,7 @@ void FillConstantCalcOfflinePass::RemoveFillConstantPattern(
110110
FillConstData<float>(out_t, static_cast<float>(value));
111111
break;
112112
default:
113-
LOG(WARNING) << "Unsupported dtype for fill_constant op: " << dtype;
113+
VLOG(5) << "WARNING: Unsupported dtype for fill_constant op: " << dtype;
114114
continue;
115115
}
116116
// Offline calc fill_constant, only retain output tensor as persistable

0 commit comments

Comments (0)