Skip to content

Commit 3df38f5

Browse files
GaoWei8 authored and yihuaxu committed
[cherry-pick] Add FC padding, ernie test unit and layernorm parallel (#22198)
* Optimize the kernel implementation of layernorm with openmp (#20895)
* Add ernie c++ inference test (#21015)
* Add ernie unit test test=develop
* Add ernie unit test test=develop
* Add ernie unit test test=develop
* remove ngraph
* optimize gpu test test=develop
* optimize codes test=develop
* fix cmake fails on inference_download_and_uncompress (#21185)
* solve cmake fails on inference_download_and_uncompress test=develop
* solve cmake fails on inference_download_and_uncompress test=develop
* Add fc padding to improve mkl GEMM's performance when N and K are multiple of 128. (#20972)
* Add fc padding to solve mkl performance test=develop
* fix gpu pass and error information test=develop
* fix fc_fuse_pass_test test=develop
* fix error information test=develop
* fix error information test=develop
* fix name and add fc op padding test test=develop
* fix attributes test=develop
* optimize fc padding test=develop
* fix test test=develop
* Polish the codes of fc when needs padding (#21378) test=develop
* Add ernie large c++ inference test (#21365)
* add ernie-large test test=develop
* add ernie large c++ inference test test=develop
* Modify padding strategy: remove weight copy in fc padding (#21650) test=develop
* optimize fc jit (#21878) test=develop

Co-authored-by: Yihua Xu <[email protected]>
1 parent e8e1249 commit 3df38f5

File tree

14 files changed

+545
-156
lines changed

14 files changed

+545
-156
lines changed

paddle/fluid/framework/ir/fc_fuse_pass.cc

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,35 @@ int FCFusePass::ApplyFCPattern(Graph* graph, bool with_relu) const {
8989
std::string activation_type = with_relu ? "relu" : "";
9090
desc.SetAttr("activation_type", activation_type);
9191

92+
// This is to add padding for dimension 128 on concern of MKL performance
93+
auto* scope = param_scope();
94+
auto* weight = scope->FindVar(w->Name())->GetMutable<LoDTensor>();
95+
auto place = weight->place();
96+
bool use_gpu = Get<bool>("use_gpu");
97+
auto* weight_data = weight->data<float>();
98+
auto weight_dims = weight->dims();
99+
int weight_num = product(weight_dims);
100+
int w_h = weight_dims[0];
101+
int w_w = weight_dims[1];
102+
if (!use_gpu) {
103+
if (w_h % 128 == 0 && w_w % 128 == 0) {
104+
auto* weight_data_tmp = new float[weight_num];
105+
for (int i = 0; i < w_h; i++) {
106+
memcpy(weight_data_tmp + i * w_w, weight_data + i * w_w,
107+
w_w * sizeof(float));
108+
}
109+
weight->Resize(DDim{weight_dims[0] + 4, weight_dims[1] + 4});
110+
auto* weight_data_new =
111+
weight->mutable_data<float>(platform::CPUPlace());
112+
for (int i = 0; i < w_h; i++) {
113+
memcpy(weight_data_new + i * (w_w + 4), weight_data_tmp + i * w_w,
114+
w_w * sizeof(float));
115+
}
116+
delete[] weight_data_tmp;
117+
desc.SetAttr("padding_weights", true);
118+
}
119+
}
120+
92121
// For anakin subgraph int8
93122
// When in anakin subgraph int8 mode, the pattern like "fake_quant + mul +
94123
// fake_dequant" can be detected by the quant_dequant_fuse_pass. This pass

paddle/fluid/framework/ir/fc_fuse_pass_tester.cc

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,24 @@ namespace paddle {
2121
namespace framework {
2222
namespace ir {
2323

24+
// Test helper: obtains the variable `name` from `param_scope` via Var(),
// views it as a LoDTensor, resizes it to `dims`, and requests a float
// buffer on the CPU place.
// NOTE(review): Var() presumably creates the variable when it does not yet
// exist, and mutable_data() presumably allocates the storage -- confirm
// against the Scope / Tensor API.
void AddVarToScope(Scope* param_scope, const std::string& name,
25+
const DDim& dims) {
26+
auto* tensor = param_scope->Var(name)->GetMutable<LoDTensor>();
27+
tensor->Resize(dims);
28+
tensor->mutable_data<float>(platform::CPUPlace());
29+
}
30+
31+
// Test helper: heap-allocates a Scope and registers the six parameter
// variables below through AddVarToScope, each with empty dims (`{}`).
// Returns the raw pointer; the test hands it to
// graph->Set("__param_scope__", ...).
// NOTE(review): presumably the graph takes ownership of the returned Scope
// -- confirm, otherwise this allocation is never freed.
Scope* CreateParamScope() {
32+
auto param_scope = new Scope();
33+
AddVarToScope(param_scope, "conv2d_filters_0", {});
34+
AddVarToScope(param_scope, "conv2d_bias_0", {});
35+
AddVarToScope(param_scope, "weights_0", {});
36+
AddVarToScope(param_scope, "weights_1", {});
37+
AddVarToScope(param_scope, "bias_1", {});
38+
AddVarToScope(param_scope, "bias_2", {});
39+
return param_scope;
40+
}
41+
2442
TEST(FCFusePass, basic) {
2543
// inputs operator output
2644
// --------------------------------------------------------
@@ -50,6 +68,8 @@ TEST(FCFusePass, basic) {
5068

5169
std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
5270
auto pass = PassRegistry::Instance().Get("fc_fuse_pass");
71+
pass->Set("use_gpu", new bool(true));
72+
graph->Set("__param_scope__", CreateParamScope());
5373
int num_nodes_before = graph->Nodes().size();
5474
int num_mul_nodes_before = GetNumOpNodes(graph, "mul");
5575
VLOG(3) << DebugString(graph);

paddle/fluid/inference/tests/api/CMakeLists.txt

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -27,10 +27,14 @@ function(download_model_and_data install_dir model_name data_name)
2727
download_data(${install_dir} ${data_name})
2828
endfunction()
2929

30+
# Fetches the reference-result archive `result_name` into `install_dir` by
# forwarding both arguments unchanged to download_data().
function(download_result install_dir result_name)
31+
download_data(${install_dir} ${result_name})
32+
endfunction()
33+
3034
function(inference_analysis_api_test target install_dir filename)
3135
inference_analysis_test(${target} SRCS ${filename}
3236
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark
33-
ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt)
37+
ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt --refer_result=${install_dir}/result.txt)
3438
endfunction()
3539

3640
function(inference_analysis_api_test_build TARGET_NAME filename)
@@ -72,13 +76,6 @@ function(inference_analysis_api_test_with_fake_data_run TARGET_NAME test_binary
7276
--disable_mkldnn_fc=${disable_fc})
7377
endfunction()
7478

75-
function(inference_analysis_api_test_with_refer_result target install_dir filename)
76-
inference_analysis_test(${target} SRCS ${filename}
77-
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
78-
ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt
79-
--refer_result=${install_dir}/result.txt)
80-
endfunction()
81-
8279
function(inference_analysis_api_qat_test_run TARGET_NAME test_binary fp32_model_dir int8_model_dir data_path)
8380
inference_analysis_test_run(${TARGET_NAME}
8481
COMMAND ${test_binary}
@@ -147,6 +144,20 @@ set(PYRAMID_DNN_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/pyramid_dnn")
147144
download_model_and_data(${PYRAMID_DNN_INSTALL_DIR} "PyramidDNN_model.tar.gz" "PyramidDNN_data.txt.tar.gz")
148145
inference_analysis_api_test(test_analyzer_pyramid_dnn ${PYRAMID_DNN_INSTALL_DIR} analyzer_pyramid_dnn_tester.cc)
149146

147+
#Ernie
148+
set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie")
149+
download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_model.tar.gz" "Ernie_data.txt.tar.gz" "Ernie_result.txt.tar.gz")
150+
download_result(${ERNIE_INSTALL_DIR} "Ernie_result.txt.tar.gz")
151+
inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} analyzer_ernie_tester.cc)
152+
153+
#Ernie large
154+
set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie_Large")
155+
download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_large_model.tar.gz" "Ernie_large_data.txt.tar.gz" "Ernie_large_result.txt.tar.gz")
156+
download_result(${ERNIE_INSTALL_DIR} "Ernie_large_result.txt.tar.gz")
157+
inference_analysis_test(test_analyzer_ernie_large SRCS analyzer_ernie_tester.cc
158+
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark
159+
ARGS --infer_model=${ERNIE_INSTALL_DIR}/model --infer_data=${ERNIE_INSTALL_DIR}/data.txt --refer_result=${ERNIE_INSTALL_DIR}/result.txt --ernie_large=true)
160+
150161
# text_classification
151162
set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classification")
152163
download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" "text_classification_data.txt.tar.gz")
@@ -170,14 +181,14 @@ set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr")
170181
if (NOT EXISTS ${OCR_INSTALL_DIR})
171182
inference_download_and_uncompress(${OCR_INSTALL_DIR} "http://paddlemodels.bj.bcebos.com/" "inference-vis-demos%2Focr.tar.gz")
172183
endif()
173-
inference_analysis_api_test_with_refer_result(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc)
184+
inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc)
174185

175186
# mobilenet with transpose op
176187
set(MOBILENET_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet")
177188
if (NOT EXISTS ${MOBILENET_INSTALL_DIR})
178189
inference_download_and_uncompress(${MOBILENET_INSTALL_DIR} "http://paddlemodels.bj.bcebos.com/" "inference-vis-demos%2Fmobilenet.tar.gz")
179190
endif()
180-
inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose ${MOBILENET_INSTALL_DIR} analyzer_vis_tester.cc)
191+
inference_analysis_api_test(test_analyzer_mobilenet_transpose ${MOBILENET_INSTALL_DIR} analyzer_vis_tester.cc)
181192

182193
### Image classification tests with fake data
183194
set(IMG_CLASS_TEST_APP "test_analyzer_image_classification")
@@ -334,13 +345,9 @@ inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc
334345
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
335346
ARGS --infer_model=${RESNET50_MODEL_DIR}/model)
336347

337-
set(CAPI_MODEL_INSTALL_PD_DIR "${INFERENCE_DEMO_INSTALL_DIR}/capi_mobilenet")
338-
if (NOT EXISTS ${CAPI_MODEL_INSTALL_PD_DIR})
339-
inference_download_and_uncompress(${CAPI_MODEL_INSTALL_PD_DIR} "http://paddlemodels.bj.bcebos.com/" "inference-vis-demos%2Fmobilenet.tar.gz")
340-
endif()
341348
inference_analysis_test(test_analyzer_capi_pd_tensor SRCS analyzer_capi_pd_tensor_tester.cc
342349
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
343-
ARGS --infer_model=${CAPI_MODEL_INSTALL_PD_DIR}/model)
350+
ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model)
344351

345352
if(WITH_MKLDNN)
346353
inference_analysis_test(test_analyzer_capi_int SRCS analyzer_capi_int_tester.cc

paddle/fluid/inference/tests/api/analyzer_bert_tester.cc

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,6 @@ void profile(bool use_mkldnn = false, bool use_ngraph = false) {
153153

154154
if (use_mkldnn) {
155155
config.EnableMKLDNN();
156-
config.pass_builder()->AppendPass("fc_mkldnn_pass");
157156
}
158157

159158
if (use_ngraph) {
@@ -193,7 +192,6 @@ void compare(bool use_mkldnn = false, bool use_ngraph = false) {
193192
SetConfig(&cfg);
194193
if (use_mkldnn) {
195194
cfg.EnableMKLDNN();
196-
cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
197195
}
198196

199197
if (use_ngraph) {

0 commit comments

Comments
 (0)