Skip to content

Commit 44cb70c

Browse files
committed
Merge remote-tracking branch 'ups/develop' into fix/mac
2 parents c9730d3 + 2466ca1 commit 44cb70c

16 files changed

+177
-126
lines changed

paddle/fluid/framework/details/execution_strategy.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
#pragma once
16+
#include <cstddef> // for size_t
1617

1718
namespace paddle {
1819
namespace framework {
@@ -26,6 +27,7 @@ struct ExecutionStrategy {
2627
bool allow_op_delay_{false};
2728
size_t num_iteration_per_drop_scope_{100};
2829
ExecutorType type_{kDefault};
30+
bool dry_run_{false};
2931
};
3032

3133
} // namespace details

paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,9 @@ void FastThreadedSSAGraphExecutor::RunOpAsync(
128128
size_t complete = 0;
129129
while (op_to_run != nullptr) {
130130
try {
131-
op_to_run->Run(strategy_.use_cuda_);
131+
if (LIKELY(!strategy_.dry_run_)) {
132+
op_to_run->Run(strategy_.use_cuda_);
133+
}
132134
++complete;
133135
} catch (...) {
134136
exception_.Catch(std::current_exception());

paddle/fluid/framework/details/threaded_ssa_graph_executor.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,9 @@ void ThreadedSSAGraphExecutor::RunOp(
211211
if (VLOG_IS_ON(10)) {
212212
VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
213213
}
214-
op->Run(strategy_.use_cuda_);
214+
if (LIKELY(!strategy_.dry_run_)) {
215+
op->Run(strategy_.use_cuda_);
216+
}
215217
VLOG(10) << op << " " << op->Name() << " Done ";
216218
running_ops_--;
217219
ready_var_q->Extend(op->Outputs());

paddle/fluid/framework/details/threaded_ssa_graph_executor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
4848
// Use topological sort algorithm
4949
FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override;
5050

51-
~ThreadedSSAGraphExecutor() {}
51+
~ThreadedSSAGraphExecutor() final = default;
5252

5353
private:
5454
void RunOp(const std::shared_ptr<BlockingQueue<VarHandleBase *>> &ready_var_q,

paddle/fluid/framework/parallel_executor.cc

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,20 @@ class ParallelExecutorPrivate {
3838
explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places)
3939
: places_(places) {}
4040

41+
~ParallelExecutorPrivate() {
42+
if (own_local_scope_) {
43+
for (size_t i = 1; i < local_scopes_.size(); ++i) {
44+
// Skip the first scope, since it is the global scope.
45+
Scope *local_scope = local_scopes_[i];
46+
if (global_scope_->HasKid(local_scope)) {
47+
global_scope_->DeleteScope(local_scope);
48+
}
49+
}
50+
}
51+
}
4152
std::vector<platform::Place> places_;
4253
std::vector<Scope *> local_scopes_;
43-
Scope *global_scope_;
54+
Scope *global_scope_; // not owned
4455
std::unique_ptr<details::SSAGraphExecutor> executor_;
4556

4657
#ifdef PADDLE_WITH_CUDA
@@ -306,16 +317,6 @@ ParallelExecutor::~ParallelExecutor() {
306317
for (auto &p : member_->places_) {
307318
platform::DeviceContextPool::Instance().Get(p)->Wait();
308319
}
309-
310-
if (member_->own_local_scope_) {
311-
for (size_t i = 1; i < member_->local_scopes_.size(); ++i) {
312-
Scope *local_scope = member_->local_scopes_[i];
313-
if (member_->global_scope_->HasKid(local_scope)) {
314-
member_->global_scope_->DeleteScope(local_scope);
315-
}
316-
}
317-
}
318-
319320
// member_ must be destructed before gcs_ since the destructor of
320321
// ReferenceCountOpHandle use raw pointers of gcs_ inside.
321322
member_.reset();

paddle/fluid/inference/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
if(WITH_TESTING)
2-
include(test.cmake) # some generic cmake funtion for inference
2+
include(tests/test.cmake) # some generic cmake funtion for inference
33
endif()
44
# analysis and tensorrt must be added before creating static library,
55
# otherwise, there would be undefined reference to them in static library.

paddle/fluid/inference/tests/api/CMakeLists.txt

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor)
22

3+
function(download_model install_dir model_name)
4+
if (NOT EXISTS ${install_dir})
5+
inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${model_name})
6+
endif()
7+
endfunction()
8+
39
function(download_model_and_data install_dir model_name data_name)
410
if (NOT EXISTS ${install_dir})
511
inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${model_name})
@@ -13,6 +19,13 @@ function(inference_analysis_api_test target install_dir filename)
1319
ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt)
1420
endfunction()
1521

22+
function(inference_analysis_api_test_with_fake_data target install_dir filename model_name)
23+
download_model(${install_dir} ${model_name})
24+
inference_analysis_test(${target} SRCS ${filename}
25+
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
26+
ARGS --infer_model=${install_dir}/model)
27+
endfunction()
28+
1629
# RNN1
1730
if(NOT APPLE)
1831
set(RNN1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn1")
@@ -61,17 +74,13 @@ inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} ana
6174
# ocr
6275
set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr")
6376
if (NOT EXISTS ${OCR_INSTALL_DIR})
64-
inference_download_and_uncompress(${OCR_INSTALL_DIR} "http://paddlemodels.cdn.bcebos.com/" "inference-vis-demos%2Focr.tar.gz")
77+
inference_download_and_uncompress(${OCR_INSTALL_DIR} "http://paddlemodels.cdn.bcebos.com/" "inference-vis-demos%2Focr.tar.gz")
6578
endif()
6679
inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc)
6780

6881
# resnet50
69-
set(RESNET50_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/resnet50")
70-
if (NOT EXISTS ${RESNET50_INSTALL_DIR})
71-
inference_download_and_uncompress(${RESNET50_INSTALL_DIR} ${INFERENCE_URL} "resnet50_model.tar.gz")
72-
endif()
73-
inference_analysis_test(test_analyzer_resnet50 SRCS analyzer_resnet50_tester.cc
74-
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS --infer_model=${RESNET50_INSTALL_DIR}/model)
82+
inference_analysis_api_test_with_fake_data(test_analyzer_resnet50
83+
"${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz")
7584

7685
# anakin
7786
if (WITH_ANAKIN AND WITH_MKL) # only needed in CI

paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -30,25 +30,7 @@ void SetConfig(AnalysisConfig *cfg) {
3030
}
3131

3232
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
33-
PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
34-
35-
PaddleTensor input;
36-
// channel=3, height/width=318
37-
std::vector<int> shape({FLAGS_batch_size, 3, 318, 318});
38-
input.shape = shape;
39-
input.dtype = PaddleDType::FLOAT32;
40-
41-
// fill input data, for profile easily, do not use random data here.
42-
size_t size = FLAGS_batch_size * 3 * 318 * 318;
43-
input.data.Resize(size * sizeof(float));
44-
float *input_data = static_cast<float *>(input.data.data());
45-
for (size_t i = 0; i < size; i++) {
46-
*(input_data + i) = static_cast<float>(i) / size;
47-
}
48-
49-
std::vector<PaddleTensor> input_slots;
50-
input_slots.assign({input});
51-
(*inputs).emplace_back(input_slots);
33+
SetFakeImageInput(inputs, FLAGS_infer_model);
5234
}
5335

5436
// Easy for profiling independently.
@@ -61,13 +43,6 @@ void profile(bool use_mkldnn = false) {
6143
std::vector<std::vector<PaddleTensor>> input_slots_all;
6244
SetInput(&input_slots_all);
6345
TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
64-
65-
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
66-
PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
67-
size_t size = GetSize(outputs[0]);
68-
// output is a 512-dimension feature
69-
EXPECT_EQ(size, 512 * FLAGS_batch_size);
70-
}
7146
}
7247

7348
TEST(Analyzer_resnet50, profile) { profile(); }
@@ -83,8 +58,7 @@ TEST(Analyzer_resnet50, fuse_statis) {
8358
auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
8459
auto fuse_statis = GetFuseStatis(
8560
static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
86-
ASSERT_TRUE(fuse_statis.count("fc_fuse"));
87-
EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
61+
LOG(INFO) << "num_ops: " << num_ops;
8862
}
8963

9064
// Compare result of NativeConfig and AnalysisConfig

paddle/fluid/inference/tests/api/tester_helper.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "paddle/fluid/inference/api/analysis_predictor.h"
2626
#include "paddle/fluid/inference/api/helper.h"
2727
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
28+
#include "paddle/fluid/inference/tests/test_helper.h"
2829
#include "paddle/fluid/platform/profiler.h"
2930

3031
DEFINE_string(infer_model, "", "model path");
@@ -105,6 +106,34 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
105106
return fuse_statis;
106107
}
107108

109+
void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
110+
const std::string &dirname) {
111+
// Set fake_image_data
112+
PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
113+
std::vector<std::vector<int64_t>> feed_target_shapes =
114+
GetFeedTargetShapes(dirname, true, "model", "params");
115+
int dim1 = feed_target_shapes[0][1];
116+
int dim2 = feed_target_shapes[0][2];
117+
int dim3 = feed_target_shapes[0][3];
118+
119+
PaddleTensor input;
120+
std::vector<int> shape({FLAGS_batch_size, dim1, dim2, dim3});
121+
input.shape = shape;
122+
input.dtype = PaddleDType::FLOAT32;
123+
124+
// fill input data, for profile easily, do not use random data here.
125+
size_t size = FLAGS_batch_size * dim1 * dim2 * dim3;
126+
input.data.Resize(size * sizeof(float));
127+
float *input_data = static_cast<float *>(input.data.data());
128+
for (size_t i = 0; i < size; i++) {
129+
*(input_data + i) = static_cast<float>(i) / size;
130+
}
131+
132+
std::vector<PaddleTensor> input_slots;
133+
input_slots.assign({input});
134+
(*inputs).emplace_back(input_slots);
135+
}
136+
108137
void TestOneThreadPrediction(
109138
const AnalysisConfig &config,
110139
const std::vector<std::vector<PaddleTensor>> &inputs,

0 commit comments

Comments
 (0)