 #include <gflags/gflags.h>
 #include <glog/logging.h>  // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
 #include <gtest/gtest.h>
+#include <fstream>
 #include "paddle/fluid/framework/ir/pass.h"
 #include "paddle/fluid/inference/analysis/ut_helper.h"
+#include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/api/timer.h"
@@ -26,6 +28,7 @@ DEFINE_string(infer_model, "", "Directory of the inference model.");
 DEFINE_string(infer_data, "", "Path of the dataset.");
 DEFINE_int32(batch_size, 1, "batch size.");
 DEFINE_int32(repeat, 1, "How many times to repeat run.");
+DEFINE_int32(topn, -1, "Run top n batches of data to save time");
 
 namespace paddle {
 
@@ -45,41 +48,67 @@ void PrintTime(const double latency, const int bs, const int repeat) {
   LOG(INFO) << "=====================================";
 }
 
-void Main(int batch_size) {
-  // Three sequence inputs.
-  std::vector<PaddleTensor> input_slots(1);
-  // one batch starts
-  // data --
-  int64_t data0[] = {0, 1, 2};
-  for (auto &input : input_slots) {
-    input.data.Reset(data0, sizeof(data0));
-    input.shape = std::vector<int>({3, 1});
-    // dtype --
-    input.dtype = PaddleDType::INT64;
-    // LoD --
-    input.lod = std::vector<std::vector<size_t>>({{0, 3}});
+struct DataReader {
+  DataReader(const std::string &path) : file(new std::ifstream(path)) {}
+
+  bool NextBatch(PaddleTensor *tensor, int batch_size) {
+    PADDLE_ENFORCE_EQ(batch_size, 1);
+    std::string line;
+    tensor->lod.clear();
+    tensor->lod.emplace_back(std::vector<size_t>({0}));
+    std::vector<int64_t> data;
+
+    for (int i = 0; i < batch_size; i++) {
+      if (!std::getline(*file, line)) return false;
+      inference::split_to_int64(line, ' ', &data);
+    }
+    tensor->lod.front().push_back(data.size());
+
+    tensor->data.Resize(data.size() * sizeof(int64_t));
+    memcpy(tensor->data.data(), data.data(), data.size() * sizeof(int64_t));
+    tensor->shape.clear();
+    tensor->shape.push_back(data.size());
+    tensor->shape.push_back(1);
+    return true;
   }
 
+  std::unique_ptr<std::ifstream> file;
+};
+
+void Main(int batch_size) {
   // shape --
   // Create Predictor --
   AnalysisConfig config;
   config.model_dir = FLAGS_infer_model;
   config.use_gpu = false;
   config.enable_ir_optim = true;
-  config.ir_passes.push_back("fc_lstm_fuse_pass");
   auto predictor =
       CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
           config);
 
+  std::vector<PaddleTensor> input_slots(1);
+  // one batch starts
+  // data --
+  auto &input = input_slots[0];
+  input.dtype = PaddleDType::INT64;
+
   inference::Timer timer;
   double sum = 0;
   std::vector<PaddleTensor> output_slots;
-  for (int i = 0; i < FLAGS_repeat; i++) {
-    timer.tic();
-    CHECK(predictor->Run(input_slots, &output_slots));
-    sum += timer.toc();
+
+  int num_batches = 0;
+  for (int t = 0; t < FLAGS_repeat; t++) {
+    DataReader reader(FLAGS_infer_data);
+    while (reader.NextBatch(&input, FLAGS_batch_size)) {
+      if (FLAGS_topn > 0 && num_batches > FLAGS_topn) break;
+      timer.tic();
+      CHECK(predictor->Run(input_slots, &output_slots));
+      sum += timer.toc();
+      ++num_batches;
+    }
   }
-  PrintTime(sum, batch_size, FLAGS_repeat);
+
+  PrintTime(sum, batch_size, num_batches);
 
   // Get output
   LOG(INFO) << "get outputs " << output_slots.size();
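For context, DataReader::NextBatch reads the dataset as one sample per line, with each line holding the sample's int64 word ids separated by spaces (this is what the inference::split_to_int64(line, ' ', &data) call implies). The sketch below is illustrative only and not part of the patch; SplitToInt64 and data.txt are hypothetical stand-ins for the real helper in paddle/fluid/inference/api/helper.h and for the --infer_data path.

// Minimal standalone sketch (assumptions noted above): parse the assumed
// dataset format the way DataReader::NextBatch does, without Paddle deps.
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical stand-in for inference::split_to_int64.
static void SplitToInt64(const std::string &line, char sep,
                         std::vector<int64_t> *out) {
  std::stringstream ss(line);
  std::string piece;
  while (std::getline(ss, piece, sep)) {
    if (!piece.empty()) out->push_back(std::stoll(piece));
  }
}

int main() {
  std::ifstream file("data.txt");  // hypothetical dataset path (--infer_data)
  std::string line;
  while (std::getline(file, line)) {
    std::vector<int64_t> ids;
    SplitToInt64(line, ' ', &ids);
    // One sample per batch (batch_size == 1), so NextBatch would set the
    // LoD to {0, ids.size()} and the tensor shape to {ids.size(), 1}.
    std::cout << "sample with " << ids.size() << " ids\n";
  }
  return 0;
}

With the patch applied, the new flag limits a run to the top N batches of data, e.g. passing --infer_model=<model_dir> --infer_data=<dataset> --batch_size=1 --repeat=1 --topn=N to the test binary (the exact binary name depends on the build target).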