@@ -16,75 +16,108 @@ using namespace std;
 static void @FUNC_NAME@(benchmark::State& state, string model_path)
 {
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "benchmark");
-
+
    Ort::SessionOptions session_options;
    session_options.SetIntraOpNumThreads(1);
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
 
    //std::cout << "benchmarking model " << model_path << std::endl;
    Ort::Session session(env, model_path.c_str(), session_options);
 
-   vector<const char*> input_node_names(1);
-   vector<const char*> output_node_names(1);
-
+   int nin = session.GetInputCount();
+   int nout = 1;
+
+   vector<const char*> input_node_names(nin);
+   vector<const char*> output_node_names(nout);
+
    Ort::AllocatorWithDefaultOptions allocator;
-   input_node_names[0] = session.GetInputName(0, allocator);
-   output_node_names[0] = session.GetOutputName(0, allocator);
+   for (int i = 0; i < nin; i++)
+      input_node_names[i] = session.GetInputName(i, allocator);
+   for (int i = 0; i < nout; i++)
+      output_node_names[i] = session.GetOutputName(i, allocator);
 
    // Getting the shapes
+   vector<vector<int64_t>> input_node_dims(nin);
+   vector<vector<int64_t>> output_node_dims(nout);
+
+   for (int i = 0; i < nin; i++)
+      input_node_dims[i] = session.GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
+   for (int i = 0; i < nout; i++)
+      output_node_dims[i] = session.GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
+
+   for (int i = 0; i < nin; i++) {
+      std::cout << "input " << input_node_names[i] << " shape : ";
+      for (size_t j = 0; j < input_node_dims[i].size(); j++)
+         std::cout << " " << input_node_dims[i][j];
+      std::cout << std::endl;
+   }
+   // fix negative shapes: dynamic (symbolic) dimensions are reported as -1, so take the absolute value to obtain a concrete size of 1
+   for (int i = 0; i < nin; i++) {
+      for (size_t j = 0; j < input_node_dims[i].size(); j++) {
+         if (input_node_dims[i][j] < 0) input_node_dims[i][j] = -input_node_dims[i][j];
+      }
+   }
 
-   vector<int64_t> input_node_dims = session
-      .GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
-   vector<int64_t> output_node_dims = session
-      .GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
 
    // Calculating the dimension of the input tensor
    int nevts = 64;
-   int bsize = input_node_dims[0];
+   int bsize = input_node_dims[0][0]; // assume the first dimension of the first input is the batch size
    //std::cout << "Using bsize = " << bsize << std::endl;
    int nbatches = nevts / bsize;
 
-   size_t input_tensor_size = accumulate(input_node_dims.begin(),
-                                         input_node_dims.end(), 1, multiplies<int>());
-   vector<float> input_tensor_values(input_tensor_size*nbatches);
-   //std::cout << "input tensor size " << input_tensor_size << " " << input_tensor_values.size() << std::endl;
-
-   // Input tensor initialization
-   static std::uniform_real_distribution<float> distribution(-1,1);
-   static std::default_random_engine generator;
-   std::generate(input_tensor_values.begin(), input_tensor_values.end(), []() { return distribution(generator); });
-   //fill_n(input_tensor_values.begin(), input_tensor_size, 1.0);
-
-   auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
-   // Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info,
-   //    input_tensor_values.data(), input_tensor_size,
-   //    input_node_dims.data(), input_node_dims.size());
-
-   // Running the model
-   float * floatarr = nullptr;
-
-   double totDuration = 0;
-   int ntimes = 0;
-   for (auto _ : state) {
-      auto t1 = std::chrono::high_resolution_clock::now();
-      size_t input_offset = 0;
-      for (int i = 0; i < nevts; i += bsize) {
-         // if (input_offset > input_tensor_values.size()) {
-         //    std::cout << "Error in input size " << i << " " << nevts << " " << model_path << std::endl;
-         //    throw std::runtime_error("Bad input size ");
-         // }
-         Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
-            memory_info, input_tensor_values.data()+input_offset, input_tensor_size, input_node_dims.data(), input_node_dims.size());
-         auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1,
-                                           output_node_names.data(), 1);
-         floatarr = output_tensors.front().GetTensorMutableData<float>();
-         input_offset += input_tensor_size;
-      }
+   std::vector<std::vector<float>> inputData(nin);
+   std::vector<size_t> inputSizes(nin);
+
+   for (int i = 0; i < nin; i++) {
+      size_t input_tensor_size = accumulate(input_node_dims[i].begin(), input_node_dims[i].end(), 1, multiplies<int>());
+      inputSizes[i] = input_tensor_size;
+      auto &input_tensor_values = inputData[i];
+      input_tensor_values.resize(input_tensor_size * nbatches);
+      // std::cout << "input tensor size " << input_tensor_size << " " << input_tensor_values.size() << std::endl;
+
+      // Input tensor initialization with random values in [-1, 1)
+      static std::uniform_real_distribution<float> distribution(-1, 1);
+      static std::default_random_engine generator;
+      std::generate(input_tensor_values.begin(), input_tensor_values.end(), []() { return distribution(generator); });
+      // fill_n(input_tensor_values.begin(), input_tensor_size, 1.0);
+   }
 
-      auto t2 = std::chrono::high_resolution_clock::now();
-      auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
-      totDuration += duration / 1.E3; // in milliseconds
-      ntimes++;
+   auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+   // Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info,
+   //    input_tensor_values.data(), input_tensor_size,
+   //    input_node_dims.data(), input_node_dims.size());
+
+   // Running the model
+   float *floatarr = nullptr;
+
+   double totDuration = 0;
+   int ntimes = 0;
+   for (auto _ : state) {
+      auto t1 = std::chrono::high_resolution_clock::now();
+      std::vector<size_t> input_offset(nin);
+      for (int i = 0; i < nevts; i += bsize) {
+         // if (input_offset > input_tensor_values.size()) {
+         //    std::cout << "Error in input size " << i << " " << nevts << " " << model_path << std::endl;
+         //    throw std::runtime_error("Bad input size ");
+         // }
+         // build the input tensors for this batch; the vector is local so it is recreated (and freed) at every iteration
+         std::vector<Ort::Value> input_tensors;
+         for (int k = 0; k < nin; k++) {
+            input_tensors.emplace_back(Ort::Value::CreateTensor<float>(memory_info, inputData[k].data() + input_offset[k],
+                                                                       inputSizes[k], input_node_dims[k].data(), input_node_dims[k].size()));
+         }
+         auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), input_tensors.data(), nin,
+                                           output_node_names.data(), nout);
+         floatarr = output_tensors.front().GetTensorMutableData<float>();
+         for (int k = 0; k < nin; k++) {
+            input_offset[k] += inputSizes[k];
+         }
+      }
+
+      auto t2 = std::chrono::high_resolution_clock::now();
+      auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
+      totDuration += duration / 1.E3; // in milliseconds
+      ntimes++;
    }
    //for (int i = 0; i < 10; i++)
    //   printf("%f\t", i, floatarr[i]);
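
Note for reviewers: the snippet below is a minimal sketch of how a benchmark generated from this template is typically registered with Google Benchmark. It assumes @FUNC_NAME@ is substituted with a concrete name at generation time; the name ONNXInference and the model file used here are placeholders, not part of this commit.

// hypothetical instantiation: @FUNC_NAME@ -> ONNXInference, defined as in the diff above
#include <benchmark/benchmark.h>
#include <string>
using std::string;

static void ONNXInference(benchmark::State& state, string model_path);

// register one benchmark per model; the captured argument is passed as model_path
BENCHMARK_CAPTURE(ONNXInference, SomeModel, "SomeModel.onnx")->Unit(benchmark::kMillisecond);

BENCHMARK_MAIN();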