Skip to content

Commit 93663e3

Browse files
committed
Add benchmark on CMS onnx file (DDB_B1.onnx) used with batch size=1
Modify both ONNXRuntimeInference and SOFIEInference to support models with multiple inputs
1 parent 3ff625d commit 93663e3

File tree

4 files changed

+140
-58
lines changed

4 files changed

+140
-58
lines changed

root/tmva/sofie/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ add_dependencies(RDF_SOFIE_Inference SofieCompileModels)
190190
#if (ROOT_PLATFORM MATCHES "linux|macosx" AND CMAKE_SYSTEM_PROCESSOR MATCHES x86_64 AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
191191
## assume we run only on linux/macos with gnu or gcc
192192
set(gnu-flags $<$<CXX_COMPILER_ID:GNU>:-fno-signaling-nans>)
193-
target_compile_options(SOFIEInference PRIVATE -mavx2 ${gnu-flags} -fno-trapping-math -O3)
194-
target_compile_options(RDF_SOFIE_Inference PRIVATE -mavx2 ${gnu-flags} -fno-trapping-math -O3)
193+
target_compile_options(SOFIEInference PRIVATE ${gnu-flags} -fno-trapping-math -O3)
194+
target_compile_options(RDF_SOFIE_Inference PRIVATE ${gnu-flags} -fno-trapping-math -O3)
195195
#endif()
196196

197197
endif() # endif blas
@@ -230,4 +230,4 @@ if (ONNXRuntime_FOUND)
230230
)
231231
target_link_directories(RDF_ONNXRuntime_Inference PRIVATE ${ONNXRuntime_LIBRARIES})
232232
target_include_directories(RDF_ONNXRuntime_Inference PRIVATE ${ONNXRuntime_INCLUDE_DIR})
233-
endif()
233+
endif()

root/tmva/sofie/ONNXRuntimeInference_Template.cxx.in

Lines changed: 84 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -16,75 +16,108 @@ using namespace std;
1616
static void @FUNC_NAME@(benchmark::State& state, string model_path)
1717
{
1818
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "benchmark");
19-
19+
2020
Ort::SessionOptions session_options;
2121
session_options.SetIntraOpNumThreads(1);
2222
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
2323

2424
//std::cout << "benchmarking model " << model_path << std::endl;
2525
Ort::Session session(env, model_path.c_str(), session_options);
2626

27-
vector<const char*> input_node_names(1);
28-
vector<const char*> output_node_names(1);
29-
27+
int nin = session.GetInputCount();
28+
int nout = 1;
29+
30+
vector<const char*> input_node_names(nin);
31+
vector<const char*> output_node_names(nout);
32+
3033
Ort::AllocatorWithDefaultOptions allocator;
31-
input_node_names[0] = session.GetInputName(0, allocator);
32-
output_node_names[0] = session.GetOutputName(0, allocator);
34+
for (int i = 0; i < nin; i++)
35+
input_node_names[i] = session.GetInputName(i, allocator);
36+
for (int i = 0; i < nout; i++)
37+
output_node_names[i] = session.GetOutputName(i, allocator);
3338

3439
// Getting the shapes
40+
vector<vector<int64_t>> input_node_dims(nin);
41+
vector<vector<int64_t>> output_node_dims(nout);
42+
43+
for (int i = 0; i < nin; i++)
44+
input_node_dims[i] = session.GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
45+
for (int i = 0; i < nout; i++)
46+
output_node_dims[i] = session.GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape();
47+
48+
for (int i = 0; i < nin; i++) {
49+
std::cout << "input " << input_node_names[i] << " shape : ";
50+
for (int j = 0; j < input_node_dims[i].size(); j++)
51+
std::cout << " " << input_node_dims[i][j];
52+
std::cout << std::endl;
53+
}
54+
// fix negative shapes
55+
for (int i = 0; i < nin; i++) {
56+
for (int j = 0; j < input_node_dims[i].size(); j++) {
57+
if (input_node_dims[i][j] < 0) input_node_dims[i][j] = - input_node_dims[i][j];
58+
}
59+
}
3560

36-
vector<int64_t> input_node_dims = session
37-
.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
38-
vector<int64_t> output_node_dims = session
39-
.GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
4061

4162
// Calculating the dimension of the input tensor
4263
int nevts = 64;
43-
int bsize = input_node_dims[0];
64+
int bsize = input_node_dims[0][0]; // assume this
4465
//std::cout << "Using bsize = " << bsize << std::endl;
4566
int nbatches = nevts / bsize;
4667

47-
size_t input_tensor_size = accumulate(input_node_dims.begin(),
48-
input_node_dims.end(), 1, multiplies<int>());
49-
vector<float> input_tensor_values(input_tensor_size*nbatches);
50-
//std::cout << "input tensor size " << input_tensor_size << " " << input_tensor_values.size() << std::endl;
51-
52-
// Input tensor initialization
53-
static std::uniform_real_distribution<float> distribution(-1,1);
54-
static std::default_random_engine generator;
55-
std::generate(input_tensor_values.begin(), input_tensor_values.end(), []() { return distribution(generator); });
56-
//fill_n(input_tensor_values.begin(), input_tensor_size, 1.0);
57-
58-
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
59-
// Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info,
60-
// input_tensor_values.data(), input_tensor_size,
61-
// input_node_dims.data(), input_node_dims.size());
62-
63-
// Running the model
64-
float * floatarr = nullptr;
65-
66-
double totDuration = 0;
67-
int ntimes = 0;
68-
for (auto _ : state) {
69-
auto t1 = std::chrono::high_resolution_clock::now();
70-
size_t input_offset = 0;
71-
for (int i = 0; i < nevts; i += bsize) {
72-
// if (input_offset > input_tensor_values.size()) {
73-
// std::cout << "Error in input size " << i << " " << nevts << " " << model_path << std::endl;
74-
// throw std::runtime_error("Bad input size ");
75-
// }
76-
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
77-
memory_info, input_tensor_values.data()+input_offset, input_tensor_size, input_node_dims.data(), input_node_dims.size());
78-
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1,
79-
output_node_names.data(), 1);
80-
floatarr = output_tensors.front().GetTensorMutableData<float>();
81-
input_offset += input_tensor_size;
82-
}
68+
std::vector<std::vector<float>> inputData(nin);
69+
std::vector<size_t> inputSizes(nin);
70+
71+
for (int i = 0; i < nin; i++) {
72+
size_t input_tensor_size = accumulate(input_node_dims[i].begin(), input_node_dims[i].end(), 1, multiplies<int>());
73+
inputSizes[i] = input_tensor_size;
74+
auto &input_tensor_values = inputData[i];
75+
input_tensor_values.resize(input_tensor_size * nbatches);
76+
// std::cout << "input tensor size " << input_tensor_size << " " << input_tensor_values.size() << std::endl;
77+
78+
// Input tensor initialization
79+
static std::uniform_real_distribution<float> distribution(-1, 1);
80+
static std::default_random_engine generator;
81+
std::generate(input_tensor_values.begin(), input_tensor_values.end(), []() { return distribution(generator); });
82+
// fill_n(input_tensor_values.begin(), input_tensor_size, 1.0);
83+
}
8384

84-
auto t2 = std::chrono::high_resolution_clock::now();
85-
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
86-
totDuration += duration / 1.E3; // in milliseconds
87-
ntimes++;
85+
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
86+
// Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info,
87+
// input_tensor_values.data(), input_tensor_size,
88+
// input_node_dims.data(), input_node_dims.size());
89+
90+
// Running the model
91+
float *floatarr = nullptr;
92+
93+
std::vector<Ort::Value> input_tensors;
94+
95+
double totDuration = 0;
96+
int ntimes = 0;
97+
for (auto _ : state) {
98+
auto t1 = std::chrono::high_resolution_clock::now();
99+
std::vector<size_t> input_offset(nin);
100+
for (int i = 0; i < nevts; i += bsize) {
101+
// if (input_offset > input_tensor_values.size()) {
102+
// std::cout << "Error in input size " << i << " " << nevts << " " << model_path << std::endl;
103+
// throw std::runtime_error("Bad input size ");
104+
// }
105+
for (int k = 0; k < nin; k++) {
106+
input_tensors.emplace_back(Ort::Value::CreateTensor<float>(memory_info, inputData[k].data() + input_offset[k],
107+
inputSizes[k], input_node_dims[k].data(), input_node_dims[k].size()));
108+
}
109+
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), input_tensors.data(), nin,
110+
output_node_names.data(), nout);
111+
floatarr = output_tensors.front().GetTensorMutableData<float>();
112+
for (int k = 0; k < nin; k++) {
113+
input_offset[k] += inputSizes[k];
114+
}
115+
}
116+
117+
auto t2 = std::chrono::high_resolution_clock::now();
118+
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
119+
totDuration += duration / 1.E3; // in milliseconds
120+
ntimes++;
88121
}
89122
//for (int i = 0; i < 10; i++)
90123
// printf("%f\t", i, floatarr[i]);

root/tmva/sofie/SOFIEInference.cxx

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,17 @@
2525
#include "GRU_d10_L20_h8_B1.hxx"
2626
#include "LSTM_d10_L20_h8_B1.hxx"
2727
#include "higgs_model_dense.hxx"
28+
#include "DDB_B1.hxx" // CMS onnx model
2829

2930
#include "resnet18v1.hxx"
3031
#include "TMath.h"
3132

3233

3334
using namespace std;
34-
bool verbose = false;
35+
bool verbose = false;
3536
template <class S>
3637
void BM_SOFIE_Inference(benchmark::State &state)
37-
{
38+
{
3839
size_t inputSize = state.range(0); // input size (without batch size)
3940
size_t bsize = (state.range(1) > 0) ? state.range(1) : 0;
4041
size_t nevts = 64;
@@ -74,8 +75,56 @@ void BM_SOFIE_Inference(benchmark::State &state)
7475
// }
7576
//if (verbose) std::cout << "output : " << output.size() << " : " << output.front() << " ......" << output.back() << std::endl;
7677
}
77-
//typedef TMVA_SOFIE_Conv_d100_L1_B1::Session S1;
78-
//BENCHMARK(BM_SOFIE_Inference<S1>);//->Name( "Conv_d100_L1_B1");
78+
79+
// inference for model with 3 inputs
80+
template <class S>
81+
void BM_SOFIE_Inference_3(benchmark::State &state)
82+
{
83+
size_t bsize = state.range(0); // batch size
84+
size_t inputSize1 = state.range(1); // input 1 size
85+
size_t inputSize2 = state.range(2); // input 2 size
86+
size_t inputSize3 = state.range(3);
87+
88+
size_t nevts = 64;
89+
size_t nrep = nevts / bsize;
90+
91+
size_t eventSize = inputSize1 + inputSize2+inputSize3;
92+
93+
vector<float> input1(inputSize1*nevts);
94+
vector<float> input2(inputSize2*nevts);
95+
vector<float> input3(inputSize3*nevts);
96+
97+
static std::uniform_real_distribution<float> distribution(-1, 1);
98+
static std::default_random_engine generator;
99+
std::generate(input1.begin(), input1.end(), []() { return distribution(generator); });
100+
std::generate(input2.begin(), input2.end(), []() { return distribution(generator); });
101+
std::generate(input3.begin(), input3.end(), []() { return distribution(generator); });
102+
103+
S s("");
104+
105+
std::cout << "initi done - do benchmark \n";
106+
107+
double totDuration = 0;
108+
int ntimes = 0;
109+
for (auto _ : state) {
110+
auto t1 = std::chrono::high_resolution_clock::now();
111+
for (int i = 0; i < nevts; i += bsize) {
112+
float * p1 = input1.data()+ inputSize1*i;
113+
float * p2 = input2.data()+ inputSize2*i;
114+
float * p3 = input3.data()+ inputSize3*i;
115+
auto y = s.infer(p1,p2,p3);
116+
}
117+
auto t2 = std::chrono::high_resolution_clock::now();
118+
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
119+
totDuration += duration / 1.E3; // in milliseconds
120+
ntimes++;
121+
}
122+
123+
state.counters["time/evt(ms)"] = totDuration / double(ntimes * nevts);
124+
}
125+
126+
// CMS benchmark (3 inputs)
127+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference_3, TMVA_SOFIE_DDB_B1::Session)->Name("DDB_B1")->Args({1, 1*27, 60*8, 5*2})->Unit(benchmark::kMillisecond);
79128

80129
//Gemm benchmarks
81130
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_16::Session)->Name("Linear_16")->Args({100, 16})->Unit(benchmark::kMillisecond);
165 KB
Binary file not shown.

0 commit comments

Comments
 (0)