1+ #include " higgs_model_dense.hxx"
2+ #include < iostream>
3+ #include " TROOT.h"
4+ #include " TSystem.h"
5+ #include " ROOT/RDataFrame.hxx"
6+
7+ #include < onnxruntime_cxx_api.h>
8+
9+
10+ #include < string>
11+ #include < fstream>
12+ #include < stdlib.h>
13+
14+ #include < benchmark/benchmark.h>
15+
16+ // template <typename Func>
17+ struct ONNXFunctor {
18+
19+ // std::vector<float> input;
20+ // std::vector<std::shared_ptr<Func>> sessions;
21+
22+ std::map<std::string, double > inputs;
23+ std::vector<std::string> names;
24+
25+ std::shared_ptr<Ort::Session> session;
26+
27+ // td::vector<Ort::Value> input_tensors;
28+
29+ // Ort::Value * ort_input = nullptr;
30+
31+ // float *input_arr = nullptr;
32+
33+ std::vector<const char *> input_node_names;
34+ std::vector<const char *> output_node_names;
35+
36+ std::vector<float > input_tensor_values;
37+
38+ std::vector<int64_t > input_node_dims;
39+ std::vector<int64_t > output_node_dims;
40+
41+ ONNXFunctor (unsigned nslots)
42+ {
43+
44+ Ort::Env env (ORT_LOGGING_LEVEL_WARNING, " benchmark" );
45+
46+ std::string model_path = " higgs_model_dense.onnx" ;
47+
48+ Ort::SessionOptions session_options;
49+ session_options.SetIntraOpNumThreads (1 );
50+ session_options.SetGraphOptimizationLevel (GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
51+
52+ // std::cout << "benchmarking model " << model_path << std::endl;
53+ session = std::make_shared<Ort::Session>(env, model_path.c_str (), session_options);
54+
55+
56+
57+ Ort::AllocatorWithDefaultOptions allocator;
58+ input_node_names.push_back (session->GetInputName (0 , allocator));
59+ output_node_names.push_back ( session->GetOutputName (0 , allocator));
60+
61+ // Getting the shapes
62+
63+ input_node_dims = session->GetInputTypeInfo (0 ).GetTensorTypeAndShapeInfo ().GetShape ();
64+ output_node_dims = session->GetOutputTypeInfo (0 ).GetTensorTypeAndShapeInfo ().GetShape ();
65+
66+ // Calculating the dimension of the input tensor
67+
68+ // int bsize = input_node_dims[0];
69+ // std::cout << "Using bsize = " << bsize << std::endl;
70+ // int nbatches = nevt / bsize;
71+
72+ size_t input_tensor_size = std::accumulate (input_node_dims.begin (), input_node_dims.end (), 1 , std::multiplies<int >());
73+ // std::vector<float> input_tensor_values(input_tensor_size );
74+
75+ input_tensor_values.resize (input_tensor_size);
76+
77+ auto memory_info = Ort::MemoryInfo::CreateCpu (OrtArenaAllocator, OrtMemTypeDefault);
78+
79+ // input_tensors.push_back(Ort::Value::CreateTensor<float>(
80+ // memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), input_node_dims.size()) );
81+
82+
83+ // Ort::Value
84+ // Ort::Value *ort_input = new Ort::Value(nullptr);
85+ // // input_tensor =
86+ // *ort_input = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_values.size(),
87+ // input_node_dims.data(), input_node_dims.size());
88+
89+ // input_arr = input_tensor.GetTensorMutableData<float>();
90+
91+ // Running the model
92+ // input_arr = input_tensors[0].GetTensorMutableData<float>();
93+
94+ // /////
95+
96+
97+ // // Load inputs from argv
98+ // std::cout << "input size is " << config.inputs.size() << std::endl;
99+ // for (size_t n = 0; n < config.inputs.size(); n++) {
100+ // inputs[config.inputs.at(n).name] = 0.0;
101+ // names.push_back(config.inputs.at(n).name);
102+ // }
103+ }
104+
105+ double operator ()(unsigned nslots, float x0, float x1, float x2, float x3, float x4, float x5, float x6)
106+ {
107+
108+ // not sure how to cache input ort tensor
109+ auto memory_info = Ort::MemoryInfo::CreateCpu (OrtArenaAllocator, OrtMemTypeDefault);
110+ Ort::Value
111+ input_tensor = Ort::Value::CreateTensor<float >(
112+ memory_info, input_tensor_values.data (), input_tensor_values.size (), input_node_dims.data (), input_node_dims.size ());
113+ float * input_arr = input_tensor.GetTensorMutableData <float >();
114+
115+ int off = 0 ;
116+ input_arr[off] = x0;
117+ input_arr[off + 1 ] = x1;
118+ input_arr[off + 2 ] = x2;
119+ input_arr[off + 3 ] = x3;
120+ input_arr[off + 4 ] = x4;
121+ input_arr[off + 5 ] = x5;
122+ input_arr[off + 6 ] = x6;
123+
124+
125+
126+ auto output_tensors = session->Run (Ort::RunOptions{nullptr }, input_node_names.data (), &input_tensor, 1 , output_node_names.data (), 1 );
127+ float * floatarr = output_tensors.front ().GetTensorMutableData <float >();
128+ return floatarr[0 ];
129+ }
130+ };
131+
132+ void BM_RDF_ONNX_Inference (benchmark::State &state)
133+ {
134+
135+ int nslot = 1 ;
136+ if (nslot > 1 )
137+ ROOT::EnableImplicitMT (nslot);
138+
139+ auto fileName = " Higgs_data_full.root" ;
140+ // file is available at "https://cernbox.cern.ch/index.php/s/YuSHwTXBa0UBEhD/download";
141+ // do curl https://cernbox.cern.ch/index.php/s/XaPBtaGrnN38wU0 -o Higgs_data_full.root
142+ if (gSystem ->AccessPathName (fileName)) {
143+ std::string cmd = " curl https://cernbox.cern.ch/index.php/s/YuSHwTXBa0UBEhD/download -o " ;
144+ cmd += fileName;
145+ gSystem ->Exec (cmd.c_str ());
146+ }
147+ auto treeName = " test_tree" ;
148+ ROOT::RDataFrame df (treeName, fileName);
149+
150+ ONNXFunctor functor (nslot);
151+
152+ for (auto _ : state) {
153+
154+ auto h1 = df.DefineSlot (" DNN_Value" , functor, {" m_jj" , " m_jjj" , " m_lv" , " m_jlv" , " m_bb" , " m_wbb" , " m_wwbb" })
155+ .Histo1D (" DNN_Value" );
156+
157+ auto t1 = std::chrono::high_resolution_clock::now ();
158+
159+ auto n = h1->GetEntries ();
160+ auto t2 = std::chrono::high_resolution_clock::now ();
161+ auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count ();
162+
163+ std::cout << " Processed " << n << " entries "
164+ << " time = " << duration / 1 .E6 << " (sec) time/event = " << duration / double (n) << " musec"
165+ << std::endl;
166+
167+ // h1->DrawClone();
168+ }
169+ }
170+
171+
172+ BENCHMARK (BM_RDF_ONNX_Inference)->Unit(benchmark::kMillisecond );
173+ BENCHMARK_MAIN ();