Skip to content

Commit 9bcceb1

Browse files
committed
Improve inference tests by reporting time/event
Add optimization flags for auto-vectorization: -mavx2 -fno-signaling-nans -fno-trapping-math. With auto-vectorization, the Conv1D test is 3 times faster. These changes are used to produce the benchmark results presented at ACAT.
1 parent f5fee6f commit 9bcceb1

File tree

3 files changed

+54
-23
lines changed

3 files changed

+54
-23
lines changed

root/tmva/sofie/CMakeLists.txt

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# @author Federico Sossai (fsossai)
22

3-
if(ROOT_tmva_FOUND AND ROOT_tmva-sofie_FOUND)
43

54
# Checking that all required model exist
65
if (NOT ONNX_MODELS_DIR)
@@ -51,7 +50,14 @@ if(ROOT_tmva_FOUND AND ROOT_tmva-sofie_FOUND)
5150
message(STATUS "ONNXRuntime not found")
5251
endif()
5352

54-
if (Use_SOFIE_TEMPLATE)
53+
54+
55+
#---TMVA-/SOFIE
56+
if(ROOT_tmva_FOUND AND ROOT_tmva-sofie_FOUND)
57+
58+
59+
### this is not used
60+
if (Use_SOFIE_TEMPLATE)
5561

5662
# Configuring SOFIEInference_Template.cxx.in
5763
set(FUNC_NAME "BM_SOFIE_Inference")
@@ -91,9 +97,8 @@ if(ROOT_tmva_FOUND AND ROOT_tmva-sofie_FOUND)
9197
string(REPLACE ";" ",\n" FUNC_TUPLES "${ALL_FUNCS}") # String[] -> String
9298
configure_file(SOFIEInference_Template.cxx.in SOFIEInference.cxx @ONLY)
9399

94-
else()
95100

96-
endif()
101+
endif()
97102

98103

99104
# configure_file(input_models/compiled/Linear_event.hxx Linear_event.hxx COPYONLY)
@@ -154,32 +159,43 @@ if(BLAS_FOUND)
154159
#set(SOFIE_BLAS_LIBS /home/moneta/intel/mkl/lib/intel64/libmkl_intel_lp64.so /home/moneta/intel/mkl/lib/intel64/libmkl_sequential.so /home/moneta/intel/mkl/lib/intel64/libmkl_core.so -lpthread)
155160
#set(SOFIE_BLAS_LIBS /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.14.sdk/System/Library/Frameworks/Accelerate.framework)
156161

162+
#
163+
# to select a specific BLAS do: cmake -DBLA_VENDOR=OpenBLAS (or Intel10_64lp_seq, Intel10_64lp)
164+
# for Intel MKL you also need to set the MKLROOT environment variable (see the documentation of the CMake FindBLAS module)
165+
# need to source for example . $dir/intel/mkl/bin/mklvars.sh intel64
166+
167+
set(SOFIE_BLAS_LIBS ${BLAS_LIBRARIES})
168+
169+
157170
# Benchmark for models emitted by SOFIE
158171
RB_ADD_GBENCHMARK(SOFIEInference
159172
SOFIEInference.cxx
160173
LABEL short
161-
LIBRARIES TMVA ROOTTMVASofie ${BLAS_LIBRARIES}
174+
LIBRARIES TMVA ROOTTMVASofie ${SOFIE_BLAS_LIBS}
162175
)
163176

164-
# target_include_directories(SOFIEInference PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
177+
add_dependencies(SOFIEInference SofieCompileModels)
165178

166-
#add_dependencies(SOFIEInference SofieCompileModels)
167-
168-
169-
#configure_file(input_models/compiled/higgs_model_dense.hxx higgs_model_dense.hxx COPYONLY)
170-
#configure_file(input_models/compiled/higgs_model_dense.dat higgs_model_dense.dat COPYONLY)
171179
RB_ADD_GBENCHMARK(RDF_SOFIE_Inference
172180
RDF_SOFIE_Inference.cxx
173181
LABEL short
174-
#LIBRARIES TMVA ROOTTMVASofie openblas
175-
#LIBRARIES TMVA ROOTTMVASofie
176-
LIBRARIES Core Hist Imt RIO Tree TreePlayer ROOTDataFrame ROOTVecOps TMVA ROOTTMVASofie ${BLAS_LIBRARIES}
182+
LIBRARIES Core Hist Imt RIO Tree TreePlayer ROOTDataFrame ROOTVecOps TMVA ROOTTMVASofie ${SOFIE_BLAS_LIBS}
177183
)
178184

179185
add_dependencies(RDF_SOFIE_Inference SofieCompileModels)
180186

181-
endif()
182-
endif()
187+
#
188+
# add optimization flags for best performances (factor 3 on simple Conv1 test)
189+
#
190+
#if (ROOT_PLATFORM MATCHES "linux|macosx" AND CMAKE_SYSTEM_PROCESSOR MATCHES x86_64 AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
191+
## assume we run only on linux/macos with gcc or clang
192+
set(gnu-flags $<$<CXX_COMPILER_ID:GNU>:-fno-signaling-nans>)
193+
target_compile_options(SOFIEInference PRIVATE -mavx2 ${gnu-flags} -fno-trapping-math -O3)
194+
target_compile_options(RDF_SOFIE_Inference PRIVATE -mavx2 ${gnu-flags} -fno-trapping-math -O3)
195+
#endif()
196+
197+
endif() # endif blas
198+
endif() # endif TMVA/SOFIE
183199

184200
find_package(LWTNN QUIET)
185201
if (LWTNN_FOUND)

root/tmva/sofie/ONNXRuntimeInference_Template.cxx.in

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <vector>
1010
#include <numeric>
1111
#include <random>
12+
#include <chrono>
1213

1314
using namespace std;
1415

@@ -38,10 +39,10 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
3839
.GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
3940

4041
// Calculating the dimension of the input tensor
41-
int nevt = 64;
42+
int nevts = 64;
4243
int bsize = input_node_dims[0];
4344
//std::cout << "Using bsize = " << bsize << std::endl;
44-
int nbatches = nevt / bsize;
45+
int nbatches = nevts / bsize;
4546

4647
size_t input_tensor_size = accumulate(input_node_dims.begin(),
4748
input_node_dims.end(), 1, multiplies<int>());
@@ -61,18 +62,24 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
6162
// Running the model
6263
float * floatarr = nullptr;
6364

64-
65-
65+
double totDuration = 0;
66+
int ntimes = 0;
6667
for (auto _ : state) {
67-
for (int i = 0; i < nevt; i+= bsize) {
68+
auto t1 = std::chrono::high_resolution_clock::now();
69+
for (int i = 0; i < nevts; i+= bsize) {
6870
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1,
6971
output_node_names.data(), 1);
7072
floatarr = output_tensors.front().GetTensorMutableData<float>();
7173
}
74+
75+
auto t2 = std::chrono::high_resolution_clock::now();
76+
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
77+
totDuration += duration / 1.E3; // in milliseconds
78+
ntimes++;
7279
}
7380
//for (int i = 0; i < 10; i++)
7481
// printf("%f\t", i, floatarr[i]);
75-
82+
state.counters["time/evt(ms)"] = totDuration / double(ntimes * nevts);
7683

7784
}
7885
@BENCHMARK_CAPTURES@

root/tmva/sofie/SOFIEInference.cxx

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,20 @@ void BM_SOFIE_Inference(benchmark::State &state)
4242
float *input_ptr = input.data();
4343
S s("");
4444

45-
45+
double totDuration = 0;
46+
int ntimes = 0;
4647
for (auto _ : state) {
48+
auto t1 = std::chrono::high_resolution_clock::now();
4749
for (int i = 0; i < nevts; i += bsize)
4850
auto y = s.infer(input.data()+ inputSize*i);
51+
52+
auto t2 = std::chrono::high_resolution_clock::now();
53+
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
54+
totDuration += duration / 1.E3; // in milliseconds
55+
ntimes++;
4956
}
5057

58+
state.counters["time/evt(ms)"] = totDuration / double(ntimes * nevts);
5159
// input[0] = -999;
5260
// s.inf
5361
// std::cout << "number of times " << s.itime << std::endl;

0 commit comments

Comments
 (0)