Skip to content

Commit 44ccf6d

Browse files
committed
Add LWTNN inference test and Fast simulation model from G4
Add json file for Generator model
1 parent 89b68e8 commit 44ccf6d

File tree

5 files changed

+185
-14
lines changed

5 files changed

+185
-14
lines changed

root/tmva/sofie/CMakeLists.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,17 @@ if (LWTNN_FOUND)
202202

203203
message(STATUS "Found LWTNN (library is ${LWTNN_LIBRARY}, libraries ${LWTNN_LIBRARIES})")
204204
configure_file(input_models/higgs_model_dense.json higgs_model_dense.json COPYONLY)
205+
configure_file(input_models/Generator.json.gz Generator.json.gz COPYONLY)
206+
execute_process(COMMAND gunzip -f ${CMAKE_CURRENT_BINARY_DIR}/Generator.json.gz)
205207
# set(LWTNN_INCLUDE_DIR /home/moneta/cernbox/root/tests/tmva/sofie/lwtnn-build/include)
206208
# set(LWTNN_LIBS /home/moneta/cernbox/root/tests/tmva/sofie/lwtnn-build/lib/liblwtnn.so)
207-
RB_ADD_GBENCHMARK(RDF_lwtnn_Inference
209+
RB_ADD_GBENCHMARK(LWTNNInference
210+
LWTNNInference.cxx
211+
LABEL short
212+
LIBRARIES Core Hist Imt RIO Tree TreePlayer ROOTDataFrame ROOTVecOps TMVA ROOTTMVASofie ${LWTNN_LIBRARY})
213+
target_include_directories(LWTNNInference PRIVATE ${LWTNN_INCLUDE_DIR})
214+
215+
RB_ADD_GBENCHMARK(RDF_lwtnn_Inference
208216
RDF_lwtnn_Inference.cxx
209217
LABEL short
210218
LIBRARIES Core Hist Imt RIO Tree TreePlayer ROOTDataFrame ROOTVecOps TMVA ROOTTMVASofie ${LWTNN_LIBRARY})

root/tmva/sofie/LWTNNInference.cxx

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
// Author: Federico Sossai (fsossai), 2021
2+
3+
#include <benchmark/benchmark.h>

#include <algorithm>
#include <chrono>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <random>
#include <stdexcept>
#include <stdlib.h>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "TMath.h"

#include "lwtnn/LightweightNeuralNetwork.hh"
#include "lwtnn/LightweightGraph.hh"
#include "lwtnn/parse_json.hh"
21+
22+
bool verbose = false;
23+
24+
// Benchmark event-by-event inference with an lwtnn sequential model.
//
// Reads "<model_name>.json", builds a lwt::LightweightNeuralNetwork from the
// parsed configuration and, for every benchmark iteration, evaluates the
// network on 64 randomly generated events (one event per compute() call).
// The average wall-clock time per event is published in the custom counter
// "time/evt(ms)".
//
// Parameters:
//   state       Google Benchmark state that drives the timed loop.
//   model_name  Name of the model file without the ".json" extension.
//   inputSize   Number of input variables; must equal the number of inputs
//               declared in the model configuration.
//   outputSize  Size of the buffer receiving the result; only its first
//               element is written.
//
// Throws std::runtime_error when the model file cannot be opened or when
// inputSize does not match the model configuration.
void BM_LWTNN_Inference_model(benchmark::State &state, std::string model_name, size_t inputSize, size_t outputSize = 1)
{
   const std::string model_filename = model_name + ".json";
   std::ifstream config_file(model_filename);
   if (!config_file.is_open()) {
      // Fail with a clear message instead of handing a dead stream to the parser.
      throw std::runtime_error("Cannot open model file " + model_filename);
   }

   auto config = lwt::parse_json(config_file);
   config_file.close();

   // Set up neural network model from config
   auto model = std::make_unique<lwt::LightweightNeuralNetwork>(config.inputs, config.layers, config.outputs);

   if (config.inputs.size() != inputSize) {
      throw std::runtime_error("Bad input size - it is " + std::to_string(inputSize) +
                               " and should be " + std::to_string(config.inputs.size()));
   }

   // Create one map entry per model input and remember the names so that the
   // timed loop does not have to build any key strings.
   std::map<std::string, double> inputs;
   std::vector<std::string> names;
   names.reserve(inputSize);
   for (size_t n = 0; n < inputSize; n++) {
      inputs[config.inputs.at(n).name] = 0.0;
      names.push_back(config.inputs.at(n).name);
   }

   const size_t bsize = 1; // bsize is always 1 for lwtnn
   const size_t nevts = 64;

   // Random input data for all events, generated once outside the timed region.
   std::vector<float> input(inputSize * nevts);
   static std::uniform_real_distribution<float> distribution(-1, 1);
   static std::default_random_engine generator;
   std::generate(input.begin(), input.end(), []() { return distribution(generator); });

   double totDuration = 0;
   int ntimes = 0;
   std::vector<float> y(outputSize);
   for (auto _ : state) {
      auto t1 = std::chrono::high_resolution_clock::now();
      for (size_t i = 0; i < nevts; i += bsize) {
         for (size_t j = 0; j < inputSize; j++)
            inputs[names[j]] = input[i * inputSize + j];

         auto outputs = model->compute(inputs);
         // Keep the first output; guard against an empty buffer or result so
         // that outputSize == 0 cannot cause an out-of-bounds write.
         if (!y.empty() && outputs.begin() != outputs.end())
            y[0] = static_cast<float>(outputs.begin()->second);
      }

      auto t2 = std::chrono::high_resolution_clock::now();
      auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
      totDuration += duration / 1.E3; // in milliseconds
      ntimes++;
   }

   // Only publish the counter when at least one iteration was measured.
   if (ntimes > 0)
      state.counters["time/evt(ms)"] = totDuration / double(ntimes * nevts);
}
88+
89+
// Benchmark event-by-event inference with an lwtnn graph model.
//
// Reads "<model_name>.json", builds a lwt::LightweightGraph from it and, for
// every benchmark iteration, evaluates the graph on 64 randomly generated
// events (one event per compute() call). Inputs are passed under the node
// "node_0" with variable names "variable_<j>"; outputs are read back under
// the names "out_<k>". The average wall-clock time per event is published in
// the custom counter "time/evt(ms)".
//
// Parameters:
//   state       Google Benchmark state that drives the timed loop.
//   model_name  Name of the model file without the ".json" extension.
//   inputSize   Number of input variables fed to the graph per event.
//   outputSize  Number of output values read back per event.
//
// Throws std::runtime_error when the model file cannot be opened.
void BM_LWTNN_Inference_graph(benchmark::State &state, std::string model_name, size_t inputSize, size_t outputSize = 1 )
{
   typedef std::map<std::string, std::map<std::string, double>> NetworkInputs;

   const std::string model_filename = model_name + ".json";
   std::ifstream config_file(model_filename);
   if (!config_file.is_open()) {
      // Fail with a clear message instead of handing a dead stream to the parser.
      throw std::runtime_error("Cannot open model file " + model_filename);
   }
   auto graph = std::make_unique<lwt::LightweightGraph>(lwt::parse_json_graph(config_file));
   config_file.close();

   // Build all key strings once, outside the timed region, so that string
   // formatting and allocation are not counted as inference time.
   std::vector<std::string> names;
   names.reserve(inputSize);
   for (size_t j = 0; j < inputSize; j++)
      names.push_back("variable_" + std::to_string(j));

   std::vector<std::string> outputNames;
   outputNames.reserve(outputSize);
   for (size_t k = 0; k < outputSize; k++)
      outputNames.push_back("out_" + std::to_string(k));

   NetworkInputs inputs;
   // References to std::map elements stay valid while the map is alive, so
   // the node lookup can be done once here.
   std::map<std::string, double> &nodeInputs = inputs["node_0"];

   const size_t bsize = 1; // bsize is always 1 for lwtnn
   const size_t nevts = 64;

   // Random input data for all events, generated once outside the timed region.
   std::vector<float> input(inputSize * nevts);
   static std::uniform_real_distribution<float> distribution(-1, 1);
   static std::default_random_engine generator;
   std::generate(input.begin(), input.end(), []() { return distribution(generator); });

   double totDuration = 0;
   int ntimes = 0;
   std::vector<float> y(outputSize);
   for (auto _ : state) {
      auto t1 = std::chrono::high_resolution_clock::now();
      for (size_t i = 0; i < nevts; i += bsize) {
         for (size_t j = 0; j < inputSize; j++)
            nodeInputs[names[j]] = input[i * inputSize + j];

         auto outputs = graph->compute(inputs);
         // Separate unsigned index: the original inner loop shadowed the
         // event index and compared a signed int against a size_t.
         for (size_t k = 0; k < outputSize; k++)
            y[k] = static_cast<float>(outputs[outputNames[k]]);
      }

      auto t2 = std::chrono::high_resolution_clock::now();
      auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
      totDuration += duration / 1.E3; // in milliseconds
      ntimes++;
   }

   // Only publish the counter when at least one iteration was measured.
   if (ntimes > 0)
      state.counters["time/evt(ms)"] = totDuration / double(ntimes * nevts);
}
142+
143+
//LWTNN benchmarks
144+
// use BENCHMARK_CAPTURE to pass the model file name as a string; the second macro argument is the test name and is arbitrary
145+
BENCHMARK_CAPTURE(BM_LWTNN_Inference_model,higgs_model_dense, "higgs_model_dense",7)->Unit(benchmark::kMillisecond);
146+
BENCHMARK_CAPTURE(BM_LWTNN_Inference_graph, generator, "Generator",14)->Unit(benchmark::kMillisecond);
147+
148+
BENCHMARK_MAIN();

root/tmva/sofie/ONNXRuntimeInference_Template.cxx.in

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
4747
size_t input_tensor_size = accumulate(input_node_dims.begin(),
4848
input_node_dims.end(), 1, multiplies<int>());
4949
vector<float> input_tensor_values(input_tensor_size*nbatches);
50+
//std::cout << "input tensor size " << input_tensor_size << " " << input_tensor_values.size() << std::endl;
5051

5152
// Input tensor initialization
5253
static std::uniform_real_distribution<float> distribution(-1,1);
@@ -55,9 +56,9 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
5556
//fill_n(input_tensor_values.begin(), input_tensor_size, 1.0);
5657

5758
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
58-
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info,
59-
input_tensor_values.data(), input_tensor_size,
60-
input_node_dims.data(), input_node_dims.size());
59+
// Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info,
60+
// input_tensor_values.data(), input_tensor_size,
61+
// input_node_dims.data(), input_node_dims.size());
6162

6263
// Running the model
6364
float * floatarr = nullptr;
@@ -66,10 +67,18 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
6667
int ntimes = 0;
6768
for (auto _ : state) {
6869
auto t1 = std::chrono::high_resolution_clock::now();
69-
for (int i = 0; i < nevts; i+= bsize) {
70+
size_t input_offset = 0;
71+
for (int i = 0; i < nevts; i += bsize) {
72+
// if (input_offset > input_tensor_values.size()) {
73+
// std::cout << "Error in input size " << i << " " << nevts << " " << model_path << std::endl;
74+
// throw std::runtime_error("Bad input size ");
75+
// }
76+
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
77+
memory_info, input_tensor_values.data()+input_offset, input_tensor_size, input_node_dims.data(), input_node_dims.size());
7078
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1,
7179
output_node_names.data(), 1);
7280
floatarr = output_tensors.front().GetTensorMutableData<float>();
81+
input_offset += input_tensor_size;
7382
}
7483

7584
auto t2 = std::chrono::high_resolution_clock::now();

root/tmva/sofie/SOFIEInference.cxx

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,16 @@
1515
#include "Linear_16.hxx"
1616
#include "Linear_32.hxx"
1717
#include "Linear_64.hxx"
18+
#include "Generator_B1.hxx"
19+
#include "Generator_B64.hxx"
1820
#include "Conv_d100_L1_B1.hxx"
1921
#include "Conv_d100_L14_B1.hxx"
2022
#include "Conv_d100_L14_B32.hxx"
2123
#include "Conv3d_d32_L4_B1.hxx"
2224
#include "RNN_d10_L20_h8_B1.hxx"
2325
#include "GRU_d10_L20_h8_B1.hxx"
2426
#include "LSTM_d10_L20_h8_B1.hxx"
27+
#include "higgs_model_dense.hxx"
2528

2629
#include "resnet18v1.hxx"
2730
#include "TMath.h"
@@ -32,7 +35,7 @@ bool verbose = false;
3235
template <class S>
3336
void BM_SOFIE_Inference(benchmark::State &state)
3437
{
35-
size_t inputSize = state.range(0);
38+
size_t inputSize = state.range(0); // input size (without batch size)
3639
size_t bsize = (state.range(1) > 0) ? state.range(1) : 0;
3740
size_t nevts = 64;
3841
size_t nrep = nevts / bsize;
@@ -74,6 +77,16 @@ void BM_SOFIE_Inference(benchmark::State &state)
7477
//typedef TMVA_SOFIE_Conv_d100_L1_B1::Session S1;
7578
//BENCHMARK(BM_SOFIE_Inference<S1>);//->Name( "Conv_d100_L1_B1");
7679

80+
//Gemm benchmarks
81+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_16::Session)->Name("Linear_16")->Args({100, 16})->Unit(benchmark::kMillisecond);
82+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_32::Session)->Name("Linear_32")->Args({100, 32})->Unit(benchmark::kMillisecond);
83+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_64::Session)->Name("Linear_64")->Args({100, 64})->Unit(benchmark::kMillisecond);
84+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Linear_event")->Args({100, 1})->Unit(benchmark::kMillisecond);
85+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Generator_B1::Session)->Name("Generator_B1")->Args({14, 1})->Unit(benchmark::kMillisecond);
86+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Generator_B64::Session)->Name("Generator_B64")->Args({14, 64})->Unit(benchmark::kMillisecond);
87+
88+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_higgs_model_dense::Session)->Name("higgs_model_dense")->Args({7, 1})->Unit(benchmark::kMillisecond);
89+
7790
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B1::Session)->Name( "Conv_d100_L14_B1")->Args({100*100, 1})->Unit(benchmark::kMillisecond);
7891
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B32::Session)->Name("Conv_d100_L14_B32")->Args({100*100, 32})->Unit(benchmark::kMillisecond);
7992
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L1_B1::Session)->Name( "Conv_d100_L1_B1")->Args({100*100, 1})->Unit(benchmark::kMillisecond);
@@ -87,12 +100,5 @@ BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_RNN_d10_L20_h8_B1::Session)->N
87100
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_GRU_d10_L20_h8_B1::Session)->Name("GRU_d10_L20_h8_B1")->Args({3 * 5, 1})->Unit(benchmark::kMillisecond);
88101
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_LSTM_d10_L20_h8_B1::Session)->Name("LSTM_d10_L20_h8_B1")->Args({1 * 1, 1})->Unit(benchmark::kMillisecond);
89102

90-
//Gemm benchmarks
91-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_16::Session)->Name("Linear_16")->Args({100, 16})->Unit(benchmark::kMillisecond);
92-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_32::Session)->Name("Linear_32")->Args({100, 32})->Unit(benchmark::kMillisecond);
93-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_64::Session)->Name("Linear_64")->Args({100, 64})->Unit(benchmark::kMillisecond);
94-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Linear_event")->Args({100, 1})->Unit(benchmark::kMillisecond);
95-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Linear_event")->Args({100, 1})->Unit(benchmark::kMillisecond);
96-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Generator_B1")->Args({14, 1})->Unit(benchmark::kMillisecond);
97-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Generator_B64")->Args({14, 64})->Unit(benchmark::kMillisecond);
103+
98104
BENCHMARK_MAIN();
36.2 MB
Binary file not shown.

0 commit comments

Comments
 (0)