Skip to content

Commit 09ca844

Browse files
committed
Add resnet to benchmarks
Also use a fixed number of events (e.g. 64) per benchmark iteration
1 parent 4066a94 commit 09ca844

File tree

5 files changed

+106
-90
lines changed

5 files changed

+106
-90
lines changed

root/tmva/sofie/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ if(ROOT_tmva_FOUND AND ROOT_tmva-sofie_FOUND)
117117
#use file B1 as B32 for weights : it is the same
118118
configure_file(input_models/compiled/Conv_d100_L14_B32.dat Conv_d100_L14_B32.dat COPYONLY)
119119

120+
configure_file(input_models/compiled/resnet18v1.hxx resnet18v1.hxx COPYONLY)
121+
configure_file(input_models/compiled/resnet18v1.dat resnet18v1.dat COPYONLY)
122+
120123
# Benchmark for models emitted by SOFIE
121124
RB_ADD_GBENCHMARK(SOFIEInference
122125
SOFIEInference.cxx

root/tmva/sofie/ONNXRuntimeInference_Template.cxx.in

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,14 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
3838
.GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
3939

4040
// Calculating the dimension of the input tensor
41+
int nevt = 64;
42+
int bsize = input_node_dims[0];
43+
//std::cout << "Using bsize = " << bsize << std::endl;
44+
int nbatches = nevt / bsize;
4145

4246
size_t input_tensor_size = accumulate(input_node_dims.begin(),
4347
input_node_dims.end(), 1, multiplies<int>());
44-
vector<float> input_tensor_values(input_tensor_size);
48+
vector<float> input_tensor_values(input_tensor_size*nbatches);
4549

4650
// Input tensor initialization
4751
static std::uniform_real_distribution<float> distribution(-1,1);
@@ -56,10 +60,15 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
5660

5761
// Running the model
5862
float * floatarr = nullptr;
63+
64+
65+
5966
for (auto _ : state) {
60-
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(),
61-
&input_tensor, 1, output_node_names.data(), 1);
67+
for (int i = 0; i < nevt; i+= bsize) {
68+
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1,
69+
output_node_names.data(), 1);
6270
floatarr = output_tensors.front().GetTensorMutableData<float>();
71+
}
6372
}
6473
//for (int i = 0; i < 10; i++)
6574
// printf("%f\t", i, floatarr[i]);

root/tmva/sofie/SOFIEInference.cxx

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,22 @@
1818
#include "Conv_d100_L1_B1.hxx"
1919
#include "Conv_d100_L14_B1.hxx"
2020
#include "Conv_d100_L14_B32.hxx"
21-
//#include "input_models/compiled/Linear_16.hxx"
22-
///#include "input_models/compiled/Linear_32.hxx"
23-
//#include "input_models/compiled/Linear_64.hxx"
24-
//#include "input_models/compiled/Linear_event_blas.hxx" // old file
25-
//#include "input_models/compiled/Linear_event.hxx" // generated from Linear_event.onnx
26-
//#include "input_models/compiled/Conv_d100_L1_B1_opt.hxx"
27-
//#include "input_models/compiled/Conv_d100_L14_B1.hxx"
21+
22+
#include "resnet18v1.hxx"
23+
24+
2825

2926
using namespace std;
3027
bool verbose = false;
3128
template <class S>
3229
void BM_SOFIE_Inference(benchmark::State &state)
3330
{
3431
size_t inputSize = state.range(0);
35-
vector<float> input(inputSize);
32+
size_t bsize = (state.range(1) > 0) ? state.range(1) : 0;
33+
size_t nevts = 64;
34+
size_t nrep = nevts / bsize;
35+
36+
vector<float> input(inputSize*nevts);
3637

3738
static std::uniform_real_distribution<float> distribution(-1, 1);
3839
static std::default_random_engine generator;
@@ -42,18 +43,21 @@ void BM_SOFIE_Inference(benchmark::State &state)
4243
S s("");
4344

4445
for (auto _ : state) {
45-
s.infer(input.data());
46+
for (int i = 0; i < nevts; i+= bsize)
47+
s.infer(input.data()+ inputSize*i);
4648
}
4749
//if (verbose) std::cout << "output : " << output.size() << " : " << output.front() << " ......" << output.back() << std::endl;
4850
}
4951
//typedef TMVA_SOFIE_Conv_d100_L1_B1::Session S1;
5052
//BENCHMARK(BM_SOFIE_Inference<S1>);//->Name( "Conv_d100_L1_B1");
51-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L1_B1::Session)->Name( "Conv_d100_L1_B1")->Args({100*100})->Unit(benchmark::kMillisecond);
52-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B1::Session)->Name( "Conv_d100_L14_B1")->Args({100*100})->Unit(benchmark::kMillisecond);
53-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B32::Session)->Name("Conv_d100_L14_B32")->Args({100*100*32})->Unit(benchmark::kMillisecond);
53+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L1_B1::Session)->Name( "Conv_d100_L1_B1")->Args({100*100, 1})->Unit(benchmark::kMillisecond);
54+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B1::Session)->Name( "Conv_d100_L14_B1")->Args({100*100, 1})->Unit(benchmark::kMillisecond);
55+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B32::Session)->Name("Conv_d100_L14_B32")->Args({100*100, 32})->Unit(benchmark::kMillisecond);
56+
57+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_resnet18v1::Session)->Name("resnet18v1")->Args({3 * 224 * 224, 1})->Unit(benchmark::kMillisecond);
5458
//Gemm benchmarks
55-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Linear_event")->Args({100})->Unit(benchmark::kMillisecond);
56-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_16::Session)->Name("Linear_16")->Args({100*16})->Unit(benchmark::kMillisecond);
57-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_32::Session)->Name("Linear_32")->Args({100*32})->Unit(benchmark::kMillisecond);
58-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_64::Session)->Name("Linear_64")->Args({100*64})->Unit(benchmark::kMillisecond);
59+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Linear_event")->Args({100, 1})->Unit(benchmark::kMillisecond);
60+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_16::Session)->Name("Linear_16")->Args({100, 16})->Unit(benchmark::kMillisecond);
61+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_32::Session)->Name("Linear_32")->Args({100, 32})->Unit(benchmark::kMillisecond);
62+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_64::Session)->Name("Linear_64")->Args({100, 64})->Unit(benchmark::kMillisecond);
5963
BENCHMARK_MAIN();

root/tmva/sofie/input_models/compiled/Conv_d100_L1_B1.hxx

Lines changed: 71 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//Code generated automatically by TMVA for Inference of Model file [Conv_d100_L1_B1.onnx] at [Mon Nov 8 14:01:59 2021]
1+
//Code generated automatically by TMVA for Inference of Model file [Conv_d100_L1_B1.onnx] at [Sat Nov 20 10:46:47 2021]
22
#include<vector>
33
#include "TMVA/SOFIE_common.hxx"
44
#include <fstream>
@@ -12,12 +12,14 @@ namespace BLAS{
1212
const float * beta, float * C, const int * ldc);
1313
}//BLAS
1414
struct Session {
15-
float tensor_conv0bias[2] = {};
16-
float tensor_conv0weight[50] = {};
17-
std::vector<float> fTensor_conv0biasbcast = std::vector<float>(20000);
18-
float * tensor_conv0biasbcast = fTensor_conv0biasbcast.data();
15+
std::vector<float> fTensor_conv0bias = std::vector<float>(2);
16+
float * tensor_conv0bias = fTensor_conv0bias.data();
17+
std::vector<float> fTensor_conv0weight = std::vector<float>(50);
18+
float * tensor_conv0weight = fTensor_conv0weight.data();
1919
std::vector<float> fTensor_4 = std::vector<float>(20000);
2020
float * tensor_4 = fTensor_4.data();
21+
std::vector<float> fTensor_conv0biasbcast = std::vector<float>(20000);
22+
float * tensor_conv0biasbcast = fTensor_conv0biasbcast.data();
2123
std::vector<float> fTensor_3 = std::vector<float>(20000);
2224
float * tensor_3 = fTensor_3.data();
2325

@@ -37,23 +39,23 @@ Session(std::string filename ="") {
3739
int length;
3840
f >> tensor_name >> length;
3941
if (tensor_name != "tensor_conv0bias" ) {
40-
std::cout << "Error in tensor name : expected tensor name is tensor_conv0bias read " << tensor_name << std::endl;
41-
throw std::runtime_error("tmva-sofie failed to read the correct tensor name");
42+
std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_conv0bias , read " + tensor_name;
43+
throw std::runtime_error(err_msg);
4244
}
4345
if (length != 2) {
44-
std::cout << "Error in tensor size : expected tensor size is 2 read " << length << std::endl;
45-
throw std::runtime_error("tmva-sofie failed to read the correct tensor size");
46+
std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2 , read " + std::to_string(length) ;
47+
throw std::runtime_error(err_msg);
4648
}
4749
for (int i =0; i < length; ++i)
4850
f >> tensor_conv0bias[i];
4951
f >> tensor_name >> length;
5052
if (tensor_name != "tensor_conv0weight" ) {
51-
std::cout << "Error in tensor name : expected tensor name is tensor_conv0weight read " << tensor_name << std::endl;
52-
throw std::runtime_error("tmva-sofie failed to read the correct tensor name");
53+
std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_conv0weight , read " + tensor_name;
54+
throw std::runtime_error(err_msg);
5355
}
5456
if (length != 50) {
55-
std::cout << "Error in tensor size : expected tensor size is 50 read " << length << std::endl;
56-
throw std::runtime_error("tmva-sofie failed to read the correct tensor size");
57+
std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ;
58+
throw std::runtime_error(err_msg);
5759
}
5860
for (int i =0; i < length; ++i)
5961
f >> tensor_conv0weight[i];
@@ -63,71 +65,69 @@ Session(std::string filename ="") {
6365
std::vector<size_t> newShape = { 2, 100, 100};
6466
oldShape.resize(newShape.size(), 1.);
6567
float * newData_ptr = TMVA::Experimental::SOFIE::UTILITY::Unidirectional_broadcast<float>(tensor_conv0bias, oldShape, newShape);
66-
int length = TMVA::Experimental::SOFIE::ConvertShapeToLength(newShape);
6768
for (int i = 0; i < 1 ; i++)
68-
std::copy(newData_ptr, newData_ptr + length , tensor_conv0biasbcast + i * length);
69+
std::copy(newData_ptr, newData_ptr + 20000, tensor_conv0biasbcast + i * 20000);
70+
delete [] newData_ptr;
6971
}
7072
}
7173

7274
std::vector<float> infer(float* tensor_input){
7375

7476
//---- operator Conv op_0
75-
float * op_0_f = fVec_op_0_f.data();
76-
for (std::size_t k = 0; k < 2; k++) {
77-
for (std::size_t d = 0; d < 1; d++) {
78-
for (std::size_t h = 0; h < 5; h++) {
79-
for (std::size_t w = 0; w < 5; w++) {
80-
op_0_f[k * 25 + d * 25 + h * 5 + w * 1 ] = tensor_conv0weight[k * 25 + d * 25 + h * 5 + w ];
81-
}
82-
}
83-
}
84-
}
85-
float * op_0_xpad = fVec_op_0_xpad.data();
86-
for (size_t c = 0; c < 1; c++) {
87-
for (size_t h = 0; h < 100; h++) {
88-
size_t xpad_offset = c * 10816 + (h + 2) * 104 + 2;
89-
size_t x_offset = c * 10000 + h * 100;
90-
std::copy(tensor_input + x_offset, tensor_input + x_offset + 100, op_0_xpad + xpad_offset);
91-
}
92-
}
93-
char op_0_transA = 'T';
94-
char op_0_transB = 'N';
95-
int op_0_m = 10000;
96-
int op_0_n = 2;
97-
int op_0_k = 25;
98-
float op_0_alpha = 1.0;
99-
float op_0_beta = 0.0;
100-
float * op_0_xcol = fVec_op_0_xcol.data();
101-
for (size_t n = 0; n < 1; n++) {
102-
size_t op_0_index = 0;
103-
size_t offseth = 0;
104-
for (size_t h = 0; h < 100; h += 1) {
105-
for (size_t w = 0; w < 100;w += 1) {
106-
size_t offsetc = 0;
107-
for (size_t c = 0; c < 1; c++) {
108-
size_t offsetx = 0;
109-
for (size_t x = 0; x < 5; x++) {
110-
size_t offset = offsetc + offseth + offsetx + w;
111-
std::copy(op_0_xpad + offset, op_0_xpad + offset + 5, op_0_xcol + op_0_index);
112-
op_0_index += 5;
113-
offsetx += 104;
114-
}
115-
offsetc += 10816;
116-
}
117-
}
118-
offseth += 104;
119-
}
120-
BLAS::sgemm_(&op_0_transA, &op_0_transB, &op_0_m, &op_0_n, &op_0_k, &op_0_alpha, op_0_xcol, &op_0_k,
121-
op_0_f, &op_0_k, &op_0_beta, tensor_3, &op_0_m);
122-
}
123-
int op_0_size = 20000;
124-
float op_0_gamma = 1.0;
125-
int op_0_incx = 1;
126-
int op_0_incy = 1;
127-
BLAS::saxpy_(&op_0_size, &op_0_gamma, tensor_conv0biasbcast, &op_0_incx, tensor_3, &op_0_incy);
128-
for (int id = 0; id < 20000 ; id++){
129-
tensor_4[id] = ((tensor_3[id] > 0 )? tensor_3[id] : 0);
130-
}
77+
float * op_0_f = fVec_op_0_f.data();
78+
for (std::size_t k = 0; k < 2; k++) {
79+
for (std::size_t d = 0; d < 1; d++) {
80+
for (std::size_t h = 0; h < 5; h++) {
81+
for (std::size_t w = 0; w < 5; w++) {
82+
op_0_f[k * 25 + d * 25 + h * 5 + w * 1 ] = tensor_conv0weight[k * 25 + d * 25 + h * 5 + w ];
83+
}
84+
}
85+
}
86+
}
87+
char op_0_transA = 'T';
88+
char op_0_transB = 'N';
89+
int op_0_m = 10000;
90+
int op_0_n = 2;
91+
int op_0_k = 25;
92+
float op_0_alpha = 1.0;
93+
float op_0_beta = 0.0;
94+
float * op_0_xpad = fVec_op_0_xpad.data();
95+
float * op_0_xcol = fVec_op_0_xcol.data();
96+
size_t offset_tensor_3 = 0;
97+
for (size_t n = 0; n < 1; n++) {
98+
for (size_t c = 0; c < 1; c++) {
99+
for (size_t h = 0; h < 100; h++) {
100+
size_t xpad_offset = c * 10816 + (h + 2) * 104 + 2;
101+
size_t x_offset = c * 10000 + h * 100;
102+
std::copy(tensor_input + x_offset, tensor_input + x_offset + 100, op_0_xpad + xpad_offset);
103+
}
104+
}
105+
size_t op_0_index = 0;
106+
for (size_t h = 0; h < 100; h += 1) {
107+
for (size_t w = 0; w < 100;w += 1) {
108+
for (size_t c = 0; c < 1; c++) {
109+
for (size_t x = 0; x < 5; x++) {
110+
size_t offset = c * 10816 + (h + x) * 104 + w;
111+
std::copy(op_0_xpad + offset, op_0_xpad + offset + 5, op_0_xcol + op_0_index);
112+
op_0_index += 5;
113+
}
114+
}
115+
}
116+
}
117+
BLAS::sgemm_(&op_0_transA, &op_0_transB, &op_0_m, &op_0_n, &op_0_k, &op_0_alpha, op_0_xcol, &op_0_k,
118+
op_0_f, &op_0_k, &op_0_beta, tensor_3 + offset_tensor_3, &op_0_m);
119+
offset_tensor_3 += 20000;
120+
}
121+
int op_0_size = 20000;
122+
float op_0_gamma = 1.0;
123+
int op_0_incx = 1;
124+
int op_0_incy = 1;
125+
BLAS::saxpy_(&op_0_size, &op_0_gamma, tensor_conv0biasbcast, &op_0_incx, tensor_3, &op_0_incy);
126+
127+
//------ RELU
128+
for (int id = 0; id < 20000 ; id++){
129+
tensor_4[id] = ((tensor_3[id] > 0 )? tensor_3[id] : 0);
130+
}
131131
std::vector<float> ret (tensor_4, tensor_4 + 20000);
132132
return ret;
133133
}
44.7 MB
Binary file not shown.

0 commit comments

Comments
 (0)