Skip to content

Commit 09ca844

Browse files
committed
Add resnet to benchmarks
Also use a fixed number of events (e.g. 64) per benchmark iteration
1 parent 4066a94 commit 09ca844

File tree

5 files changed

+106
-90
lines changed

5 files changed

+106
-90
lines changed

root/tmva/sofie/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ if(ROOT_tmva_FOUND AND ROOT_tmva-sofie_FOUND)
117117
#use file B1 as B32 for weights : it is the same
118118
configure_file(input_models/compiled/Conv_d100_L14_B32.dat Conv_d100_L14_B32.dat COPYONLY)
119119

120+
configure_file(input_models/compiled/resnet18v1.hxx resnet18v1.hxx COPYONLY)
121+
configure_file(input_models/compiled/resnet18v1.dat resnet18v1.dat COPYONLY)
122+
120123
# Benchmark for models emitted by SOFIE
121124
RB_ADD_GBENCHMARK(SOFIEInference
122125
SOFIEInference.cxx

root/tmva/sofie/ONNXRuntimeInference_Template.cxx.in

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,14 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
3838
.GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
3939

4040
// Calculating the dimension of the input tensor
41+
int nevt = 64;
42+
int bsize = input_node_dims[0];
43+
//std::cout << "Using bsize = " << bsize << std::endl;
44+
int nbatches = nevt / bsize;
4145

4246
size_t input_tensor_size = accumulate(input_node_dims.begin(),
4347
input_node_dims.end(), 1, multiplies<int>());
44-
vector<float> input_tensor_values(input_tensor_size);
48+
vector<float> input_tensor_values(input_tensor_size*nbatches);
4549

4650
// Input tensor initialization
4751
static std::uniform_real_distribution<float> distribution(-1,1);
@@ -56,10 +60,15 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
5660

5761
// Running the model
5862
float * floatarr = nullptr;
63+
64+
65+
5966
for (auto _ : state) {
60-
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(),
61-
&input_tensor, 1, output_node_names.data(), 1);
67+
for (int i = 0; i < nevt; i+= bsize) {
68+
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1,
69+
output_node_names.data(), 1);
6270
floatarr = output_tensors.front().GetTensorMutableData<float>();
71+
}
6372
}
6473
//for (int i = 0; i < 10; i++)
6574
// printf("%f\t", i, floatarr[i]);

root/tmva/sofie/SOFIEInference.cxx

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,22 @@
1818
#include "Conv_d100_L1_B1.hxx"
1919
#include "Conv_d100_L14_B1.hxx"
2020
#include "Conv_d100_L14_B32.hxx"
21-
//#include "input_models/compiled/Linear_16.hxx"
22-
///#include "input_models/compiled/Linear_32.hxx"
23-
//#include "input_models/compiled/Linear_64.hxx"
24-
//#include "input_models/compiled/Linear_event_blas.hxx" // old file
25-
//#include "input_models/compiled/Linear_event.hxx" // generated from Linear_event.onnx
26-
//#include "input_models/compiled/Conv_d100_L1_B1_opt.hxx"
27-
//#include "input_models/compiled/Conv_d100_L14_B1.hxx"
21+
22+
#include "resnet18v1.hxx"
23+
24+
2825

2926
using namespace std;
3027
bool verbose = false;
3128
template <class S>
3229
void BM_SOFIE_Inference(benchmark::State &state)
3330
{
3431
size_t inputSize = state.range(0);
35-
vector<float> input(inputSize);
32+
size_t bsize = (state.range(1) > 0) ? state.range(1) : 0;
33+
size_t nevts = 64;
34+
size_t nrep = nevts / bsize;
35+
36+
vector<float> input(inputSize*nevts);
3637

3738
static std::uniform_real_distribution<float> distribution(-1, 1);
3839
static std::default_random_engine generator;
@@ -42,18 +43,21 @@ void BM_SOFIE_Inference(benchmark::State &state)
4243
S s("");
4344

4445
for (auto _ : state) {
45-
s.infer(input.data());
46+
for (int i = 0; i < nevts; i+= bsize)
47+
s.infer(input.data()+ inputSize*i);
4648
}
4749
//if (verbose) std::cout << "output : " << output.size() << " : " << output.front() << " ......" << output.back() << std::endl;
4850
}
4951
//typedef TMVA_SOFIE_Conv_d100_L1_B1::Session S1;
5052
//BENCHMARK(BM_SOFIE_Inference<S1>);//->Name( "Conv_d100_L1_B1");
51-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L1_B1::Session)->Name( "Conv_d100_L1_B1")->Args({100*100})->Unit(benchmark::kMillisecond);
52-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B1::Session)->Name( "Conv_d100_L14_B1")->Args({100*100})->Unit(benchmark::kMillisecond);
53-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B32::Session)->Name("Conv_d100_L14_B32")->Args({100*100*32})->Unit(benchmark::kMillisecond);
53+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L1_B1::Session)->Name( "Conv_d100_L1_B1")->Args({100*100, 1})->Unit(benchmark::kMillisecond);
54+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B1::Session)->Name( "Conv_d100_L14_B1")->Args({100*100, 1})->Unit(benchmark::kMillisecond);
55+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B32::Session)->Name("Conv_d100_L14_B32")->Args({100*100, 32})->Unit(benchmark::kMillisecond);
56+
57+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_resnet18v1::Session)->Name("resnet18v1")->Args({3 * 224 * 224, 1})->Unit(benchmark::kMillisecond);
5458
//Gemm benchmarks
55-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Linear_event")->Args({100})->Unit(benchmark::kMillisecond);
56-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_16::Session)->Name("Linear_16")->Args({100*16})->Unit(benchmark::kMillisecond);
57-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_32::Session)->Name("Linear_32")->Args({100*32})->Unit(benchmark::kMillisecond);
58-
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_64::Session)->Name("Linear_64")->Args({100*64})->Unit(benchmark::kMillisecond);
59+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Linear_event")->Args({100, 1})->Unit(benchmark::kMillisecond);
60+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_16::Session)->Name("Linear_16")->Args({100, 16})->Unit(benchmark::kMillisecond);
61+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_32::Session)->Name("Linear_32")->Args({100, 32})->Unit(benchmark::kMillisecond);
62+
BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_64::Session)->Name("Linear_64")->Args({100, 64})->Unit(benchmark::kMillisecond);
5963
BENCHMARK_MAIN();

root/tmva/sofie/input_models/compiled/Conv_d100_L1_B1.hxx

Lines changed: 71 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//Code generated automatically by TMVA for Inference of Model file [Conv_d100_L1_B1.onnx] at [Mon Nov 8 14:01:59 2021]
1+
//Code generated automatically by TMVA for Inference of Model file [Conv_d100_L1_B1.onnx] at [Sat Nov 20 10:46:47 2021]
22
#include<vector>
33
#include "TMVA/SOFIE_common.hxx"
44
#include <fstream>
@@ -12,12 +12,14 @@ namespace BLAS{
1212
const float * beta, float * C, const int * ldc);
1313
}//BLAS
1414
struct Session {
15-
float tensor_conv0bias[2] = {};
16-
float tensor_conv0weight[50] = {};
17-
std::vector<float> fTensor_conv0biasbcast = std::vector<float>(20000);
18-
float * tensor_conv0biasbcast = fTensor_conv0biasbcast.data();
15+
std::vector<float> fTensor_conv0bias = std::vector<float>(2);
16+
float * tensor_conv0bias = fTensor_conv0bias.data();
17+
std::vector<float> fTensor_conv0weight = std::vector<float>(50);
18+
float * tensor_conv0weight = fTensor_conv0weight.data();
1919
std::vector<float> fTensor_4 = std::vector<float>(20000);
2020
float * tensor_4 = fTensor_4.data();
21+
std::vector<float> fTensor_conv0biasbcast = std::vector<float>(20000);
22+
float * tensor_conv0biasbcast = fTensor_conv0biasbcast.data();
2123
std::vector<float> fTensor_3 = std::vector<float>(20000);
2224
float * tensor_3 = fTensor_3.data();
2325

@@ -37,23 +39,23 @@ Session(std::string filename ="") {
3739
int length;
3840
f >> tensor_name >> length;
3941
if (tensor_name != "tensor_conv0bias" ) {
40-
std::cout << "Error in tensor name : expected tensor name is tensor_conv0bias read " << tensor_name << std::endl;
41-
throw std::runtime_error("tmva-sofie failed to read the correct tensor name");
42+
std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_conv0bias , read " + tensor_name;
43+
throw std::runtime_error(err_msg);
4244
}
4345
if (length != 2) {
44-
std::cout << "Error in tensor size : expected tensor size is 2 read " << length << std::endl;
45-
throw std::runtime_error("tmva-sofie failed to read the correct tensor size");
46+
std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2 , read " + std::to_string(length) ;
47+
throw std::runtime_error(err_msg);
4648
}
4749
for (int i =0; i < length; ++i)
4850
f >> tensor_conv0bias[i];
4951
f >> tensor_name >> length;
5052
if (tensor_name != "tensor_conv0weight" ) {
51-
std::cout << "Error in tensor name : expected tensor name is tensor_conv0weight read " << tensor_name << std::endl;
52-
throw std::runtime_error("tmva-sofie failed to read the correct tensor name");
53+
std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_conv0weight , read " + tensor_name;
54+
throw std::runtime_error(err_msg);
5355
}
5456
if (length != 50) {
55-
std::cout << "Error in tensor size : expected tensor size is 50 read " << length << std::endl;
56-
throw std::runtime_error("tmva-sofie failed to read the correct tensor size");
57+
std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ;
58+
throw std::runtime_error(err_msg);
5759
}
5860
for (int i =0; i < length; ++i)
5961
f >> tensor_conv0weight[i];
@@ -63,71 +65,69 @@ Session(std::string filename ="") {
6365
std::vector<size_t> newShape = { 2, 100, 100};
6466
oldShape.resize(newShape.size(), 1.);
6567
float * newData_ptr = TMVA::Experimental::SOFIE::UTILITY::Unidirectional_broadcast<float>(tensor_conv0bias, oldShape, newShape);
66-
int length = TMVA::Experimental::SOFIE::ConvertShapeToLength(newShape);
6768
for (int i = 0; i < 1 ; i++)
68-
std::copy(newData_ptr, newData_ptr + length , tensor_conv0biasbcast + i * length);
69+
std::copy(newData_ptr, newData_ptr + 20000, tensor_conv0biasbcast + i * 20000);
70+
delete [] newData_ptr;
6971
}
7072
}
7173

7274
std::vector<float> infer(float* tensor_input){
7375

7476
//---- operator Conv op_0
75-
float * op_0_f = fVec_op_0_f.data();
76-
for (std::size_t k = 0; k < 2; k++) {
77-
for (std::size_t d = 0; d < 1; d++) {
78-
for (std::size_t h = 0; h < 5; h++) {
79-
for (std::size_t w = 0; w < 5; w++) {
80-
op_0_f[k * 25 + d * 25 + h * 5 + w * 1 ] = tensor_conv0weight[k * 25 + d * 25 + h * 5 + w ];
81-
}
82-
}
83-
}
84-
}
85-
float * op_0_xpad = fVec_op_0_xpad.data();
86-
for (size_t c = 0; c < 1; c++) {
87-
for (size_t h = 0; h < 100; h++) {
88-
size_t xpad_offset = c * 10816 + (h + 2) * 104 + 2;
89-
size_t x_offset = c * 10000 + h * 100;
90-
std::copy(tensor_input + x_offset, tensor_input + x_offset + 100, op_0_xpad + xpad_offset);
91-
}
92-
}
93-
char op_0_transA = 'T';
94-
char op_0_transB = 'N';
95-
int op_0_m = 10000;
96-
int op_0_n = 2;
97-
int op_0_k = 25;
98-
float op_0_alpha = 1.0;
99-
float op_0_beta = 0.0;
100-
float * op_0_xcol = fVec_op_0_xcol.data();
101-
for (size_t n = 0; n < 1; n++) {
102-
size_t op_0_index = 0;
103-
size_t offseth = 0;
104-
for (size_t h = 0; h < 100; h += 1) {
105-
for (size_t w = 0; w < 100;w += 1) {
106-
size_t offsetc = 0;
107-
for (size_t c = 0; c < 1; c++) {
108-
size_t offsetx = 0;
109-
for (size_t x = 0; x < 5; x++) {
110-
size_t offset = offsetc + offseth + offsetx + w;
111-
std::copy(op_0_xpad + offset, op_0_xpad + offset + 5, op_0_xcol + op_0_index);
112-
op_0_index += 5;
113-
offsetx += 104;
114-
}
115-
offsetc += 10816;
116-
}
117-
}
118-
offseth += 104;
119-
}
120-
BLAS::sgemm_(&op_0_transA, &op_0_transB, &op_0_m, &op_0_n, &op_0_k, &op_0_alpha, op_0_xcol, &op_0_k,
121-
op_0_f, &op_0_k, &op_0_beta, tensor_3, &op_0_m);
122-
}
123-
int op_0_size = 20000;
124-
float op_0_gamma = 1.0;
125-
int op_0_incx = 1;
126-
int op_0_incy = 1;
127-
BLAS::saxpy_(&op_0_size, &op_0_gamma, tensor_conv0biasbcast, &op_0_incx, tensor_3, &op_0_incy);
128-
for (int id = 0; id < 20000 ; id++){
129-
tensor_4[id] = ((tensor_3[id] > 0 )? tensor_3[id] : 0);
130-
}
77+
float * op_0_f = fVec_op_0_f.data();
78+
for (std::size_t k = 0; k < 2; k++) {
79+
for (std::size_t d = 0; d < 1; d++) {
80+
for (std::size_t h = 0; h < 5; h++) {
81+
for (std::size_t w = 0; w < 5; w++) {
82+
op_0_f[k * 25 + d * 25 + h * 5 + w * 1 ] = tensor_conv0weight[k * 25 + d * 25 + h * 5 + w ];
83+
}
84+
}
85+
}
86+
}
87+
char op_0_transA = 'T';
88+
char op_0_transB = 'N';
89+
int op_0_m = 10000;
90+
int op_0_n = 2;
91+
int op_0_k = 25;
92+
float op_0_alpha = 1.0;
93+
float op_0_beta = 0.0;
94+
float * op_0_xpad = fVec_op_0_xpad.data();
95+
float * op_0_xcol = fVec_op_0_xcol.data();
96+
size_t offset_tensor_3 = 0;
97+
for (size_t n = 0; n < 1; n++) {
98+
for (size_t c = 0; c < 1; c++) {
99+
for (size_t h = 0; h < 100; h++) {
100+
size_t xpad_offset = c * 10816 + (h + 2) * 104 + 2;
101+
size_t x_offset = c * 10000 + h * 100;
102+
std::copy(tensor_input + x_offset, tensor_input + x_offset + 100, op_0_xpad + xpad_offset);
103+
}
104+
}
105+
size_t op_0_index = 0;
106+
for (size_t h = 0; h < 100; h += 1) {
107+
for (size_t w = 0; w < 100;w += 1) {
108+
for (size_t c = 0; c < 1; c++) {
109+
for (size_t x = 0; x < 5; x++) {
110+
size_t offset = c * 10816 + (h + x) * 104 + w;
111+
std::copy(op_0_xpad + offset, op_0_xpad + offset + 5, op_0_xcol + op_0_index);
112+
op_0_index += 5;
113+
}
114+
}
115+
}
116+
}
117+
BLAS::sgemm_(&op_0_transA, &op_0_transB, &op_0_m, &op_0_n, &op_0_k, &op_0_alpha, op_0_xcol, &op_0_k,
118+
op_0_f, &op_0_k, &op_0_beta, tensor_3 + offset_tensor_3, &op_0_m);
119+
offset_tensor_3 += 20000;
120+
}
121+
int op_0_size = 20000;
122+
float op_0_gamma = 1.0;
123+
int op_0_incx = 1;
124+
int op_0_incy = 1;
125+
BLAS::saxpy_(&op_0_size, &op_0_gamma, tensor_conv0biasbcast, &op_0_incx, tensor_3, &op_0_incy);
126+
127+
//------ RELU
128+
for (int id = 0; id < 20000 ; id++){
129+
tensor_4[id] = ((tensor_3[id] > 0 )? tensor_3[id] : 0);
130+
}
131131
std::vector<float> ret (tensor_4, tensor_4 + 20000);
132132
return ret;
133133
}
44.7 MB
Binary file not shown.

0 commit comments

Comments
 (0)