Add LWTNN inference test and Fast simulation model from G4

lmoneta · lmoneta · commit 44ccf6de8959 · 2022-03-09T15:59:13.000+01:00
Add json file for Generator model
diff --git a/root/tmva/sofie/CMakeLists.txt b/root/tmva/sofie/CMakeLists.txt
@@ -202,9 +202,17 @@ if (LWTNN_FOUND)
   
    message(STATUS "Found LWTNN (library is  ${LWTNN_LIBRARY}, libraries ${LWTNN_LIBRARIES})")
    configure_file(input_models/higgs_model_dense.json higgs_model_dense.json COPYONLY)
+   configure_file(input_models/Generator.json.gz Generator.json.gz COPYONLY)
+   execute_process(COMMAND gunzip -f ${CMAKE_CURRENT_BINARY_DIR}/Generator.json.gz)
 #   set(LWTNN_INCLUDE_DIR /home/moneta/cernbox/root/tests/tmva/sofie/lwtnn-build/include)
 #   set(LWTNN_LIBS /home/moneta/cernbox/root/tests/tmva/sofie/lwtnn-build/lib/liblwtnn.so)
-   RB_ADD_GBENCHMARK(RDF_lwtnn_Inference
+   RB_ADD_GBENCHMARK(LWTNNInference
+       LWTNNInference.cxx
+      LABEL short
+      LIBRARIES Core Hist Imt RIO Tree TreePlayer ROOTDataFrame ROOTVecOps TMVA  ROOTTMVASofie  ${LWTNN_LIBRARY})
+   target_include_directories(LWTNNInference PRIVATE ${LWTNN_INCLUDE_DIR})
+
+    RB_ADD_GBENCHMARK(RDF_lwtnn_Inference
        RDF_lwtnn_Inference.cxx
       LABEL short
       LIBRARIES Core Hist Imt RIO Tree TreePlayer ROOTDataFrame ROOTVecOps TMVA  ROOTTMVASofie  ${LWTNN_LIBRARY})
diff --git a/root/tmva/sofie/LWTNNInference.cxx b/root/tmva/sofie/LWTNNInference.cxx
@@ -0,0 +1,148 @@
+// Author: Federico Sossai (fsossai), 2021
+
+#include <benchmark/benchmark.h>
+
+#include <iostream>
+#include <thread>
+#include <chrono>
+#include <utility>
+#include <vector>
+#include <memory>
+#include <functional>
+#include <random>
+#include <fstream>
+#include <stdlib.h>
+
+#include "TMath.h"
+
+#include "lwtnn/LightweightNeuralNetwork.hh"
+#include "lwtnn/LightweightGraph.hh"
+#include "lwtnn/parse_json.hh"
+
+bool verbose = false; // verbosity flag; not read by any benchmark in this file
+
+/// Benchmark lwtnn inference for a sequential model (lwt::LightweightNeuralNetwork).
+/// Loads "<model_name>.json", feeds uniform random inputs in [-1, 1) and stores the
+/// average time per event in the "time/evt(ms)" counter.
+/// \param state       Google Benchmark state driving the timed iterations
+/// \param model_name  model file name without the ".json" extension
+/// \param inputSize   number of input variables; must match the model configuration
+/// \param outputSize  number of output values kept per event (default 1)
+/// \throws std::runtime_error if the file cannot be opened or inputSize does not match
+void BM_LWTNN_Inference_model(benchmark::State &state, std::string model_name, size_t inputSize, size_t outputSize = 1)
+{
+   std::string model_filename = model_name + ".json";
+   std::ifstream config_file(model_filename);
+   if (!config_file.is_open())
+      throw std::runtime_error("Cannot open model file " + model_filename);
+   auto config = lwt::parse_json(config_file);
+   config_file.close();
+
+   // Set up neural network model from config
+   auto model = std::make_unique<lwt::LightweightNeuralNetwork>(config.inputs, config.layers, config.outputs);
+
+   if (config.inputs.size() != inputSize) {
+      throw std::runtime_error("Bad input size - it is " + std::to_string(inputSize) +
+                               " and should be " + std::to_string(config.inputs.size()));
+   }
+
+   // Create the input map entries once; the timed loop only overwrites their values
+   std::map<std::string, double> inputs;
+   std::vector<std::string> names;
+   names.reserve(inputSize);
+   for (const auto &in : config.inputs) {
+      inputs[in.name] = 0.0;
+      names.push_back(in.name);
+   }
+
+   const size_t bsize = 1; // bsize is always 1 for lwtnn
+   const size_t nevts = 64;
+   std::vector<float> input(inputSize * nevts);
+   static std::uniform_real_distribution<float> distribution(-1, 1);
+   static std::default_random_engine generator;
+   std::generate(input.begin(), input.end(), []() { return distribution(generator); });
+
+   double totDuration = 0;
+   size_t ntimes = 0;
+   std::vector<float> y(outputSize);
+   for (auto _ : state) {
+      auto t1 = std::chrono::high_resolution_clock::now();
+      for (size_t i = 0; i < nevts; i += bsize) {
+         for (size_t j = 0; j < inputSize; j++)
+            inputs[names[j]] = input[i * inputSize + j];
+
+         auto outputs = model->compute(inputs);
+         // keep at most outputSize values; safe when outputSize is 0 or the result is empty
+         size_t k = 0;
+         for (auto it = outputs.begin(); it != outputs.end() && k < outputSize; ++it)
+            y[k++] = it->second;
+      }
+      auto t2 = std::chrono::high_resolution_clock::now();
+      auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
+      totDuration += duration / 1.E3; // in milliseconds
+      ntimes++;
+   }
+
+   state.counters["time/evt(ms)"] = totDuration / double(ntimes * nevts);
+}
+
+/// Benchmark lwtnn inference for a graph model (lwt::LightweightGraph).
+/// Loads "<model_name>.json"; inputs are passed as node "node_0" with variables
+/// "variable_<j>" and outputs are read back as "out_<i>".
+/// \param state       Google Benchmark state driving the timed iterations
+/// \param model_name  model file name without the ".json" extension
+/// \param inputSize   number of input variables fed to node "node_0"
+/// \param outputSize  number of "out_<i>" values read per event (default 1)
+/// \throws std::runtime_error if the model file cannot be opened
+void BM_LWTNN_Inference_graph(benchmark::State &state, std::string model_name, size_t inputSize, size_t outputSize = 1 )
+{
+   std::string model_filename = model_name + ".json";
+   std::ifstream config_file(model_filename);
+   if (!config_file.is_open())
+      throw std::runtime_error("Cannot open model file " + model_filename);
+   auto graph = std::make_unique<lwt::LightweightGraph>(lwt::parse_json_graph(config_file));
+   config_file.close();
+
+   // Build the key strings once so that string formatting is not part of the timed loop
+   std::vector<std::string> names(inputSize), outNames(outputSize);
+   for (size_t j = 0; j < inputSize; j++)
+      names[j] = "variable_" + std::to_string(j);
+   for (size_t k = 0; k < outputSize; k++)
+      outNames[k] = "out_" + std::to_string(k);
+
+   std::map<std::string, std::map<std::string, double>> inputs;
+   auto &nodeInputs = inputs["node_0"]; // std::map references stay valid across insertions
+   const size_t bsize = 1; // bsize is always 1 for lwtnn
+   const size_t nevts = 64;
+   std::vector<float> input(inputSize * nevts);
+   static std::uniform_real_distribution<float> distribution(-1, 1);
+   static std::default_random_engine generator;
+   std::generate(input.begin(), input.end(), []() { return distribution(generator); });
+
+   double totDuration = 0;
+   size_t ntimes = 0;
+   std::vector<float> y(outputSize);
+   for (auto _ : state) {
+      auto t1 = std::chrono::high_resolution_clock::now();
+      for (size_t i = 0; i < nevts; i += bsize) {
+         for (size_t j = 0; j < inputSize; j++)
+            nodeInputs[names[j]] = input[i * inputSize + j];
+
+         auto outputs = graph->compute(inputs);
+         for (size_t k = 0; k < outputSize; k++)
+            y[k] = outputs[outNames[k]];
+      }
+      auto t2 = std::chrono::high_resolution_clock::now();
+      auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
+      totDuration += duration / 1.E3; // in milliseconds
+      ntimes++;
+   }
+   state.counters["time/evt(ms)"] = totDuration / double(ntimes * nevts);
+}
+
+// LWTNN benchmarks
+// Use BENCHMARK_CAPTURE to pass extra arguments: the 2nd macro argument is an arbitrary test name, then the model file name (without ".json") and the input size
+BENCHMARK_CAPTURE(BM_LWTNN_Inference_model,higgs_model_dense, "higgs_model_dense",7)->Unit(benchmark::kMillisecond);
+BENCHMARK_CAPTURE(BM_LWTNN_Inference_graph, generator, "Generator",14)->Unit(benchmark::kMillisecond);
+
+BENCHMARK_MAIN();
diff --git a/root/tmva/sofie/ONNXRuntimeInference_Template.cxx.in b/root/tmva/sofie/ONNXRuntimeInference_Template.cxx.in
@@ -47,6 +47,7 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
    size_t input_tensor_size = accumulate(input_node_dims.begin(),
       input_node_dims.end(), 1, multiplies<int>());
    vector<float> input_tensor_values(input_tensor_size*nbatches);
+   //std::cout << "input tensor size " << input_tensor_size << "  " << input_tensor_values.size() << std::endl;
 
    // Input tensor initialization   
    static std::uniform_real_distribution<float> distribution(-1,1);
@@ -55,9 +56,9 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
    //fill_n(input_tensor_values.begin(), input_tensor_size, 1.0);
 
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
-   Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info,
-      input_tensor_values.data(), input_tensor_size,
-      input_node_dims.data(), input_node_dims.size());
+   // Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info,
+   //    input_tensor_values.data(), input_tensor_size,
+   //    input_node_dims.data(), input_node_dims.size());
 
    // Running the model
    float * floatarr = nullptr;
@@ -66,10 +67,18 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path)
    int ntimes = 0;
    for (auto _ : state) {
       auto t1 = std::chrono::high_resolution_clock::now();
-      for (int i = 0; i < nevts; i+= bsize) {
+      size_t input_offset = 0;
+      for (int i = 0; i < nevts; i += bsize) {
+         // if (input_offset > input_tensor_values.size()) {
+         //    std::cout << "Error in input size " << i << "  " << nevts << "  " << model_path << std::endl;
+         //    throw std::runtime_error("Bad input size ");
+         // }
+         Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
+            memory_info, input_tensor_values.data()+input_offset, input_tensor_size, input_node_dims.data(), input_node_dims.size());
          auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1,
                                            output_node_names.data(), 1);
          floatarr = output_tensors.front().GetTensorMutableData<float>();
+         input_offset += input_tensor_size;
       }
 
       auto t2 = std::chrono::high_resolution_clock::now();
diff --git a/root/tmva/sofie/SOFIEInference.cxx b/root/tmva/sofie/SOFIEInference.cxx
@@ -15,13 +15,16 @@
 #include "Linear_16.hxx"
 #include "Linear_32.hxx"
 #include "Linear_64.hxx"
+#include "Generator_B1.hxx"
+#include "Generator_B64.hxx"
 #include "Conv_d100_L1_B1.hxx"
 #include "Conv_d100_L14_B1.hxx"
 #include "Conv_d100_L14_B32.hxx"
 #include "Conv3d_d32_L4_B1.hxx"
 #include "RNN_d10_L20_h8_B1.hxx"
 #include "GRU_d10_L20_h8_B1.hxx"
 #include "LSTM_d10_L20_h8_B1.hxx"
+#include "higgs_model_dense.hxx"
 
 #include "resnet18v1.hxx"
 #include "TMath.h"
@@ -32,7 +35,7 @@ bool verbose = false;
 template <class S>
 void BM_SOFIE_Inference(benchmark::State &state)
 { 
-   size_t inputSize = state.range(0);
+   size_t inputSize = state.range(0);  // input size (without batch size)
    size_t bsize = (state.range(1) > 0) ? state.range(1) : 0;
    size_t nevts = 64;
    size_t nrep = nevts / bsize;
@@ -74,6 +77,16 @@ void BM_SOFIE_Inference(benchmark::State &state)
 //typedef TMVA_SOFIE_Conv_d100_L1_B1::Session S1;
 //BENCHMARK(BM_SOFIE_Inference<S1>);//->Name( "Conv_d100_L1_B1");
 
+//Gemm benchmarks
+BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_16::Session)->Name("Linear_16")->Args({100, 16})->Unit(benchmark::kMillisecond);
+BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_32::Session)->Name("Linear_32")->Args({100, 32})->Unit(benchmark::kMillisecond);
+BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_64::Session)->Name("Linear_64")->Args({100, 64})->Unit(benchmark::kMillisecond);
+BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Linear_event")->Args({100, 1})->Unit(benchmark::kMillisecond);
+BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Generator_B1::Session)->Name("Generator_B1")->Args({14, 1})->Unit(benchmark::kMillisecond);
+BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Generator_B64::Session)->Name("Generator_B64")->Args({14, 64})->Unit(benchmark::kMillisecond);
+
+BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_higgs_model_dense::Session)->Name("higgs_model_dense")->Args({7, 1})->Unit(benchmark::kMillisecond);
+
 BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B1::Session)->Name( "Conv_d100_L14_B1")->Args({100*100, 1})->Unit(benchmark::kMillisecond);
 BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L14_B32::Session)->Name("Conv_d100_L14_B32")->Args({100*100, 32})->Unit(benchmark::kMillisecond);
 BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Conv_d100_L1_B1::Session)->Name( "Conv_d100_L1_B1")->Args({100*100, 1})->Unit(benchmark::kMillisecond);
@@ -87,12 +100,5 @@ BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_RNN_d10_L20_h8_B1::Session)->N
 BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_GRU_d10_L20_h8_B1::Session)->Name("GRU_d10_L20_h8_B1")->Args({3 * 5, 1})->Unit(benchmark::kMillisecond);
 BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_LSTM_d10_L20_h8_B1::Session)->Name("LSTM_d10_L20_h8_B1")->Args({1 * 1, 1})->Unit(benchmark::kMillisecond);
 
-//Gemm benchmarks
-BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_16::Session)->Name("Linear_16")->Args({100, 16})->Unit(benchmark::kMillisecond);
-BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_32::Session)->Name("Linear_32")->Args({100, 32})->Unit(benchmark::kMillisecond);
-BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_64::Session)->Name("Linear_64")->Args({100, 64})->Unit(benchmark::kMillisecond);
-BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Linear_event")->Args({100, 1})->Unit(benchmark::kMillisecond);
-BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Linear_event")->Args({100, 1})->Unit(benchmark::kMillisecond);
-BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Generator_B1")->Args({14, 1})->Unit(benchmark::kMillisecond);
-BENCHMARK_TEMPLATE(BM_SOFIE_Inference, TMVA_SOFIE_Linear_event::Session)->Name("Generator_B64")->Args({14, 64})->Unit(benchmark::kMillisecond);
+
 BENCHMARK_MAIN();
diff --git a/root/tmva/sofie/input_models/Generator.json.gz b/root/tmva/sofie/input_models/Generator.json.gz