[tmva][sofie] Add support in RModel for ordered input tensor names and a generic batch size

lmoneta · lmoneta · commit 52343be24938 · 2022-04-04T10:35:15.000+02:00
In case of multiple inputs it is important to have an order of the input tensors that will be used in the infer function.

Add a new data member holding a vector of strings with the input names. The vector is filled by the parsers and can later be modified by the user by calling
AddInputTensorName.

Also support a generic batch size by converting, in the Initialize function, the parameter N to the specified batch size (by default, a value of 1 is used).

Modify also Keras and PyTorch parsers for new changes.
The ONNX parser fixes are in the following commit.
diff --git a/tmva/pymva/src/RModelParser_Keras.cxx b/tmva/pymva/src/RModelParser_Keras.cxx
@@ -570,6 +570,7 @@ RModel Parse(std::string filename){
          // Getting the shape vector from the Tuple object
          std::vector<size_t>fInputShape = GetDataFromTuple(fPInputShapes);
          rmodel.AddInputTensorInfo(fInputName, ETensorType::FLOAT, fInputShape);
+         rmodel.AddInputTensorName(fInputName);
          break;
          }
 
@@ -597,6 +598,7 @@ RModel Parse(std::string filename){
 
          std::vector<size_t>fInputShape = GetDataFromTuple(fInputShapeTuple);
          rmodel.AddInputTensorInfo(fInputName, ETensorType::FLOAT, fInputShape);
+         rmodel.AddInputTensorName(fInputName);
          break;
          }
 
diff --git a/tmva/pymva/src/RModelParser_PyTorch.cxx b/tmva/pymva/src/RModelParser_PyTorch.cxx
@@ -517,6 +517,7 @@ RModel Parse(std::string filename, std::vector<std::vector<size_t>> inputShapes,
         switch(fInputDType){
             case(ETensorType::FLOAT): {
                 rmodel.AddInputTensorInfo(fInputName, ETensorType::FLOAT, fInputShape);
+                rmodel.AddInputTensorName(fInputName);
                 break;
             }
             default:
diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx
@@ -28,6 +28,7 @@ private:
    std::unordered_map<std::string, InitializedTensor> fInitializedTensors;
    std::unordered_map<std::string, TensorInfo> fIntermediateTensorInfos;
    std::vector<std::string> fOutputTensorNames;
+   std::vector<std::string> fInputTensorNames;  //input tensor names using ONNX order
 
    std::vector<std::unique_ptr<ROperator>> fOperators;
 
@@ -79,13 +80,14 @@ public:
          fNeededStdLib.insert(libname);
       }
    }
+   void AddInputTensorName(std::string name);
    void AddOutputTensorNameList(std::vector<std::string> outputtensornames);
    void UpdateInitializedTensor(std::string tensor_name, ETensorType type, std::vector<std::size_t> shape, std::shared_ptr<void> data);
    std::shared_ptr<void> GetInitializedTensorData(std::string tensor_name);
 
 
-   void Initialize();
-   void Generate(bool useSession = true, bool useWeightFile = true);
+   void Initialize(int batchSize=1);
+   void Generate(bool useSession = true, bool useWeightFile = true, int batchSize = 1);
 
    void ReadInitializedTensorsFromFile();
    void WriteInitializedTensorsToFile(std::string filename = "");
diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx
@@ -13,6 +13,7 @@ namespace SOFIE{
       fInputTensorInfos = std::move(other.fInputTensorInfos);
       fReadyInputTensorInfos = std::move(other.fReadyInputTensorInfos);
       fOutputTensorNames = other.fOutputTensorNames;
+      fInputTensorNames = other.fInputTensorNames;
       fOperators = std::move(other.fOperators);
       fInitializedTensors = std::move(other.fInitializedTensors);
       fIntermediateTensorInfos = std::move(other.fIntermediateTensorInfos);
@@ -28,6 +29,7 @@ namespace SOFIE{
       fInputTensorInfos = std::move(other.fInputTensorInfos);
       fReadyInputTensorInfos = std::move(other.fReadyInputTensorInfos);
       fOutputTensorNames = other.fOutputTensorNames;
+      fInputTensorNames = other.fInputTensorNames;
       fOperators = std::move(other.fOperators);
       fInitializedTensors = std::move(other.fInitializedTensors);
       fIntermediateTensorInfos = std::move(other.fIntermediateTensorInfos);
@@ -83,7 +85,7 @@ namespace SOFIE{
          return f4->second.type;
       }
 
-      throw std::runtime_error("TMVA SOFIE tensor [" + name + "] for which the shape is requested is not found");
+      throw std::runtime_error("TMVA SOFIE tensor [" + name + "] for which the type is requested is not found");
    }
 
    bool RModel::CheckIfTensorAlreadyExist(std::string tensor_name){
@@ -112,6 +114,10 @@ namespace SOFIE{
       fReadyInputTensorInfos[input_name] = inputInfo;
    }
 
+   void RModel::AddInputTensorName(std::string input_name) {
+       fInputTensorNames.push_back(input_name);
+   }
+
    void RModel::AddOperator(std::unique_ptr<ROperator> op, int order_execution){
       if (order_execution >= 0) {
          fOperators.insert(fOperators.begin() + order_execution, std::move(op));
@@ -164,15 +170,36 @@ namespace SOFIE{
       }
    }
 
-   void RModel::Initialize(){
+   void RModel::Initialize(int batchSize){
+      // check if there are only parametrized input tensor and convert in
+      // ready input tensor according to batch size
+      // convert parametric shape to a dimensional shape
+      if (fReadyInputTensorInfos.size() != fInputTensorNames.size()) {
+         if ( fReadyInputTensorInfos.size() + fInputTensorInfos.size() != fInputTensorNames.size())
+            throw std::runtime_error("TMVA-SOFIE: RModel::Initializes: invalid inputs");
+         for (auto & input : fInputTensorInfos) {
+            std::vector<size_t> shape;
+            shape.reserve(input.second.shape.size());
+            for (auto & d : input.second.shape){
+               if (d.isParam)
+                  shape.push_back(batchSize);
+               else
+                  shape.push_back(d.dim);
+            }
+            AddInputTensorInfo(input.first, input.second.type, shape);
+         }
+      }
+
+
       for (auto& i : fOperators){
+         //std::cout << "initialize operator  " << typeid(*i).name() << std::endl;
          i->Initialize(*this);
       }
    }
 
-   void RModel::Generate(bool useSession, bool useWeightFile){
+   void RModel::Generate(bool useSession, bool useWeightFile, int batchSize){
       fUseSession = useSession;  // session flag is used in operator initialize
-      Initialize();
+      Initialize(batchSize);
       fGC += ("//Code generated automatically by TMVA for Inference of Model file [" + fFileName + "] at [" + fParseTime.substr(0, fParseTime.length()-1) +"] \n");
       for (auto& i: fNeededStdLib) {
          fGC += "#include<" + i + ">\n";
@@ -182,7 +209,7 @@ namespace SOFIE{
       fGC += "#include \"TMVA/SOFIE_common.hxx\"\n";
       if (useWeightFile)
          fGC += "#include <fstream>\n";
-      
+
       fGC += "\nnamespace TMVA_SOFIE_" + fName + "{\n";
       if (!fNeededBlasRoutines.empty()) {
          fGC += ("namespace BLAS{\n");
@@ -227,7 +254,7 @@ namespace SOFIE{
                fGC += "std::vector<float> fTensor_" + i.first + " = std::vector<float>(" + std::to_string(length) + ");\n";
                fGC += "float * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n";
             }
-          
+
          }
       }
       for (auto&i: fIntermediateTensorInfos){
@@ -309,16 +336,16 @@ namespace SOFIE{
       }
       if (outputSize == 1) {
          size_t outputLength = ConvertShapeToLength(GetTensorShape(fOutputTensorNames[0]));
-         
-         fGC += "\tstd::vector<float> ret (tensor_" + fOutputTensorNames[0] + ", tensor_" + fOutputTensorNames[0] + " + " + 
+
+         fGC += "\tstd::vector<float> ret (tensor_" + fOutputTensorNames[0] + ", tensor_" + fOutputTensorNames[0] + " + " +
                std::to_string(outputLength) + ");\n";
       } else {
          for (size_t i = 0; i < outputSize; i++) {
             if (!fOutputTensorNames[i].empty()) {
                size_t outputLength = ConvertShapeToLength(GetTensorShape(fOutputTensorNames[i]));
                fGC += "\tstd::vector<float> ret_";
                fGC += std::to_string(i);
-               fGC += " (tensor_" + fOutputTensorNames[i] + ", tensor_" + fOutputTensorNames[i] + " + " + 
+               fGC += " (tensor_" + fOutputTensorNames[i] + ", tensor_" + fOutputTensorNames[i] + " + " +
                std::to_string(outputLength) + ");\n";
             }
          }
@@ -353,7 +380,7 @@ namespace SOFIE{
       fGC += "   }\n";
       fGC += "   std::string tensor_name;\n";
       fGC += "   int length;\n";
-      
+
       //loop on tensors and parse the file
       for (auto& i: fInitializedTensors){
          if (i.second.fType == ETensorType::FLOAT){
@@ -370,7 +397,7 @@ namespace SOFIE{
             fGC += "      throw std::runtime_error(err_msg);\n";
             fGC += "    }\n";
             fGC += "   if (length != " + slength + ") {\n";
-            fGC += "      std::string err_msg = \"TMVA-SOFIE failed to read the correct tensor size; expected size is " + 
+            fGC += "      std::string err_msg = \"TMVA-SOFIE failed to read the correct tensor size; expected size is " +
                    slength + " , read \" + std::to_string(length) ;\n";
             fGC += "      throw std::runtime_error(err_msg);\n";
             fGC += "    }\n";
@@ -382,7 +409,7 @@ namespace SOFIE{
    }
 
    void RModel::WriteInitializedTensorsToFile(std::string filename) {
-      // write the initialized tensors in a text file 
+      // write the initialized tensors in a text file
       if (filename == ""){
          filename = fName + ".data";
       }

Original file line number	Diff line number	Diff line change
`@@ -570,6 +570,7 @@ RModel Parse(std::string filename){`
`570`	`570`	`// Getting the shape vector from the Tuple object`
`571`	`571`	`std::vector<size_t>fInputShape = GetDataFromTuple(fPInputShapes);`
`572`	`572`	`rmodel.AddInputTensorInfo(fInputName, ETensorType::FLOAT, fInputShape);`
	`573`	`+ rmodel.AddInputTensorName(fInputName);`
`573`	`574`	`break;`
`574`	`575`	`}`
`575`	`576`
`@@ -597,6 +598,7 @@ RModel Parse(std::string filename){`
`597`	`598`
`598`	`599`	`std::vector<size_t>fInputShape = GetDataFromTuple(fInputShapeTuple);`
`599`	`600`	`rmodel.AddInputTensorInfo(fInputName, ETensorType::FLOAT, fInputShape);`
	`601`	`+ rmodel.AddInputTensorName(fInputName);`
`600`	`602`	`break;`
`601`	`603`	`}`
`602`	`604`
Original file line number	Diff line number	Diff line change
`@@ -517,6 +517,7 @@ RModel Parse(std::string filename, std::vector<std::vector<size_t>> inputShapes,`
`517`	`517`	`switch(fInputDType){`
`518`	`518`	`case(ETensorType::FLOAT): {`
`519`	`519`	`rmodel.AddInputTensorInfo(fInputName, ETensorType::FLOAT, fInputShape);`
	`520`	`+ rmodel.AddInputTensorName(fInputName);`
`520`	`521`	`break;`
`521`	`522`	`}`
`522`	`523`	`default:`