Skip to content

Commit 2c97479

Browse files
[GSOC][TMVA][SOFIE] Cast ONNX Operator implemented with the corresponding unit tests (root-project#11033)
* Cast ONNX Operator implemented with the corresponding unit tests Added the functionality and support of int input type in Cast ONNX Operator * Attribute type added to ROperator Cast Class and modified the RModel Parser for supporting different input types * The functionality and support for other datatypes added for the cast operator and added the support to RModel::Generate method also * made the required changes related to support for other datatypes * Extended the support for other datatypes in the infer function * Required changes made related to support of different datatypes in SOFIE * Apply various fixes to support different input/output type. This fixes the new Cast operator. Several changes are needed for Cast since the input tensor can be of a type different than float Apply also a fix for parsing correctly the attribute of Cast * The attribute fattr_type changed to fAttrType Co-authored-by: moneta <[email protected]>
1 parent 2e28217 commit 2c97479

File tree

10 files changed

+241
-31
lines changed

10 files changed

+241
-31
lines changed

tmva/sofie/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
3333
TMVA/ROperator_Concat.hxx
3434
TMVA/ROperator_Identity.hxx
3535
TMVA/ROperator_Softmax.hxx
36+
TMVA/ROperator_Cast.hxx
3637
TMVA/SOFIE_common.hxx
3738
TMVA/SOFIEHelpers.hxx
3839
SOURCES

tmva/sofie/inc/TMVA/OperatorList.hxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@
1818
#include "TMVA/ROperator_Identity.hxx"
1919
#include "TMVA/ROperator_Softmax.hxx"
2020
#include "TMVA/ROperator_Concat.hxx"
21+
#include "TMVA/ROperator_Cast.hxx"
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#ifndef TMVA_SOFIE_ROPERATOR_Cast
#define TMVA_SOFIE_ROPERATOR_Cast

#include "TMVA/SOFIE_common.hxx"
#include "TMVA/ROperator.hxx"
#include "TMVA/RModel.hxx"

#include <sstream>

namespace TMVA{
namespace Experimental{
namespace SOFIE{

/// SOFIE implementation of the ONNX Cast operator.
/// Copies the input tensor element-wise into an output tensor whose
/// element type is given by the Cast "to" attribute (stored here as a
/// C++ type name string, e.g. "float", "int64_t").
/// \tparam T element type of the *input* tensor
template <typename T>
class ROperator_Cast final : public ROperator
{

private:

   std::string fNX;                  ///< input tensor name (cleaned)
   std::string fNY;                  ///< output tensor name (cleaned)
   std::vector<size_t> fShape;       ///< shape of input (== output) tensor
   std::string fAttrType = "float";  ///< target type from the ONNX "to" attribute

public:
   ROperator_Cast(){}
   /// \param attr_type target element type name (from the ONNX "to" attribute)
   /// \param nameX input tensor name
   /// \param nameY output tensor name
   ROperator_Cast(std::string attr_type, std::string nameX, std::string nameY):
      fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)),
      fAttrType(attr_type) {}

   // NOTE(review): Cast changes the element type to fAttrType, but this
   // returns the input types unchanged — confirm whether callers rely on
   // TypeInference here (AddIntermediateTensor below registers the correct type).
   std::vector<ETensorType> TypeInference(std::vector<ETensorType> input){
      return input;
   }

   /// Cast is element-wise: the output shape equals the input shape.
   std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input){
      auto ret = input; //suggest copy to compiler
      return ret;
   }

   void Initialize(RModel& model){
      // input must be a graph input, or already initialized intermediate tensor
      if (!model.CheckIfTensorAlreadyExist(fNX)){
         throw std::runtime_error("TMVA SOFIE Cast Op Input Tensor is not found in model");
      }
      fShape = model.GetTensorShape(fNX);
      // register the output tensor with the requested (cast-to) element type
      model.AddIntermediateTensor(fNY, ConvertStringToType(fAttrType), fShape);
   }

   /// Emit the generated C++ code performing the element-wise cast.
   /// \param OpName unique operator id used to prefix generated symbols
   std::string Generate(std::string OpName){
      OpName = "op_" + OpName;
      if (fShape.empty()) {
         throw std::runtime_error("TMVA SOFIE Cast called to Generate without being initialized first");
      }
      std::stringstream out;
      size_t length = ConvertShapeToLength(fShape);

      out << "\n//------ CAST\n";
      // emit a size_t index so the generated loop avoids any
      // signed/unsigned comparison against the (size_t) tensor length
      out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n";
      out << SP << SP << "tensor_" << fNY << "[id] = static_cast<"<< fAttrType << ">(tensor_" << fNX << "[id]);\n";
      out << SP << "}\n";
      return out.str();
   }

};

}//SOFIE
}//Experimental
}//TMVA

#endif //TMVA_SOFIE_ROPERATOR_Cast

tmva/sofie/src/RModel.cxx

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -277,15 +277,19 @@ namespace SOFIE{
277277
}
278278
}
279279
for (auto&i: fIntermediateTensorInfos){
280+
size_t length = ConvertShapeToLength(i.second.shape);
280281
if (i.second.type == ETensorType::FLOAT){
281-
size_t length = 1;
282-
for (auto & dim: i.second.shape){
283-
length *= dim;
284-
}
285-
//fGC += "float tensor_" + i.first + "[" + std::to_string(length) + "];\n";
286282
fGC += "std::vector<float> fTensor_" + i.first + " = std::vector<float>(" + std::to_string(length) + ");\n";
287283
fGC += "float * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n";
288284
}
285+
if (i.second.type == ETensorType::DOUBLE){
286+
fGC += "std::vector<double> fTensor_" + i.first + " = std::vector<double>(" + std::to_string(length) + ");\n";
287+
fGC += "double * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n";
288+
}
289+
if (i.second.type == ETensorType::INT64){
290+
fGC += "std::vector<int64_t> fTensor_" + i.first + " = std::vector<int64_t>(" + std::to_string(length) + ");\n";
291+
fGC += "int64_t * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n";
292+
}
289293
}
290294
if (fUseSession) {
291295
// add here specific operator code that needs to define session data members
@@ -310,14 +314,15 @@ namespace SOFIE{
310314
}
311315

312316
size_t outputSize = fOutputTensorNames.size();
317+
// assume output types are all the same
318+
std::string outputType;
313319
if (outputSize == 1) {
314320
auto f = fIntermediateTensorInfos.find(fOutputTensorNames[0]);
315321
if (f == fIntermediateTensorInfos.end()){
316322
throw std::runtime_error("TMVA-SOFIE: output tensor " + fOutputTensorNames[0] + " not found when trying to get its info");
317323
}else{
318-
if (f->second.type == ETensorType::FLOAT){
319-
fGC += "std::vector<float> ";
320-
}
324+
outputType = ConvertTypeToString(f->second.type);
325+
fGC += "std::vector<" + outputType + "> ";
321326
}
322327
} else {
323328
std::vector<ETensorType> outputTensorsTypes(outputSize);
@@ -330,45 +335,55 @@ namespace SOFIE{
330335
outputTensorsTypes[i] = f->second.type;
331336
}
332337
}
333-
ETensorType outputType = outputTensorsTypes[0];
338+
// assume all output types are the same
339+
outputType = ConvertTypeToString(outputTensorsTypes[0]);
334340
for (size_t i = 0; i < outputSize; i++) {
335-
if (outputTensorsTypes[i] != outputType) {
341+
if (outputTensorsTypes[i] != outputTensorsTypes[0]) {
336342
throw std::runtime_error("TMVA-SOFIE: output tensor " + fOutputTensorNames[i] + " is of different type.");
337343
}
338344
}
339-
if (outputType == ETensorType::FLOAT) {
340-
fGC += "std::vector<std::vector<float>> ";
341-
}
345+
fGC += "std::vector<std::vector<" + outputType + ">> ";
342346
}
343347

344348
fGC += "infer(";
345349
for (auto& i: fReadyInputTensorInfos){
346350
if (i.second.type == ETensorType::FLOAT){
347-
fGC += "float* tensor_" + i.first + ",";
351+
fGC += "float* tensor_" + i.first + ",";
352+
}
353+
else if (i.second.type == ETensorType::INT32 ){
354+
fGC += "int32_t* tensor_" + i.first + ",";
355+
}
356+
else if (i.second.type == ETensorType::INT64){
357+
fGC += "int64_t* tensor_" + i.first + ",";
358+
}
359+
else if(i.second.type == ETensorType::DOUBLE){
360+
fGC += "double* tensor_" + i.first + ",";
348361
}
349362
}
350363
fGC.pop_back(); //remove last ","
351364
fGC += "){\n";
352365

366+
const std::string SP = " ";
367+
353368
for (size_t id = 0; id < fOperators.size() ; id++){
354369
fGC+= (fOperators[id]->Generate(std::to_string(id)));
355370
}
356371
if (outputSize == 1) {
357372
size_t outputLength = ConvertShapeToLength(GetTensorShape(fOutputTensorNames[0]));
358373

359-
fGC += "\tstd::vector<float> ret (tensor_" + fOutputTensorNames[0] + ", tensor_" + fOutputTensorNames[0] + " + " +
374+
fGC += SP + "std::vector<" + outputType + "> ret (tensor_" + fOutputTensorNames[0] + ", tensor_" + fOutputTensorNames[0] + " + " +
360375
std::to_string(outputLength) + ");\n";
361376
} else {
362377
for (size_t i = 0; i < outputSize; i++) {
363378
if (!fOutputTensorNames[i].empty()) {
364379
size_t outputLength = ConvertShapeToLength(GetTensorShape(fOutputTensorNames[i]));
365-
fGC += "\tstd::vector<float> ret_";
380+
fGC += SP + "std::vector<" + outputType + "> ret_";
366381
fGC += std::to_string(i);
367382
fGC += " (tensor_" + fOutputTensorNames[i] + ", tensor_" + fOutputTensorNames[i] + " + " +
368383
std::to_string(outputLength) + ");\n";
369384
}
370385
}
371-
fGC += "\tstd::vector<std::vector<float>> ret({";
386+
fGC += SP + "std::vector<std::vector<" + outputType + ">> ret({";
372387
for (size_t i = 0; i < outputSize; i++) {
373388
if (fOutputTensorNames[i].empty()) {
374389
fGC += "{}";
@@ -382,7 +397,7 @@ namespace SOFIE{
382397
}
383398
fGC += "});\n";
384399
}
385-
fGC += "\treturn ret;\n";
400+
fGC += SP + "return ret;\n";
386401
fGC += "}\n";
387402
if (fUseSession) {
388403
fGC += "};\n";
@@ -394,7 +409,7 @@ namespace SOFIE{
394409
void RModel::ReadInitializedTensorsFromFile() {
395410
// generate the code to read initialized tensors from a text data file
396411
if (fInitializedTensors.empty()) return;
397-
412+
398413
fGC += " std::ifstream f;\n";
399414
fGC += " f.open(filename);\n";
400415
fGC += " if (!f.is_open()){\n";

tmva/sofie/src/SOFIE_common.cxx

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,43 @@ std::string ConvertTypeToString(ETensorType type){
2525
case ETensorType::FLOAT : {
2626
return "float";
2727
}
28+
case ETensorType::INT16 : {
29+
return "int16_t";
30+
}
31+
case ETensorType::INT32 : {
32+
return "int32_t";
33+
}
34+
case ETensorType::INT64 : {
35+
return "int64_t";
36+
}
37+
case ETensorType::UINT16 : {
38+
return "uint16_t";
39+
}
40+
case ETensorType::UINT32 : {
41+
return "uint32_t";
42+
}
43+
case ETensorType::UINT64 : {
44+
return "uint64_t";
45+
}
46+
case ETensorType::DOUBLE : {
47+
return "double";
48+
}
2849
default:{
2950
return "other";
3051
}
3152
}
3253
}
3354

3455
ETensorType ConvertStringToType(std::string type){
35-
if(type == "float32" || type == "Float"){
56+
if(type == "float32" || type == "float" || type == "Float"){
3657
return ETensorType::FLOAT;
3758
}
59+
else if(type == "int64"){
60+
return ETensorType::INT64;
61+
}
62+
else if (type == "double" || type == "float64"){
63+
return ETensorType::DOUBLE;
64+
}
3865
else{
3966
return ETensorType::UNDEFINED;
4067
}

tmva/sofie/test/TestCustomModelsFromONNX.cxx

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
#include "Neg_FromONNX.hxx"
3131
#include "input_models/references/Neg.ref.hxx"
3232

33+
#include "Cast_FromONNX.hxx"
34+
#include "input_models/references/Cast.ref.hxx"
35+
3336
#include "LinearWithLeakyRelu_FromONNX.hxx"
3437
#include "input_models/references/LinearWithLeakyRelu.ref.hxx"
3538

@@ -308,7 +311,7 @@ TEST(ONNX, Neg)
308311
-1.9100, 1.8811, -1.7269, -0.1094, -0.0145, 0.2509, 0.5893, -2.2733,
309312
-0.7077, 1.0645, -0.8607, 0.2085
310313
});
311-
314+
312315
TMVA_SOFIE_Neg::Session s("Neg_FromONNX.dat");
313316
std::vector<float> output = s.infer(input.data());
314317

@@ -323,6 +326,30 @@ TEST(ONNX, Neg)
323326
}
324327
}
325328

329+
// Unit test for the SOFIE-generated Cast model: feeds an int64 input
// tensor through the generated Session and compares the (cast) output
// against the reference values in Cast_ExpectedOutput.
TEST(ONNX, Cast)
{
   constexpr float TOLERANCE = DEFAULT_TOLERANCE;

   // Preparing the standard input (int64, matching the ONNX model's input type)
   std::vector<int64_t> input({
      1,2,3,4,5,6
   });

   TMVA_SOFIE_Cast::Session s("Cast_FromONNX.dat");

   // auto: the element type of the returned vector is the model's cast-to type
   auto output = s.infer(input.data());

   // Checking output size
   EXPECT_EQ(output.size(), sizeof(Cast_ExpectedOutput::outputs) / sizeof(float));

   float *correct = Cast_ExpectedOutput::outputs;

   // Checking every output value, one by one
   for (size_t i = 0; i < output.size(); ++i) {
      EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE);
   }
}
352+
326353
TEST(ONNX, Linear64)
327354
{
328355
constexpr float TOLERANCE = DEFAULT_TOLERANCE;
@@ -589,12 +616,12 @@ TEST(ONNX, MaxPool1d){
589616
0.2283, 0.8947, 1.7627,
590617
-0.1657, 0.0649, -1.6066, 0.4162, -1.1525, -0.8184, 1.1324,
591618
-1.1086, 0.1061, 1.0071});
592-
619+
593620
TMVA_SOFIE_MaxPool1d::Session s("MaxPool1d_FromONNX.dat");
594621
std::vector<float> output = s.infer(input.data());
595622
// Checking output size
596623
EXPECT_EQ(output.size(), sizeof(MaxPool1d_ExpectedOutput::output) / sizeof(float));
597-
624+
598625
float *correct = MaxPool1d_ExpectedOutput::output;
599626

600627
// Checking every output value, one by one
@@ -620,12 +647,12 @@ TEST(ONNX, MaxPool2d){
620647
-0.9398, -0.2065, -0.9499, -0.9739, -0.1288, -0.1375, -1.2612,
621648
0.8810, 0.8506, 0.4455
622649
});
623-
650+
624651
TMVA_SOFIE_MaxPool2d::Session s("MaxPool2d_FromONNX.dat");
625652
std::vector<float> output = s.infer(input.data());
626653
// Checking output size
627654
EXPECT_EQ(output.size(), sizeof(MaxPool2d_ExpectedOutput::output) / sizeof(float));
628-
655+
629656
float *correct = MaxPool2d_ExpectedOutput::output;
630657

631658
// Checking every output value, one by one
@@ -652,12 +679,12 @@ TEST(ONNX, MaxPool3d){
652679
-0.5477, 0.2341, 0.9181,
653680
0.3842, 0.2428, 1.7924
654681
});
655-
682+
656683
TMVA_SOFIE_MaxPool3d::Session s("MaxPool3d_FromONNX.dat");
657684
std::vector<float> output = s.infer(input.data());
658685
// Checking output size
659686
EXPECT_EQ(output.size(), sizeof(MaxPool3d_ExpectedOutput::output) / sizeof(float));
660-
687+
661688
float *correct = MaxPool3d_ExpectedOutput::output;
662689

663690
// Checking every output value, one by one
@@ -683,12 +710,12 @@ TEST(ONNX, AvgPool){
683710
-1.4971, 0.5386, -0.2922, 0.4860, -0.3973, -0.4624, 0.4514,
684711
0.2385, 0.3783, -1.0500
685712
});
686-
713+
687714
TMVA_SOFIE_AvgPool::Session s("AvgPool_FromONNX.dat");
688715
std::vector<float> output = s.infer(input.data());
689716
// Checking output size
690717
EXPECT_EQ(output.size(), sizeof(AvgPool_ExpectedOutput::output) / sizeof(float));
691-
718+
692719
float *correct = AvgPool_ExpectedOutput::output;
693720

694721
// Checking every output value, one by one
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
pytorch1.11.0:s
2+
*
3+
onnx::Cast_01Cast_0"Cast*
4+
to �torch-jit-exportZ
5+
onnx::Cast_0
6+

7+

8+
b
9+
1
10+
 
11+

12+
B
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Reference output for the Cast operator unit test:
// the int64 input values 1..6 after casting to float.
namespace Cast_ExpectedOutput{
   float outputs[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
} // namespace Cast_ExpectedOutput

0 commit comments

Comments
 (0)