[tmva][sofie] Add Concat operator

lmoneta · lmoneta · commit 45f3e7b2cc26 · 2022-04-04T10:35:15.000+02:00
Use the initial code from @sanjibansg and add an implementation for axis = N-1. The other axis cases still need to be implemented. Also add a function that computes the tensor strides from the tensor shape, which is used in Concat. Also fix some error messages in other operators.
diff --git a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx
@@ -0,0 +1,144 @@
+#ifndef TMVA_SOFIE_ROPERATOR_Concat
+ #define TMVA_SOFIE_ROPERATOR_Concat
+
+
+ #include "TMVA/SOFIE_common.hxx"
+ #include "TMVA/ROperator.hxx"
+ #include "TMVA/RModel.hxx"
+
+ #include <sstream>
+ #include <algorithm>
+ #include <iterator>
+ #include <iomanip>
+ #include <limits>
+
+ namespace TMVA{
+ namespace Experimental{
+ namespace SOFIE{
+
+     template <typename T>
+     class ROperator_Concat final : public ROperator
+     {
+     private:
+         int fAxis;
+         std::vector<std::string> fInputs;
+         std::string fOutput;
+         std::vector<size_t>fOutputShape;
+         std::vector<std::vector<size_t>> fInputShapes;
+
+     public:
+         ROperator_Concat(){}
+         ROperator_Concat(std::vector<std::string> inputs, int axis, std::string output):
+         fAxis(axis), fOutput(UTILITY::Clean_name(output)) {
+            fInputs.reserve(inputs.size());
+            for (auto & name : inputs)
+               fInputs.push_back(UTILITY::Clean_name(name));
+         }
+
+         std::vector<ETensorType> TypeInference(std::vector<ETensorType> input){
+             return input;
+         }
+
+         // get shape of output given inputs. It is called from Initialize() once the input shapes have been collected
+         std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> inputs){
+             std::vector<std::vector<size_t>> ret(1);
+            // treat negative axis case
+            if (fAxis<0) {
+               fAxis = inputs[0].size()+fAxis;
+            }
+            if (fAxis < 0 || fAxis >= (int) inputs[0].size())
+               throw std::runtime_error("TMVA SOFIE Concat Op - invalid axis value ");
+
+            int concat_dim=0;
+            for(size_t i = 0; i < inputs.size(); i++) {
+               if (i > 0 && inputs[i].size() != inputs[i-1].size() )
+                 throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have different shapes " +
+                  ConvertShapeToString(inputs[i]) + " and " + ConvertShapeToString(inputs[i-1]));
+               for (size_t iaxis = 0; iaxis < inputs[i].size(); iaxis++) {
+                 if ((int) iaxis == fAxis)
+                  concat_dim += inputs[i][iaxis];
+                 else
+                    if (i> 0 && inputs[i][iaxis] != inputs[i-1][iaxis])
+                     throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have wrong shapes " +
+                      ConvertShapeToString(inputs[i]) + " and " + ConvertShapeToString(inputs[i-1]));
+               }
+            }
+            // output shape
+            ret[0] = inputs[0];
+            ret[0][fAxis] = concat_dim;
+            return ret;
+         }
+
+         void Initialize(RModel &model)
+         {
+            for (auto &it : fInputs) {
+               if (model.CheckIfTensorAlreadyExist(it) == false) {
+                  throw std::runtime_error("TMVA SOFIE Concat Op Input Tensor " + it + " is not found in model");
+               }
+               fInputShapes.push_back(model.GetTensorShape(it));
+            }
+            fOutputShape = ShapeInference(fInputShapes)[0];
+            model.AddIntermediateTensor(fOutput, model.GetTensorType(fInputs[0]), fOutputShape);
+         }
+
+         std::string Generate(std::string OpName){
+             OpName = "op_"+OpName;
+             const std::string SP = "   ";
+            if(fOutputShape.empty()){
+                  throw std::runtime_error("TMVA SOFIE Concat called to Generate without being initialized first");
+            }
+            std::stringstream out;
+            out<<"\n//--------- Concat\n";
+            // special case for axis 0, where the memory is contiguous
+            if (fAxis == 0) {
+               for(auto &it : fInputs){
+                  out<<SP<<"tensor_" << fOutput<<".insert("<<"tensor_" << fOutput<<".end(),tensor_" << it<<".begin(),tensor_"<<it<<".end());\n";
+               }
+            }
+            else {
+
+               std::vector<size_t> outStride = UTILITY::ComputeStrideFromShape(fOutputShape);
+               std::vector<std::vector<size_t>> inStrides(fInputs.size());
+               int i = 0;
+               for ( auto &s : inStrides) {
+                  s = UTILITY::ComputeStrideFromShape(fInputShapes[i]);
+                  i++;
+               }
+               for (int i = 0; i < fAxis; ++i) {
+                  // loop on dimensions
+                  out << SP << "for (size_t i" << i << " = 0; i" << i << " < " << fOutputShape[i] << "; ++i" << i <<") {\n";
+               }
+
+               out << SP << SP << SP << "int idxOut =";
+               for (int k = 0; k < fAxis; k++)
+                  out << " + " << outStride[k] << "*i" << k;
+               out << ";\n";
+
+               for (size_t j = 0; j < fInputs.size(); j++) {
+                  if (j>0)
+                  out << SP << SP << SP << "idxOut += " << fInputShapes[j-1][fAxis] << ";\n";
+                  out << SP << SP << SP << "int idxIn" << j <<" =";
+                  for (int k = 0; k < fAxis; k++)
+                     out << " + " << inStrides[j][k] << "*i" << k;
+                  out << ";\n";
+                  out << SP << SP << SP << "for (size_t iC = 0; iC < " << fInputShapes[j][fAxis] << "; ++iC) {\n";
+                  out << SP << SP << SP << SP << "tensor_" << fOutput << "[idxOut+iC] = tensor_" << fInputs[j] << "[idxIn" << j << "+iC];\n";
+                  out << SP << SP << SP << "}\n";
+               // concatenate the axis values
+               }
+                for (int i = 0; i < fAxis; ++i) {
+                    out << SP << "}\n";
+                }
+            }
+
+
+             return out.str();
+         }
+
+
+     };
+ }//SOFIE
+ }//Experimental
+ }//TMVA
+
+ #endif //TMVA_SOFIE_ROPERATOR_Concat
diff --git a/tmva/sofie/inc/TMVA/ROperator_Relu.hxx b/tmva/sofie/inc/TMVA/ROperator_Relu.hxx
@@ -37,7 +37,7 @@ public:
 
    void Initialize(RModel& model){
       if (model.CheckIfTensorAlreadyExist(fNX) == false){   //input must be a graph input, or already initialized intermediate tensor
-         throw std::runtime_error("TMVA SOFIE Relu Op Input Tensor is not found in model");
+         throw std::runtime_error("TMVA SOFIE Relu Op Input Tensor " + fNX + " is not found in model");
       }
       fShape = model.GetTensorShape(fNX);
       model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShape);
diff --git a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx
@@ -21,7 +21,7 @@ private:
 
    ReshapeOpMode fOpMode = Reshape;   // type of Reshape operator
 
-   int fAllowZero = 0; // (for Reshape) zero in tensor shape makes output shape equal to input tensor shape 
+   int fAllowZero = 0; // (for Reshape) zero in tensor shape makes output shape equal to input tensor shape
    int fAxis = 1;      // (for Flatten)
 
    std::string fNData;        // input data tensor name
@@ -41,7 +41,7 @@ public:
       if (opMode == Reshape) fAllowZero = attr_value;
       if (opMode == Flatten) fAxis = attr_value;
    }
-   
+
    // for squeeze/unsqueezed operators following old ONNX version (< 10)
    // IN this cases axes are passed as attribute values
    ROperator_Reshape(ReshapeOpMode opMode, std::vector<int64_t> attrAxes, std::string nameData, std::string nameOutput)
@@ -51,7 +51,7 @@ public:
       assert(fOpMode == Squeeze || fOpMode == Unsqueeze);
    }
 
-   // output type is same as input 
+   // output type is same as input
    std::vector<ETensorType> TypeInference(std::vector<ETensorType> input){
       auto ret = std::vector<ETensorType>(1, input[0]);
       return ret;
@@ -60,7 +60,7 @@ public:
    // output shape
    std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input){
       std::vector<std::vector<size_t>> ret;
-      auto & input_shape = input[0]; 
+      auto & input_shape = input[0];
 
       if (fOpMode == Reshape) {
          if (input.size() != 2) throw std::runtime_error("TMVA SOFIE Reshape Op needs 2 input tensors");
@@ -96,7 +96,7 @@ public:
          ret.push_back(newShape);
 
       } else if (fOpMode == Squeeze) {
-         // squeeze 
+         // squeeze
          // assume no axis is provided - remove all axes with value equal to 1
          auto output_shape = input[0];
          if (input.size() == 1) {
@@ -105,7 +105,7 @@ public:
                   output_shape.erase(output_shape.begin() + i);
                }
             }
-         } else if (input.size() == 2) { 
+         } else if (input.size() == 2) {
             auto & axes = input[1];
             for (size_t i = 0; i < axes.size(); i++){
                if (output_shape[axes[i]] != 1)
@@ -125,7 +125,7 @@ public:
          if (axes[0] > 0) { // positive axis start from beginning
             for (auto & i : axes)
                output_shape.insert(output_shape.begin() + i, 1);
-         } else { 
+         } else {
             //negative axes
             for (auto &i : axes) {
                assert(i < 0);
@@ -140,9 +140,9 @@ public:
    void Initialize(RModel &model)
    {
 
-      if (model.CheckIfTensorAlreadyExist(fNData) == false) { 
+      if (model.CheckIfTensorAlreadyExist(fNData) == false) {
           // input must be a graph input, or already initialized intermediate tensor
-         throw std::runtime_error("TMVA Reshape Op Input Tensor is not found in model");
+         throw std::runtime_error("TMVA Reshape Op Input Tensor " + fNData + "  is not found in model");
       }
       fShapeInput = model.GetTensorShape(fNData);
 
@@ -159,7 +159,7 @@ public:
             std::copy(input_shape, input_shape + n, descShape.begin());
             fShapeOutput = ShapeInference({fShapeInput, descShape})[0];
          } else {
-            throw std::runtime_error("TMVA Reshape Op Input Tensor is not found in model");
+            throw std::runtime_error("TMVA Reshape Op Shape Tensor " + fNShape + " is not found in model");
          }
       } else if (!fAttrAxes.empty()) {
          // case fNShape is empty and axes are provided as attributes
diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx
@@ -107,6 +107,8 @@ template<typename T>
 T* Unidirectional_broadcast(const T* original_data, const std::vector<size_t> original_shape, const std::vector<size_t> target_shape);
 std::string Clean_name(std::string input_tensor_name);
 
+/// compute stride of a tensor given its shape (assume layout is row-major)
+std::vector<size_t> ComputeStrideFromShape(const std::vector<size_t> & shape);
 
 /// function to check if a >> 0 and a < MAX using a single comparison
 //// use trick casting to unsigned values so it becomes a single comparison
@@ -115,23 +117,23 @@ inline bool is_a_ge_zero_and_a_lt_b(int a, int b) {
 }
 
 
-/// im2col : efficient function to re-arrange input data of convolution to a matrix 
+/// im2col : efficient function to re-arrange input data of convolution to a matrix
 /// that can be used by BLAS
 /// Use trick to loop on each element of filtered region first and follow input data layout
 /// By doing this reads and writes are of consecutive data in memory and one gains in efficiency
-/// The resulting matrix will be already transposed and can be used directly in BLAS 
+/// The resulting matrix will be already transposed and can be used directly in BLAS
 /// since output will be a matrix : (channels*kernel_h*kernel_w , output_h*output_w)
-/// Example: with an input matrix 
-///    a1 a2 a3 
-///    b1 b2 b3    and a 2x2 kernel    (k1,k2,k3,k4) and padding 1 : 
-///    c1 c2 c3  
-///     outpout will be a matrix (4 x 16)                  
-///  the routine will follow output order : 
+/// Example: with an input matrix
+///    a1 a2 a3
+///    b1 b2 b3    and a 2x2 kernel    (k1,k2,k3,k4) and padding 1 :
+///    c1 c2 c3
+///     output will be a matrix (4 x 16)
+///  the routine will follow output order :
 //     first all elements which will be operated by k1 then k2 then k3
 ///  -> ( 0  0  0  0  0  a1 a2 a3 0  b1 b2 b3  0 c1 c2 c3  )    all elements for k1
-///     ( 0  0  0  0  a1 a2 a3  0 b1 b2 b3  0 c1 c2 c3  0  )     for k2 
-///     ( 0  a1 a2 a3 0  b1 b2 b3 0  c1 c2 c3  0  0  0  0  )     for k3 
-///     ( a1 a2 a3 0  b1 b2 b3  0 c1 c2 c3  0  0  0  0  0  )     for k4     
+///     ( 0  0  0  0  a1 a2 a3  0 b1 b2 b3  0 c1 c2 c3  0  )     for k2
+///     ( 0  a1 a2 a3 0  b1 b2 b3 0  c1 c2 c3  0  0  0  0  )     for k3
+///     ( a1 a2 a3 0  b1 b2 b3  0 c1 c2 c3  0  0  0  0  0  )     for k4
 ///
 
 template <typename T>
@@ -171,11 +173,11 @@ void Im2col(const T *data_im, const int channels, const int height, const int wi
 
 /// 3d implementation
 template <typename T>
-void Im2col_3d(const T *data_im, const int channels, 
-            const int depth, const int height, const int width, 
-            const int kernel_d, const int kernel_h, const int kernel_w, 
-            const int pad_d, const int pad_h, const int pad_w, 
-            const int stride_d, const int stride_h, const int stride_w, 
+void Im2col_3d(const T *data_im, const int channels,
+            const int depth, const int height, const int width,
+            const int kernel_d, const int kernel_h, const int kernel_w,
+            const int pad_d, const int pad_h, const int pad_w,
+            const int stride_d, const int stride_h, const int stride_w,
             const int dilation_d, const int dilation_h,  const int dilation_w, T *data_col)
 {
    const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
@@ -201,7 +203,7 @@ void Im2col_3d(const T *data_im, const int channels,
                         if (!is_a_ge_zero_and_a_lt_b(input_row, height)) {
                            for (int output_cols = output_w; output_cols; output_cols--) {
                               *(data_col++) = 0;
-                           }  
+                           }
                         } else {
                            int input_col = -pad_w + kernel_col * dilation_w;
                            for (int output_col = output_w; output_col; output_col--) {
diff --git a/tmva/sofie/src/SOFIE_common.cxx b/tmva/sofie/src/SOFIE_common.cxx
@@ -44,7 +44,7 @@ std::string ConvertShapeToString(std::vector<size_t> shape) {
    std::stringstream out;
    out << "{ ";
    for (size_t i = 0; i < shape.size(); i++) {
-      out << shape[i];  
+      out << shape[i];
       if (i < shape.size()-1) out << " , ";
    }
    out << " }";
@@ -86,7 +86,7 @@ T* UTILITY::Unidirectional_broadcast(const T* original_data, const std::vector<s
          throw std::runtime_error(
             "TMVA::SOFIE Error in Broadcasting Tensor : original array has more dimensions than target shape," + originalShape + ", " + targetShape);
       }
-      // if shape's sizes are different prepend 1 to get tensor with same shape size 
+      // if shape's sizes are different prepend 1 to get tensor with same shape size
       // since the broadcast is unidirectional we can only prepend
       std::vector<size_t> current_shape(original_shape);
       auto it = current_shape.begin();
@@ -95,7 +95,7 @@ T* UTILITY::Unidirectional_broadcast(const T* original_data, const std::vector<s
       }
       // this code below will work
       // when shape are not equal e.g. (3,4,5,6) and (3) and we add 1 in all missing positions
-      // since broadcasting is uni-directional we do not use it 
+      // since broadcasting is uni-directional we do not use it
       // std::vector<size_t> current_shape(target_shape.size(),1);
       // for (size_t i = 0; i < original_shape.size(); i++) {
       //    for (size_t j = 0; j < target_shape.size(); j++) {
@@ -143,6 +143,17 @@ std::string UTILITY::Clean_name(std::string input_tensor_name){
 
 template float* UTILITY::Unidirectional_broadcast(const float* original_data, const std::vector<size_t> original_shape, const std::vector<size_t> target_shape);
 
+std::vector<size_t> UTILITY::ComputeStrideFromShape(const std::vector<size_t> & shape) {
+   // assume row major layout
+   const auto size = shape.size();
+   std::vector<size_t> strides(size,1);
+   for (std::size_t i = 1; i < size; i++) {
+      strides[size - 1 - i] = strides[size - 1 - i + 1] * shape[size - 1 - i + 1];
+   }
+   return strides;
+}
+
+
 }//SOFIE
 }//Experimental
 }//TMVA

Original file line number	Diff line number	Diff line change
`@@ -37,7 +37,7 @@ public:`
`37`	`37`
`38`	`38`	`void Initialize(RModel& model){`
`39`	`39`	`if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor`
`40`		`- throw std::runtime_error("TMVA SOFIE Relu Op Input Tensor is not found in model");`
	`40`	`+ throw std::runtime_error("TMVA SOFIE Relu Op Input Tensor " + fNX + " is not found in model");`
`41`	`41`	`}`
`42`	`42`	`fShape = model.GetTensorShape(fNX);`
`43`	`43`	`model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShape);`