[tmva][sofie] Fix an issue in the Transpose operator when permuting >= 3 axes

lmoneta · lmoneta · commit ce493948a03e · 2022-04-07T15:19:50.000+02:00
The input attribute was not used in the correct way, but in the inverse mode.
This is not a problem when just transposing two axes, but it is not correct for >= 3 axes.

Also update the generated code to use contiguous writes, which should be faster, and add more documentation

Add also a new utility function to compute strides from shape of a vector
diff --git a/tmva/sofie/inc/TMVA/ROperator_Transpose.hxx b/tmva/sofie/inc/TMVA/ROperator_Transpose.hxx
@@ -44,9 +44,12 @@ public:
    std::vector<std::vector<size_t>> ShapeInference(std::vector<std::vector<size_t>> input){
       if (input.size() > 1) throw std::runtime_error("TMVA SOFIE Tranpose Op Shape Inference only need 1 input tensor");
       auto& data = input[0];
+      if (fAttrPerm.size() != data.size() )
+         throw std::runtime_error("TMVA SOFIE Tranpose Op - Invalid axes attributes");
+
       std::vector<size_t> output_shape(fAttrPerm.size());
       for (size_t i = 0; i < fAttrPerm.size(); i++){
-         output_shape[fAttrPerm[i]] = data[i];
+         output_shape[i] = data[fAttrPerm[i]];
       }
       std::vector<std::vector<size_t>> ret;
       ret.push_back(output_shape);
@@ -60,18 +63,14 @@ public:
       }
       fShapeData = model.GetTensorShape(fNData);
       if (fAttrPerm.empty()){
+         fAttrPerm.reserve(fShapeData.size());
          for (int i = fShapeData.size() - 1; i >= 0; i--){
             fAttrPerm.push_back(i);
          }
       }
-
-      std::vector<size_t> output_shape(fAttrPerm.size());
-      for (size_t i = 0; i < fAttrPerm.size(); i++){
-         output_shape[fAttrPerm[i]] = fShapeData[i];
-      }
-
-      model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), output_shape);
-      fShapeOutput = output_shape;
+      std::vector<std::vector<size_t>> inputs = { fShapeData };
+      fShapeOutput = ShapeInference(inputs).front();
+      model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), fShapeOutput);
    }
 
    std::string Generate(std::string OpName){
@@ -80,32 +79,48 @@ public:
          throw std::runtime_error("TMVA SOFIE Transpose Op called to Generate without being initialized first");
       }
       int dim = fShapeData.size();
-      int length=1;
-      std::vector<int> sizeofindex(dim);
-      for (int i = dim - 1; i>=0; i--){
-         sizeofindex[i] = length;
-         length *= fShapeData[i];
-      }
-      std::vector<int> index_goto(dim);
-      for (int i = 0; i < dim; i++){
-         index_goto[fAttrPerm[i]] = i;
-      }
-      std::vector<int> new_sizeofindex(dim);
-      int t = 1;
-      for (int i = dim - 1; i>=0; i--){
-         new_sizeofindex[i] = t;
-         t *= fShapeOutput[i];
-      }
+      auto inStrides = UTILITY::ComputeStrideFromShape(fShapeData);
+      auto outStrides = UTILITY::ComputeStrideFromShape(fShapeOutput);
+      size_t length = inStrides[0]*fShapeData[0];  // total tensor size
+      assert (length == outStrides[0]*fShapeOutput[0]);
 
       std::stringstream out;
+      // Implement transpose operator using consecutive writes to the output.
+      // For each flat output index id we generate:
+      // tensorOut[id] = tensorInput[ inStrides[0]*i0 + inStrides[1]*i1 + inStrides[2]*i2 + ...]
+      // now if (j0,j1,j2) are the output indices
+      // j0 =  id / outStrides[0]
+      // j1 =  (id % outStrides[0])/outStrides[1]
+      // j2 =  (id % outStrides[1])/outStrides[2]
+      //......
+      // and we have j_k = i_fAttrPerm[k]
+      // since we are using consecutive writes we should find the inverse of fAttrPerm
       out << SP << "///------- Transpose operator\n" << std::endl;
-      out << SP << "for (int id = 0; id < " << length << " ; id++){\n";
-      out << SP << SP << "tensor_" << fNOutput << "[";
-      for (int i =0; i < dim; i++){
-         out << "id / " << sizeofindex[i] << " % " << fShapeData[i] << " * " << new_sizeofindex[index_goto[i]];
-         if (i != dim - 1) out << " + ";
+      out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n";
+      out << SP << SP << "tensor_" << fNOutput << "[id] = tensor_" << fNData << "[ ";
+      // compute output j indices
+      std::vector<std::string> i_out(dim);
+      for (int k =0; k < dim; k++){
+         if (k == 0)
+            i_out[k] = "id";
+         else
+            i_out[k] = "(id % " + std::to_string(outStrides[k-1]) + ")";
+         if (k < dim-1)
+            i_out[k] += " / " + std::to_string(outStrides[k]);
+      }
+      // use now them for input tensors
+      // need to invert the fAttrPerm[k]
+      for (int k =0; k < dim; k++){
+         // l is the position of k in fAttrPerm (inverse permutation); l == 0 is a valid position
+         int l = std::find(fAttrPerm.begin(), fAttrPerm.end(), k) - fAttrPerm.begin();
+         assert(l >= 0 && l < dim);
+         out << "( " << i_out[l] << " )";
+         if (k < dim-1) {
+            out << " * " << inStrides[k];
+            out << " + ";
+         }
       }
-      out << "] = " << "tensor_" << fNData << "[id];\n";
+      out << "];\n";
       out << SP << "}\n";
       return out.str();
    }