Skip to content

Commit 291154b

Browse files
committed
fix: avoid reallocating memory for tensors of fused operators
1 parent dd1159f commit 291154b

File tree

8 files changed: +43 additions, −53 deletions

tmva/sofie/inc/TMVA/RModel.hxx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,10 @@ public:
147147

148148
void SetOptimizationLevel(const OptimizationLevel &optim_level) { fOptimizationLevel = optim_level; }
149149

150+
void RemoveIntermediateTensor(const std::string& tensor_name){
151+
fIntermediateTensorInfos.erase(tensor_name);
152+
}
153+
150154
protected:
151155
// internal functions
152156
// generate code for the initialized tensors

tmva/sofie/inc/TMVA/ROperator.hxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ public:
6060
virtual std::string GenerateSessionMembersCode(std::string /*opName*/) { return ""; }
6161
virtual std::string Header() { return "";}
6262
virtual std::string GetFusableOutputTensorName() { return "";}
63-
virtual void UpdateFusableTensorName(std::string){ return;};
63+
virtual void UpdateFusableTensorName(std::string, const std::function<void(const std::string&)>& removal_func){ return;};
6464

6565

6666
//virtual void Forward_reference() = 0;

tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -235,18 +235,15 @@ public:
235235
return fNY;
236236
}
237237

238-
void UpdateFusableTensorName(std::string fusable_tensor_name){
239-
fNX = UTILITY::Clean_name(fusable_tensor_name);
240-
fNY = UTILITY::Clean_name(fusable_tensor_name);
241-
fInputTensorNames = { fNX, fNScale };
242-
if (!fNB.empty()){
243-
fInputTensorNames.emplace_back(fNB);
244-
}
245-
246-
fOutputTensorNames = { fNY };
247-
std::cout<<"\ncalled from gemm";
238+
void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function<void(const std::string&)>& removal_func){
239+
removal_func(fNX);
240+
removal_func(fNY);
241+
fNX = fusable_tensor_name;
242+
fNY = fusable_tensor_name;
243+
fInputTensorNames[0] = fNX;
244+
fOutputTensorNames[0] = fNY;
245+
}
248246

249-
}
250247
};
251248

252249
}//SOFIE

tmva/sofie/inc/TMVA/ROperator_Conv.hxx

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -530,15 +530,11 @@ public:
530530
std::string GetFusableOutputTensorName() override {
531531
return fNY;
532532
}
533-
void UpdateFusableTensorName(std::string fusable_tensor_name) override {
534-
std::cout<<"\ncalled from conv";
533+
void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function<void(const std::string&)>& removal_func) override {
534+
removal_func(fNY);
535535
fNY = fusable_tensor_name;
536-
fOutputTensorNames = { fNY };
537-
convK = fNX +"_f";
538-
imcol = fNX +"_xcol";
539-
fOutputTensorNames.emplace_back(convK);
540-
fOutputTensorNames.emplace_back(imcol);
541-
}
536+
fOutputTensorNames[0] = fNY;
537+
}
542538
};
543539

544540
} // namespace SOFIE

tmva/sofie/inc/TMVA/ROperator_Gemm.hxx

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -390,11 +390,12 @@ namespace SOFIE{
390390
return fNY;
391391
}
392392

393-
void UpdateFusableTensorName(std::string fusable_tensor_name){
394-
fNY = UTILITY::Clean_name(fusable_tensor_name);
395-
fOutputTensorNames = { fNY };
396-
std::cout<<"\ncalled from gemm";
393+
void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function<void(const std::string&)>& removal_func){
394+
removal_func(fNY);
395+
fNY = fusable_tensor_name;
396+
fOutputTensorNames[0] = fNY;
397397
}
398+
398399
};
399400

400401

tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -342,22 +342,15 @@ public:
342342
return fNY;
343343
}
344344

345-
void UpdateFusableTensorName(std::string fusable_tensor_name){
346-
fNX = UTILITY::Clean_name(fusable_tensor_name);
347-
fNY = UTILITY::Clean_name(fusable_tensor_name);
348-
fInputTensorNames = { fNX, fNScale };
349-
if (!fNB.empty()){
350-
fInputTensorNames.emplace_back(fNB);
351-
}
352-
353-
fOutputTensorNames = { fNY };
354-
if (!fNMean.empty()){
355-
fOutputTensorNames.emplace_back(fNMean);
356-
}
357-
if (!fNInvStdDev.empty()){
358-
fOutputTensorNames.emplace_back(fNInvStdDev);
359-
}
345+
void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function<void(const std::string&)>& removal_func){
346+
removal_func(fNX);
347+
removal_func(fNY);
348+
fNX = fusable_tensor_name;
349+
fNY = fusable_tensor_name;
350+
fInputTensorNames[0] = fNX;
351+
fOutputTensorNames[0] = fNY;
360352
}
353+
361354
};
362355

363356
} // namespace SOFIE

tmva/sofie/inc/TMVA/ROperator_Relu.hxx

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,13 @@ public:
7272
return fNY;
7373
}
7474

75-
void UpdateFusableTensorName(std::string fusable_tensor_name){
76-
fNX = fusable_tensor_name;
77-
fNY = fusable_tensor_name;
78-
fInputTensorNames = { fNX };
79-
fOutputTensorNames = { fNY };
75+
void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function<void(const std::string&)>& removal_func){
76+
removal_func(fNX);
77+
removal_func(fNY);
78+
fNX = fusable_tensor_name;
79+
fNY = fusable_tensor_name;
80+
fInputTensorNames[0] = fNX;
81+
fOutputTensorNames[0] = fNY;
8082
}
8183

8284
};

tmva/sofie/src/RModel.cxx

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,6 @@ void RModel::CheckAndFuseOperators() {
387387
std::vector<size_t> fusable_indices;
388388
std::string fusable_propagate_tensor_name;
389389
while (idx < fOperators.size()) {
390-
std::cout<<"\nop currently: "<<toString(fOperators[idx]->GetOpKind());
391390
if (fOperators[idx]->GetOpKind() != OperatorKind::GEMM && fOperators[idx]->GetOpKind() != OperatorKind::CONV) {
392391
++idx;
393392
continue;
@@ -400,13 +399,11 @@ void RModel::CheckAndFuseOperators() {
400399
size_t j = idx + 1;
401400
for (; j < fOperators.size()-1; ++j) {
402401
auto opKind = fOperators[j]->GetOpKind();
403-
std::cout<<"\nchecking for fusion: "<<toString(opKind);
404402
// Only consider operators with fusable kinds
405403
if (!FusableKinds.count(opKind)) {
406404
// std::cout<<"\n op not fusable: "<<toString(opKind);
407405
break;
408406
}
409-
std::cout<<"\nmight be fusable: "<<toString(opKind);
410407

411408
const auto& tensorName = fOperators[j]->GetFusableOutputTensorName();
412409
auto freqIt = fIntermediateTensorFrequencyLookup.find(tensorName);
@@ -421,21 +418,21 @@ void RModel::CheckAndFuseOperators() {
421418
break;
422419
}
423420
}
424-
// std::cout<<"\nstart fusing: "<<fusable_propagate_tensor_name;
425421
if (!fusable_propagate_tensor_name.empty()) {
426-
// std::cout << "\nOperators to be fused with: " << fusable_propagate_tensor_name;
422+
auto fusable_tensor_type = GetTensorType(fusable_propagate_tensor_name);
423+
auto fusable_tensor_shape = GetDynamicTensorShape(fusable_propagate_tensor_name);
427424
for (auto& index : fusable_indices) {
428-
std::cout<<"\nfusing op "<<toString(fOperators[index]->GetOpKind())<<" , with: "<<fusable_propagate_tensor_name;
429-
fOperators[index]->UpdateFusableTensorName(fusable_propagate_tensor_name);
425+
fOperators[index]->UpdateFusableTensorName(fusable_propagate_tensor_name, [this](const std::string& name) {
426+
this->RemoveIntermediateTensor(name);
427+
});
430428
}
429+
AddIntermediateTensor(fusable_propagate_tensor_name, fusable_tensor_type, fusable_tensor_shape);
431430
}
432431

433432
idx = std::max(idx + 1, j);
434433
}
435434
}
436435

437-
438-
439436
void RModel::Initialize(int batchSize, bool verbose) {
440437
std::map<std::string, size_t> inputParams;
441438
if (batchSize > 0) {

0 commit comments

Comments (0)