From e3133c6308f8587873d9098915c813c0a8787050 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sun, 4 May 2025 03:48:00 +0200 Subject: [PATCH 1/7] feat: functionality for generalized operator fusion in sofie --- tmva/sofie/inc/TMVA/RModel.hxx | 1 + tmva/sofie/inc/TMVA/ROperator.hxx | 19 +++++++++- tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 10 +++++- .../inc/TMVA/ROperator_LayerNormalization.hxx | 12 ++++++- tmva/sofie/inc/TMVA/ROperator_Relu.hxx | 11 ++++++ tmva/sofie/src/RModel.cxx | 36 ++++++++++++++++++- 6 files changed, 85 insertions(+), 4 deletions(-) diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx index 80dcc9a9c45d5..67b0c1a5e700a 100644 --- a/tmva/sofie/inc/TMVA/RModel.hxx +++ b/tmva/sofie/inc/TMVA/RModel.hxx @@ -180,6 +180,7 @@ protected: void GenerateIntermediateMemoryPool(); // Generate all session code void GenerateSessionCode(); + void CheckAndFuseOperators(); public: const std::vector & GetInputTensorNames() const { return fInputTensorNames; } diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx index f0afd9c4374c1..05e78eb4a6580 100644 --- a/tmva/sofie/inc/TMVA/ROperator.hxx +++ b/tmva/sofie/inc/TMVA/ROperator.hxx @@ -2,6 +2,7 @@ #define TMVA_SOFIE_ROPERATOR #include +#include #include #include "TMVA/SOFIE_common.hxx" @@ -15,6 +16,15 @@ namespace SOFIE{ class RModel; +enum class OperatorKind { + GEMM = 0, + LAYERNORM = 1, + RELU = 2, + UNDEFINED = 3 +}; + +inline std::set FusableKinds = { OperatorKind::RELU, OperatorKind::LAYERNORM }; + class ROperator{ @@ -32,13 +42,16 @@ public: // generate session data members specific to operator virtual std::string GenerateSessionMembersCode(std::string /*opName*/) { return ""; } virtual std::string Header() { return "";} + virtual std::string GetFusableOutputTensorName() { return "";} + virtual void UpdateFusableTensorName(std::string){ return;}; + //virtual void Forward_reference() = 0; //virtual void Forward_blas() = 0; virtual ~ROperator(){} protected: - + OperatorKind fKind = OperatorKind::UNDEFINED; const std::string SP = " "; ///< space used to correctly indent the generated C++ code bool fUseSession = false; ///< flag to identify if using the session class bool fIsOutputConstant = false; ///< flag to identify if operator has a constant output (no need to generate code) @@ -56,6 +69,10 @@ public: return fOutputTensorNames; } + OperatorKind GetOpKind(){ + return fKind; + } + }; diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index d954720396151..c3259e8864e01 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -50,6 +50,7 @@ namespace SOFIE{ fAttrAlpha(alpha), fAttrBeta(beta), fAttrTransA(transA), fAttrTransB(transB), fNA(UTILITY::Clean_name(nameA)), fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY)) { + fKind = OperatorKind::GEMM; fActivation = activation; fType = "float"; static_assert(std::is_same_v, @@ -62,6 +63,7 @@ namespace SOFIE{ fAttrAlpha(alpha), fAttrBeta(beta), fAttrTransA(transA), fAttrTransB(transB), fNA(UTILITY::Clean_name(nameA)), fNB(UTILITY::Clean_name(nameB)), fNC(UTILITY::Clean_name(nameC)), fNY(UTILITY::Clean_name(nameY)), fActivation(activation) { + fKind = OperatorKind::GEMM; fActivation = activation; fType = "float"; @@ -402,7 +404,13 @@ namespace SOFIE{ } std::vector GetBlasRoutines() override { return { std::string("Gemm"), std::string("Gemv") }; } - + std::string GetFusableOutputTensorName() override { + return fNY; + } + + void UpdateFusableTensorName(std::string fusable_tensor_name){ + fNY = UTILITY::Clean_name(fusable_tensor_name); + } }; diff --git a/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx b/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx index 239c5332172b0..2c432e0a028d7 100644 --- a/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx @@ -58,7 +58,8 @@ public: : fAttrAxis(axis), fAttrEpsilon(epsilon), fAttrStashType(stashType), fNX(UTILITY::Clean_name(nameX)), fNScale(UTILITY::Clean_name(nameScale)), fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY)), fNMean(UTILITY::Clean_name(nameMean)), fNInvStdDev(UTILITY::Clean_name(nameInvStdDev)) - { + { + fKind = OperatorKind::LAYERNORM; fInputTensorNames = { fNX, fNScale }; if (!fNB.empty()){ fInputTensorNames.emplace_back(fNB); @@ -336,6 +337,15 @@ public: std::vector GetBlasRoutines() override { return { std::string("Axpy") }; } std::vector GetStdLibs() override { return { std::string("cmath") }; } + + std::string GetFusableOutputTensorName() override { + return fNY; + } + + void UpdateFusableTensorName(std::string fusable_tensor_name){ + fNX = UTILITY::Clean_name(fusable_tensor_name); + fNY = UTILITY::Clean_name(fusable_tensor_name); + } }; } // namespace SOFIE diff --git a/tmva/sofie/inc/TMVA/ROperator_Relu.hxx b/tmva/sofie/inc/TMVA/ROperator_Relu.hxx index a3b1df8ee0abf..e98d1d7119f00 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Relu.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Relu.hxx @@ -25,6 +25,7 @@ public: ROperator_Relu(){} ROperator_Relu(std::string nameX, std::string nameY): fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)){ + fKind = OperatorKind::RELU; fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; } @@ -66,6 +67,16 @@ public: return out.str(); } + + std::string GetFusableOutputTensorName() override { + return fNY; + } + + void UpdateFusableTensorName(std::string fusable_tensor_name){ + fNX = UTILITY::Clean_name(fusable_tensor_name); + fNY = UTILITY::Clean_name(fusable_tensor_name); + } + }; }//SOFIE diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 57d1630f8c619..e670eea5c56fb 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -255,7 +255,8 @@ void RModel::AddIntermediateTensor(std::string tensor_name, ETensorType type, st void RModel::AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector shape) { tensor_name = UTILITY::Clean_name(tensor_name); if (CheckIfTensorAlreadyExist(tensor_name)) { - throw std::runtime_error("TMVA-SOFIE: intermediate tensor with name " + tensor_name + " already exists \n"); + // throw std::runtime_error("TMVA-SOFIE: intermediate tensor with name " + tensor_name + " already exists \n"); + return; } TensorInfo new_tensor {type, shape}; fIntermediateTensorInfos[tensor_name] = new_tensor; @@ -419,6 +420,38 @@ void RModel::CheckAndFlushIntermediateMemory(std::span o } } +void RModel::CheckAndFuseOperators(){ + size_t idx = 0; + std::vector fusable_indices; + std::string fusable_propagate_tensor_name; + + while (idx < fOperators.size()) { + if (fOperators[idx]->GetOpKind() == OperatorKind::GEMM) { + fusable_indices.clear(); + size_t j = idx + 1; + + for (; j < fOperators.size(); ++j) { + if (FusableKinds.count(fOperators[j]->GetOpKind())) { + fusable_indices.push_back(j); + if (fIntermediateTensorFrequencyLookup[fOperators[j]->GetFusableOutputTensorName()] > 1) { + fusable_propagate_tensor_name = fOperators[j]->GetFusableOutputTensorName(); + break; + } + } else { + break; + } + } + + for (auto &it : fusable_indices) { + fOperators[it]->UpdateFusableTensorName(fusable_propagate_tensor_name); + } + + idx = j; // move idx past fused ops + } else { + ++idx; + } + } +} void RModel::Initialize(int batchSize, bool verbose) { @@ -532,6 +565,7 @@ void RModel::Initialize(const std::map & inputParams, bool } i++; } + CheckAndFuseOperators(); fIsInitialized = true; } From 0783a84e0ef527f08a3cf4680c1d7b85c2ab6a1e Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Tue, 13 May 2025 18:01:41 +0200 Subject: [PATCH 2/7] feat: multi-operator fusion --- tmva/sofie/inc/TMVA/RModel.hxx | 2 + tmva/sofie/inc/TMVA/ROperator.hxx | 32 ++++- tmva/sofie/inc/TMVA/ROperator_Constant.hxx | 5 +- tmva/sofie/src/RModel.cxx | 131 ++++++++++++------- tmva/sofie_parsers/src/RModelParser_ONNX.cxx | 2 +- 5 files changed, 117 insertions(+), 55 deletions(-) diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx index 67b0c1a5e700a..e9e93eab22384 100644 --- a/tmva/sofie/inc/TMVA/RModel.hxx +++ b/tmva/sofie/inc/TMVA/RModel.hxx @@ -34,6 +34,7 @@ private: std::vector> fOperators; + std::vector> fConstantOperators; std::vector> fSubGraphs; /// tmp(op); AddOperator(std::move(tmp), order_execution); } + void AddConstantOperator(std::unique_ptr op); void AddInitializedTensor(std::string tensor_name, ETensorType type, std::vector shape, std::shared_ptr data); void AddConstantTensor(std::string tensor_name, ETensorType type, std::vector shape, diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx index 05e78eb4a6580..60010c13165f1 100644 --- a/tmva/sofie/inc/TMVA/ROperator.hxx +++ b/tmva/sofie/inc/TMVA/ROperator.hxx @@ -17,12 +17,25 @@ namespace SOFIE{ class RModel; enum class OperatorKind { - GEMM = 0, - LAYERNORM = 1, - RELU = 2, - UNDEFINED = 3 + GEMM = 0, + LAYERNORM = 1, + RELU = 2, + CONSTANT = 3, + CONSTANTOFSHAPE = 4, + UNDEFINED = 5 }; - + +inline const char* toString(OperatorKind kind) { + switch (kind) { + case OperatorKind::GEMM: return "GEMM"; + case OperatorKind::LAYERNORM: return "LAYERNORM"; + case OperatorKind::RELU: return "RELU"; + case OperatorKind::CONSTANT: return "CONSTANT"; + case OperatorKind::CONSTANTOFSHAPE: return "CONSTANTOFSHAPE"; + case OperatorKind::UNDEFINED: return "UNDEFINED"; + default: return "UNKNOWN"; + } +} inline std::set FusableKinds = { OperatorKind::RELU, OperatorKind::LAYERNORM }; class ROperator{ @@ -52,6 +65,7 @@ public: protected: OperatorKind fKind = OperatorKind::UNDEFINED; + size_t fOpOrder = 0; const std::string SP = " "; ///< space used to correctly indent the generated C++ code bool fUseSession = false; ///< flag to identify if using the session class bool fIsOutputConstant = false; ///< flag to identify if operator has a constant output (no need to generate code) @@ -72,7 +86,13 @@ public: OperatorKind GetOpKind(){ return fKind; } - + void RegisterOperatorOrder(const size_t ord){ + fOpOrder = ord; + } + size_t GetOpOrder(){ + return fOpOrder; + } + }; diff --git a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx index 1cf5d13f5cd6f..d5bd9746b1670 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx @@ -36,7 +36,9 @@ public: fShape(shape), fValues(values), fAttrType(type) - { + { + fKind = OperatorKind::CONSTANT; + fInputTensorNames = { }; fOutputTensorNames = { }; } @@ -57,6 +59,7 @@ public: if (!fNX.empty()) { // case of ConstantOfShape (since no inputs in case of Constant operator) fIsConstantOfShape = true; + fKind = OperatorKind::CONSTANTOFSHAPE; if (model.CheckIfTensorAlreadyExist(fNX) == false){ throw std::runtime_error("TMVA SOFIE ConstantOfShape Op Input Tensor is not found in model"); } diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index e670eea5c56fb..e369448c5ce73 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -160,6 +160,12 @@ void RModel::AddOperator(std::unique_ptr op, int order_execution) { for (auto& stdlib : libs) { AddNeededStdLib(stdlib); } + if (op->GetOpKind()==OperatorKind::CONSTANT){ + AddConstantOperator(std::move(op)); + return; + } + + op->RegisterOperatorOrder(order_execution); if (order_execution >= 0) { fOperators.insert(fOperators.begin() + order_execution, std::move(op)); } else { @@ -168,8 +174,8 @@ void RModel::AddOperator(std::unique_ptr op, int order_execution) { // storing the last usage of tensors which are input to // operators (but are not inputs to the model, i.e. they are intermediate - // tensors). This information is needed to keep a check on when a - // particular intermediate tensor can be flushed to free up memory for reuse. + // tensors). This information is needed to keep a check on when memory is no + // longer required for a particular intermediate tensor and can be reused. for(size_t index = 0; index op, int order_execution) { } } +void RModel::AddConstantOperator(std::unique_ptr op){ + fConstantOperators.push_back(std::move(op)); +} + void RModel::AddInitializedTensor(std::string tensor_name, ETensorType type, std::vector shape, std::shared_ptr data) { tensor_name = UTILITY::Clean_name(tensor_name); //NB: own data @@ -420,40 +430,54 @@ void RModel::CheckAndFlushIntermediateMemory(std::span o } } -void RModel::CheckAndFuseOperators(){ +void RModel::CheckAndFuseOperators() { size_t idx = 0; std::vector fusable_indices; std::string fusable_propagate_tensor_name; - while (idx < fOperators.size()) { - if (fOperators[idx]->GetOpKind() == OperatorKind::GEMM) { - fusable_indices.clear(); - size_t j = idx + 1; - - for (; j < fOperators.size(); ++j) { - if (FusableKinds.count(fOperators[j]->GetOpKind())) { - fusable_indices.push_back(j); - if (fIntermediateTensorFrequencyLookup[fOperators[j]->GetFusableOutputTensorName()] > 1) { - fusable_propagate_tensor_name = fOperators[j]->GetFusableOutputTensorName(); - break; - } - } else { - break; - } - } - - for (auto &it : fusable_indices) { - fOperators[it]->UpdateFusableTensorName(fusable_propagate_tensor_name); - } - - idx = j; // move idx past fused ops - } else { - ++idx; - } + if (fOperators[idx]->GetOpKind() != OperatorKind::GEMM) { + ++idx; + continue; + } + + fusable_indices.clear(); + fusable_propagate_tensor_name.clear(); + + size_t j = idx + 1; + for (; j < fOperators.size(); ++j) { + auto opKind = fOperators[j]->GetOpKind(); + + // Only consider operators with fusable kinds + if (!FusableKinds.count(opKind)) { + break; + } + + fusable_indices.push_back(j); + + const auto& tensorName = fOperators[j]->GetFusableOutputTensorName(); + auto freqIt = fIntermediateTensorFrequencyLookup.find(tensorName); + + // Propagate tensor name only if it's not used multiple times + if (freqIt != fIntermediateTensorFrequencyLookup.end() && freqIt->second != fOperators[j]->GetOpOrder()) { + std::cout << "\nBreaking here, second: " << freqIt->second << ", idx: " << j; + fusable_propagate_tensor_name = tensorName; + break; + } + } + + if (!fusable_propagate_tensor_name.empty()) { + std::cout << "\nOperators to be fused with: " << fusable_propagate_tensor_name; + for (auto& index : fusable_indices) { + fOperators[index]->UpdateFusableTensorName(fusable_propagate_tensor_name); + } + } + + idx = j; // Move index forward to continue search } } + void RModel::Initialize(int batchSize, bool verbose) { std::map inputParams; if (batchSize > 0) { @@ -467,7 +491,7 @@ void RModel::Initialize(int batchSize, bool verbose) { void RModel::Initialize(const std::map & inputParams, bool verbose) { fVerbose = int(verbose); - + fVerbose = 0; if (fIsInitialized) { if (verbose) std::cout << "Model is already initialized - skip initialization " << std::endl; @@ -543,27 +567,39 @@ void RModel::Initialize(const std::map & inputParams, bool if (!modelHasWeights) fUseWeightFile = false; } - // Go through model and initialize each operator - int i = 0; - std::vector temp_available_stack; // vector stores individual chunks of available memory that maybe reused - - for(size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx){ + for (size_t op_const_idx = 0; op_const_idx < fConstantOperators.size(); ++op_const_idx) { if (verbose) { - auto& r = *fOperators[op_idx].get(); - std::cout << "Initializing operator " << i << " " << typeid(r).name() << std::endl; + auto& r = *fConstantOperators[op_const_idx].get(); + std::cout << "Initializing constant operator " << op_const_idx << " " << typeid(r).name() << std::endl; } - fOperators[op_idx]->Initialize(*this); - for(auto &it:fOperators[op_idx]->GetOpOutputTensors()){ - std::string name = std::string{it}; - if (fIntermediateTensorFrequencyLookup.find(it) == fIntermediateTensorFrequencyLookup.end() && - std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), name) == fOutputTensorNames.end() && - fInitializedTensors.find(name) == fInitializedTensors.end() && - fDynamicTensorInfos.find(name) == fDynamicTensorInfos.end()){ - fIntermediateTensorFrequencyLookup[it] = op_idx; - } - } - i++; + + fConstantOperators[op_const_idx]->Initialize(*this); + } + + // Go through model and initialize each operator + int i = 0; + for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx ) { + if (verbose) { + auto& r = *fOperators[op_idx].get(); + std::cout << "Initializing operator " << i << " " << typeid(r).name() << std::endl; + } + + fOperators[op_idx]->Initialize(*this); + // if(fOperators[op_idx]->GetOpName().length()==0){ + // std::cout<<"\nempty name for op, typeid: "<GetOpOutputTensors()) { + std::string name{it}; + if (fIntermediateTensorFrequencyLookup.find(it) == fIntermediateTensorFrequencyLookup.end() && + fInputTensorInfos.find(name) == fInputTensorInfos.end() && + fInitializedTensors.find(name) == fInitializedTensors.end() && + fDynamicTensorInfos.find(name) == fDynamicTensorInfos.end()) { + fIntermediateTensorFrequencyLookup[it] = i; + } + } + + ++i; } CheckAndFuseOperators(); @@ -880,6 +916,7 @@ void RModel::GenerateSessionCode() std::string intermediate_memory_alloc_string = ""; intermediate_memory_alloc_string += "\n// --- Positioning intermediate tensor memory --"; for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { + // std::cout<GetOpKind()); intermediate_memory_alloc_string += AllocateIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors()); CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); } diff --git a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx index 7b4ade2b6bc09..ad0221d8664b5 100644 --- a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx +++ b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx @@ -676,7 +676,7 @@ void RModelParser_ONNX::ParseONNXGraph(RModel & rmodel, const onnx::GraphProto & } while ((int)nodesOrder.size() < graph.node_size()); - // find list of children for each operator (used for fusing oiperators) + // find list of children for each operator (used for fusing operators) std::vector> nodesChildren(graph.node_size()); for (int k = 0; k < graph.node_size(); k++) { From 6c4f80ea9dcc30f504cd293606e37f054147f240 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 15 May 2025 08:47:56 +0200 Subject: [PATCH 3/7] fix: gemm operator for memory optimization --- tmva/sofie/inc/TMVA/RModel.hxx | 4 +- tmva/sofie/inc/TMVA/ROperator_Constant.hxx | 12 +++-- tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 2 +- tmva/sofie/src/RModel.cxx | 58 +++++++++++----------- 4 files changed, 40 insertions(+), 36 deletions(-) diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx index e9e93eab22384..b8e6ef36147d0 100644 --- a/tmva/sofie/inc/TMVA/RModel.hxx +++ b/tmva/sofie/inc/TMVA/RModel.hxx @@ -69,8 +69,8 @@ public: bool CheckIfTensorAlreadyExist(std::string tensor_name); void AddInputTensorInfo(std::string input_name, ETensorType type, std::vector shape); void AddInputTensorInfo(std::string input_name, ETensorType type, std::vector shape); - void AddOperator(std::unique_ptr op, int order_execution = -1); - void AddOperatorReference(ROperator *op, int order_execution = -1) + void AddOperator(std::unique_ptr op, size_t order_execution = -1); + void AddOperatorReference(ROperator *op, size_t order_execution = -1) { std::unique_ptr tmp(op); AddOperator(std::move(tmp), order_execution); diff --git a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx index d5bd9746b1670..736adac1c3526 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx @@ -38,7 +38,11 @@ public: fAttrType(type) { fKind = OperatorKind::CONSTANT; - + if (!fNX.empty()) { + // case of ConstantOfShape (since no inputs in case of Constant operator) + fIsConstantOfShape = true; + fKind = OperatorKind::CONSTANTOFSHAPE; + } fInputTensorNames = { }; fOutputTensorNames = { }; } @@ -55,11 +59,9 @@ public: void Initialize(RModel& model) override { //input must be a graph input, or already initialized intermediate tensor size_t length = 1; - /// ConstantOfShape------------- + + /// ------- ConstantOfShape --------- if (!fNX.empty()) { - // case of ConstantOfShape (since no inputs in case of Constant operator) - fIsConstantOfShape = true; - fKind = OperatorKind::CONSTANTOFSHAPE; if (model.CheckIfTensorAlreadyExist(fNX) == false){ throw std::runtime_error("TMVA SOFIE ConstantOfShape Op Input Tensor is not found in model"); } diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index c3259e8864e01..7fc3f45ceb410 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -67,7 +67,7 @@ namespace SOFIE{ fActivation = activation; fType = "float"; - fInputTensorNames = {fNA, fNB, fNC}; + fInputTensorNames = { fNA, fNB, fNC }; fOutputTensorNames = { fNY }; } diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index e369448c5ce73..54263c0866a99 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -153,7 +153,7 @@ void RModel::AddInputTensorName(std::string input_name) { fInputTensorNames.emplace_back(UTILITY::Clean_name(input_name)); } -void RModel::AddOperator(std::unique_ptr op, int order_execution) { +void RModel::AddOperator(std::unique_ptr op, size_t order_execution) { AddBlasRoutines(op->GetBlasRoutines()); auto libs = op->GetStdLibs(); auto op_input_tensors = op->GetOpInputTensors(); @@ -166,7 +166,7 @@ void RModel::AddOperator(std::unique_ptr op, int order_execution) { } op->RegisterOperatorOrder(order_execution); - if (order_execution >= 0) { + if (order_execution >= 0 && order_execution <= fOperators.size()) { fOperators.insert(fOperators.begin() + order_execution, std::move(op)); } else { fOperators.push_back(std::move(op)); @@ -176,14 +176,15 @@ void RModel::AddOperator(std::unique_ptr op, int order_execution) { // operators (but are not inputs to the model, i.e. they are intermediate // tensors). This information is needed to keep a check on when memory is no // longer required for a particular intermediate tensor and can be reused. - for(size_t index = 0; index op){ @@ -396,11 +397,10 @@ std::string RModel::AllocateIntermediateMemory(std::span void RModel::CheckAndFlushIntermediateMemory(std::span op_input_tensors, const size_t& op_idx){ for (auto &it : op_input_tensors){ // last occurence of the tensor is reached => flush it from memory - if (fIntermediateTensorFrequencyLookup[it] == op_idx) { + if (fIntermediateTensorFrequencyLookup[it] == fOperators[op_idx]->GetOpOrder()){ for (auto chunk = fIntermediateMemoryInfo.total_stack.begin(); chunk != fIntermediateMemoryInfo.total_stack.end(); ++chunk ) { if (chunk->second.tensor_name == it) { - // check if nearby chunks in available memory can coalesce auto first_greater = fIntermediateMemoryInfo.available_stack.upper_bound(chunk->first); // smallest element greater than the flushed chunk idx auto last_smaller = (first_greater == fIntermediateMemoryInfo.available_stack.begin()) ? fIntermediateMemoryInfo.available_stack.end() : std::prev(first_greater); // largest element smaller than the flushed chunk idx @@ -442,37 +442,42 @@ void RModel::CheckAndFuseOperators() { fusable_indices.clear(); fusable_propagate_tensor_name.clear(); - + + fusable_indices.push_back(idx); size_t j = idx + 1; - for (; j < fOperators.size(); ++j) { + for (; j < fOperators.size()-1; ++j) { auto opKind = fOperators[j]->GetOpKind(); // Only consider operators with fusable kinds if (!FusableKinds.count(opKind)) { break; } - - fusable_indices.push_back(j); + // std::cout<<"\nmight be fusable: "<GetFusableOutputTensorName(); auto freqIt = fIntermediateTensorFrequencyLookup.find(tensorName); // Propagate tensor name only if it's not used multiple times - if (freqIt != fIntermediateTensorFrequencyLookup.end() && freqIt->second != fOperators[j]->GetOpOrder()) { - std::cout << "\nBreaking here, second: " << freqIt->second << ", idx: " << j; + if (freqIt != fIntermediateTensorFrequencyLookup.end() && + (freqIt->second != fOperators[j + 1]->GetOpOrder() || + FusableKinds.count(fOperators[j + 1]->GetOpKind()) == 0)) { + // std::cout << "\nBreaking here, second: " << freqIt->second << ", idx: " << fOperators[j+1]->GetOpOrder(); fusable_propagate_tensor_name = tensorName; break; + } else { + fusable_indices.push_back(j); } } - + // std::cout<<"\nstart fusing: "<GetOpKind())<<" , with: "<UpdateFusableTensorName(fusable_propagate_tensor_name); } } - idx = j; // Move index forward to continue search + idx = std::max(idx + 1, j); } } @@ -578,11 +583,10 @@ void RModel::Initialize(const std::map & inputParams, bool } // Go through model and initialize each operator - int i = 0; for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx ) { if (verbose) { auto& r = *fOperators[op_idx].get(); - std::cout << "Initializing operator " << i << " " << typeid(r).name() << std::endl; + std::cout << "Initializing operator " << op_idx << " " << typeid(r).name() << std::endl; } fOperators[op_idx]->Initialize(*this); @@ -595,11 +599,9 @@ void RModel::Initialize(const std::map & inputParams, bool fInputTensorInfos.find(name) == fInputTensorInfos.end() && fInitializedTensors.find(name) == fInitializedTensors.end() && fDynamicTensorInfos.find(name) == fDynamicTensorInfos.end()) { - fIntermediateTensorFrequencyLookup[it] = i; + fIntermediateTensorFrequencyLookup[it] = fOperators[op_idx]->GetOpOrder(); } } - - ++i; } CheckAndFuseOperators(); @@ -916,7 +918,7 @@ void RModel::GenerateSessionCode() std::string intermediate_memory_alloc_string = ""; intermediate_memory_alloc_string += "\n// --- Positioning intermediate tensor memory --"; for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { - // std::cout<GetOpKind()); + std::cout<GetOpKind()); intermediate_memory_alloc_string += AllocateIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors()); CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); } @@ -1000,7 +1002,7 @@ void RModel::GenerateSessionCode() fGC += "}\n\n"; } - + fGC += doInferSignature + "{\n"; fGC += "\n"; From f01787f4e631dc1b4f6c8e1ebab0e2937b0778bc Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sat, 7 Jun 2025 11:04:19 +0200 Subject: [PATCH 4/7] feat: add broadcast tensors into optimization --- tmva/sofie/inc/TMVA/RModel.hxx | 4 +-- tmva/sofie/inc/TMVA/ROperator.hxx | 16 +++++---- tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx | 3 +- .../inc/TMVA/ROperator_BatchNormalization.hxx | 17 ++++++++++ tmva/sofie/inc/TMVA/ROperator_Comparision.hxx | 2 ++ tmva/sofie/inc/TMVA/ROperator_Concat.hxx | 2 +- tmva/sofie/inc/TMVA/ROperator_Conv.hxx | 17 +++++++++- .../inc/TMVA/ROperator_ConvTranspose.icc | 1 + tmva/sofie/inc/TMVA/ROperator_Einsum.hxx | 2 +- tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 3 ++ .../inc/TMVA/ROperator_LayerNormalization.hxx | 13 ++++++++ tmva/sofie/inc/TMVA/ROperator_Range.hxx | 4 ++- tmva/sofie/inc/TMVA/ROperator_Relu.hxx | 6 ++-- tmva/sofie/inc/TMVA/ROperator_Split.hxx | 2 +- tmva/sofie/inc/TMVA/ROperator_SubGraph.hxx | 2 +- tmva/sofie/inc/TMVA/ROperator_Where.hxx | 2 ++ tmva/sofie/inc/TMVA/SOFIE_common.hxx | 2 +- tmva/sofie/src/RModel.cxx | 33 +++++++++++-------- tmva/sofie_parsers/src/RModelParser_ONNX.cxx | 15 +++++---- 19 files changed, 107 insertions(+), 39 deletions(-) diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx index b8e6ef36147d0..e07ea21278488 100644 --- a/tmva/sofie/inc/TMVA/RModel.hxx +++ b/tmva/sofie/inc/TMVA/RModel.hxx @@ -161,8 +161,8 @@ public: std::string GenerateInferSignature(bool isdecl = true); // calculate total intermediate memory and position intermediate tensor addresses - std::string AllocateIntermediateMemory(std::span op_output_tensors); - void CheckAndFlushIntermediateMemory(std::span op_output_tensors, const size_t& op_idx); + std::string AllocateIntermediateMemory(std::span op_output_tensors, std::set& allocated_tensors); + void CheckAndFlushIntermediateMemory(std::span op_output_tensors, const size_t& op_idx); void SetOptimizationLevel(const OptimizationLevel &optim_level) { fOptimizationLevel = optim_level; } diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx index 60010c13165f1..8f0338eb5e3fc 100644 --- a/tmva/sofie/inc/TMVA/ROperator.hxx +++ b/tmva/sofie/inc/TMVA/ROperator.hxx @@ -22,7 +22,9 @@ enum class OperatorKind { RELU = 2, CONSTANT = 3, CONSTANTOFSHAPE = 4, - UNDEFINED = 5 + UNDEFINED = 5, + CONV=6, + BATCHNORM=7 }; inline const char* toString(OperatorKind kind) { @@ -32,11 +34,13 @@ inline const char* toString(OperatorKind kind) { case OperatorKind::RELU: return "RELU"; case OperatorKind::CONSTANT: return "CONSTANT"; case OperatorKind::CONSTANTOFSHAPE: return "CONSTANTOFSHAPE"; + case OperatorKind::BATCHNORM: return "batchnorm"; + case OperatorKind::CONV: return "conv"; case OperatorKind::UNDEFINED: return "UNDEFINED"; default: return "UNKNOWN"; } } -inline std::set FusableKinds = { OperatorKind::RELU, OperatorKind::LAYERNORM }; +inline std::set FusableKinds = { OperatorKind::RELU, OperatorKind::LAYERNORM, OperatorKind::BATCHNORM}; class ROperator{ @@ -71,15 +75,15 @@ protected: bool fIsOutputConstant = false; ///< flag to identify if operator has a constant output (no need to generate code) bool fIsOutputParamShape = false; ///< flag to identify of the output represents a parametric shape (can be knwon at compile time) - mutable std::vector fInputTensorNames; - mutable std::vector fOutputTensorNames; + mutable std::vector fInputTensorNames; + mutable std::vector fOutputTensorNames; public: - std::span GetOpInputTensors() const { + std::span GetOpInputTensors() const { return fInputTensorNames; } - std::span GetOpOutputTensors() const { + std::span GetOpOutputTensors() const { return fOutputTensorNames; } diff --git a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx index bcc0e52a40ca3..14fb6ea948585 100644 --- a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx @@ -103,7 +103,7 @@ public: fInputTensorNames.resize(fNInputs.size()); std::transform(fNInputs.begin(), fNInputs.end(), fInputTensorNames.begin(), - [](const std::string& s) -> std::string_view { return s; }); + [](const std::string& s) -> std::string { return s; }); fOutputTensorNames = { fNY }; } @@ -137,6 +137,7 @@ public: std::string name = "Broadcasted" + fNInputs[i]; model.AddIntermediateTensor(name, model.GetTensorType(fNInputs[0]), fShapeY); fNBroadcastedInputs.emplace_back("tensor_" + name); + fOutputTensorNames.emplace_back(name); } else { fNBroadcastedInputs.emplace_back("tensor_" + fNInputs[i]); } diff --git a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx index 16fc3d6c07ba5..82c5015dd9751 100644 --- a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx @@ -53,6 +53,7 @@ public: fNB(UTILITY::Clean_name(nameB)), fNMean(UTILITY::Clean_name(nameMean)), fNVar(UTILITY::Clean_name(nameVar)), fNY(UTILITY::Clean_name(nameY)), fActivation(activation) { + fKind = OperatorKind::BATCHNORM; fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; @@ -233,6 +234,22 @@ public: } std::vector GetBlasRoutines() override { return { std::string("Copy"), std::string("Axpy") }; } + std::string GetFusableOutputTensorName() override { + return fNY; + } + + void UpdateFusableTensorName(std::string fusable_tensor_name){ + fNX = UTILITY::Clean_name(fusable_tensor_name); + fNY = UTILITY::Clean_name(fusable_tensor_name); + fInputTensorNames = { fNX, fNScale }; + if (!fNB.empty()){ + fInputTensorNames.emplace_back(fNB); + } + + fOutputTensorNames = { fNY }; + std::cout<<"\ncalled from gemm"; + + } }; }//SOFIE diff --git a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx index 0d365ae517de5..ff7a4e69f526b 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx @@ -135,6 +135,7 @@ public: // Add an intermediate tensor for broadcasting A fNBroadcastedX1 = "Broadcasted" + fNX1; model.AddIntermediateTensor(fNBroadcastedX1, model.GetTensorType(fNX1), fShapeY); + fOutputTensorNames.emplace_back(fNBroadcastedX1); } } // Broadcast B to Y @@ -151,6 +152,7 @@ public: // Add an intermediate tensor for broadcasting B fNBroadcastedX2 = "Broadcasted" + fNX2; model.AddIntermediateTensor(fNBroadcastedX2, model.GetTensorType(fNX2), fShapeY); + fOutputTensorNames.emplace_back(fNBroadcastedX2); } } } else { diff --git a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx index ad855341dfc17..1161eaf5e8e0f 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx @@ -37,7 +37,7 @@ fInputTensorNames.resize(fInputs.size()); std::transform(fInputs.begin(), fInputs.end(), fInputTensorNames.begin(), - [](const std::string& s) -> std::string_view { return s; }); + [](const std::string& s) -> std::string { return s; }); fOutputTensorNames = { fOutput }; } diff --git a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx index 6d5d54262036f..1db741b6bb260 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx @@ -58,7 +58,8 @@ public: fAttrPads(pads), fAttrStrides(strides), fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)), fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY)) - { + { + fKind = OperatorKind::CONV; if(std::is_same::value) { fType = "float"; } else { @@ -77,6 +78,7 @@ public: fAttrPads(pads), fAttrStrides(strides), fNX(UTILITY::Clean_name(nameX)), fNW(UTILITY::Clean_name(nameW)), fNY(UTILITY::Clean_name(nameY)) { + fKind = OperatorKind::CONV; if(std::is_same::value) { fType = "float"; } else { @@ -294,6 +296,7 @@ public: // we need to add a new intermediate tensor for broadcasted bias tensor fNB2 = fNB + "bcast"; model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape); + fOutputTensorNames.push_back(fNB2); } } } @@ -569,6 +572,18 @@ public: /*! \brief Returns the blas routines needed to compile the generated code */ std::vector GetBlasRoutines() override { return { std::string("Gemm"), std::string("Axpy") }; } + std::string GetFusableOutputTensorName() override { + return fNY; + } + void UpdateFusableTensorName(std::string fusable_tensor_name) override { + std::cout<<"\ncalled from conv"; + fNY = fusable_tensor_name; + fOutputTensorNames = { fNY }; + convK = fNX +"_f"; + imcol = fNX +"_xcol"; + fOutputTensorNames.emplace_back(convK); + fOutputTensorNames.emplace_back(imcol); + } }; } // namespace SOFIE diff --git a/tmva/sofie/inc/TMVA/ROperator_ConvTranspose.icc b/tmva/sofie/inc/TMVA/ROperator_ConvTranspose.icc index 316ab00f99d58..f3df2bf2aa827 100644 --- a/tmva/sofie/inc/TMVA/ROperator_ConvTranspose.icc +++ b/tmva/sofie/inc/TMVA/ROperator_ConvTranspose.icc @@ -167,6 +167,7 @@ void ROperator_ConvTranspose::Initialize(RModel& model){ // we need to add a new intermediate tensor for broadcasted bias tensor fNBroadcastedB = "Broadcasted" + fNB; model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY); + fOutputTensorNames.emplace_back(fNBroadcastedB); } } else { diff --git a/tmva/sofie/inc/TMVA/ROperator_Einsum.hxx b/tmva/sofie/inc/TMVA/ROperator_Einsum.hxx index fbf6659058d36..78e138d6b5dee 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Einsum.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Einsum.hxx @@ -51,7 +51,7 @@ public: fInputTensorNames.resize(fNInputs.size()); std::transform(fNInputs.begin(), fNInputs.end(), fInputTensorNames.begin(), - [](const std::string& s) -> std::string_view { return s; }); + [](const std::string& s) -> std::string { return s; }); fOutputTensorNames = { fNY }; } diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index 7fc3f45ceb410..16c09bdb98f60 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -251,6 +251,7 @@ namespace SOFIE{ // In case of session add broadcasting code in Session constructor and in GenerateInitCode // we need to add a new intermediate tensor for broadcasted bias tensor fNC2 = fNC + "bcast"; + fOutputTensorNames.emplace_back(fNC2); if (!fIsDynamic) { model.AddIntermediateTensor(fNC2, model.GetTensorType(fNC), shapeY); } @@ -410,6 +411,8 @@ namespace SOFIE{ void UpdateFusableTensorName(std::string fusable_tensor_name){ fNY = UTILITY::Clean_name(fusable_tensor_name); + fOutputTensorNames = { fNY }; + std::cout<<"\ncalled from gemm"; } }; diff --git a/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx b/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx index 2c432e0a028d7..8d4c144c83e5b 100644 --- a/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx @@ -136,6 +136,7 @@ public: if (isDynamic || lengthB < static_cast(std::stoi(fLength))) { fNBroadcastedB = "Broadcasted" + fNB; model.AddIntermediateTensor(fNBroadcastedB, ConvertStringToType(fType), fShapeX); + fOutputTensorNames.emplace_back(fNBroadcastedB); } } model.AddNeededStdLib("cmath"); @@ -345,6 +346,18 @@ public: void UpdateFusableTensorName(std::string fusable_tensor_name){ fNX = UTILITY::Clean_name(fusable_tensor_name); fNY = UTILITY::Clean_name(fusable_tensor_name); + fInputTensorNames = { fNX, fNScale }; + if (!fNB.empty()){ + fInputTensorNames.emplace_back(fNB); + } + + fOutputTensorNames = { fNY }; + if (!fNMean.empty()){ + fOutputTensorNames.emplace_back(fNMean); + } + if (!fNInvStdDev.empty()){ + fOutputTensorNames.emplace_back(fNInvStdDev); + } } }; diff --git a/tmva/sofie/inc/TMVA/ROperator_Range.hxx b/tmva/sofie/inc/TMVA/ROperator_Range.hxx index 9cac15a14fc52..cc475ec6fcb03 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Range.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Range.hxx @@ -28,7 +28,7 @@ public: ROperator_Range(){} ROperator_Range(std::string start, std::string limit, std::string delta, std::string nameOutput): - fNStart(start), fNLimit(limit), fNDelta(delta), + fNStart(UTILITY::Clean_name(start)), fNLimit(UTILITY::Clean_name(limit)), fNDelta(UTILITY::Clean_name(delta)), fNOutput(UTILITY::Clean_name(nameOutput)) { if (std::is_same::value) { fType = "float"; @@ -37,6 +37,8 @@ public: } static_assert( (std::is_same_v || std::is_same_v), "TMVA::SOFIE - Unsupported type by Range operator"); + fInputTensorNames = {fNStart, fNLimit, fNDelta}; + fOutputTensorNames = {fNOutput}; } std::vector TypeInference(std::vector input) override { diff --git a/tmva/sofie/inc/TMVA/ROperator_Relu.hxx b/tmva/sofie/inc/TMVA/ROperator_Relu.hxx index e98d1d7119f00..6825d31e9c62c 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Relu.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Relu.hxx @@ -73,8 +73,10 @@ public: } void UpdateFusableTensorName(std::string fusable_tensor_name){ - fNX = UTILITY::Clean_name(fusable_tensor_name); - fNY = UTILITY::Clean_name(fusable_tensor_name); + fNX = fusable_tensor_name; + fNY = fusable_tensor_name; + fInputTensorNames = { fNX }; + fOutputTensorNames = { fNY }; } }; diff --git a/tmva/sofie/inc/TMVA/ROperator_Split.hxx b/tmva/sofie/inc/TMVA/ROperator_Split.hxx index f191f9d014238..0936a13415313 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Split.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Split.hxx @@ -38,7 +38,7 @@ public: fInputTensorNames = { fNX }; fOutputTensorNames.resize(fNYs.size()); std::transform(fNYs.begin(), fNYs.end(), fOutputTensorNames.begin(), - [](const std::string& s) -> std::string_view { return s; }); + [](const std::string& s) -> std::string { return s; }); } std::vector TypeInference(std::vector input) override { diff --git a/tmva/sofie/inc/TMVA/ROperator_SubGraph.hxx b/tmva/sofie/inc/TMVA/ROperator_SubGraph.hxx index 683d40de835d7..4eea16e059210 100644 --- a/tmva/sofie/inc/TMVA/ROperator_SubGraph.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_SubGraph.hxx @@ -36,7 +36,7 @@ public: fInputTensorNames = { fNX }; std::transform(fNYs.begin(), fNYs.end(), fOutputTensorNames.begin(), - [](const std::string& s) -> std::string_view { return s; }); + [](const std::string& s) -> std::string { return s; }); } std::vector TypeInference(std::vector input) override { diff --git a/tmva/sofie/inc/TMVA/ROperator_Where.hxx b/tmva/sofie/inc/TMVA/ROperator_Where.hxx index 0735e820ff1ed..3757831e28a49 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Where.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Where.hxx @@ -113,6 +113,7 @@ public: } else { // Add an intermediate tensor for broadcasting A model.AddIntermediateTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY); + fOutputTensorNames.push_back(fNBroadcastedA); } } // Broadcast B to Y @@ -129,6 +130,7 @@ public: } else { // Add an intermediate tensor for broadcasting B model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY); + fOutputTensorNames.push_back(fNBroadcastedB); } } // Broadcast C to Y diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx index 57d829ac1acff..ffc79e9be94c6 100644 --- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx +++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx @@ -171,7 +171,7 @@ struct TensorMemoryInfo { TensorMemoryInfo split(const std::string_view new_name, size_t new_size) { if (new_size > tensor_size) { - throw std::invalid_argument("New size exceeds available tensor size."); + throw std::invalid_argument("New size "+ std::to_string(new_size) + " exceeds available tensor size of " + std::to_string(tensor_size)+"."); } tensor_size -= new_size; return TensorMemoryInfo{new_name, new_size}; diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 54263c0866a99..00320312c2026 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -338,7 +338,7 @@ void RModel::SetNotWritableInitializedTensor(const std::string & tensor_name) { t->second.SetNotWritable(); } -std::string RModel::AllocateIntermediateMemory(std::span op_output_tensors) +std::string RModel::AllocateIntermediateMemory(std::span op_output_tensors, std::set& allocated_tensors) { std::stringstream code; @@ -355,8 +355,9 @@ std::string RModel::AllocateIntermediateMemory(std::span bool allocated = false; if (GetTensorType(name) == ETensorType::BOOL || fInitializedTensors.find(name) != fInitializedTensors.end() || - fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end()) continue; - + fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() || + allocated_tensors.count(it)) continue; + auto tensor_size = GetTypeSize(GetTensorType(name)) * ConvertShapeToLength(GetTensorShape(name)); for (auto chunk = fIntermediateMemoryInfo.available_stack.begin(); chunk != fIntermediateMemoryInfo.available_stack.end(); ) { @@ -381,6 +382,7 @@ std::string RModel::AllocateIntermediateMemory(std::span ++chunk; } + if (!allocated) { size_t chunk_idx = fIntermediateMemoryInfo.total_stack.empty() ? 0 @@ -390,11 +392,12 @@ std::string RModel::AllocateIntermediateMemory(std::span declareIntermediateTensor(name, tensor_size, chunk_idx); } + allocated_tensors.insert(it); } return code.str(); } -void RModel::CheckAndFlushIntermediateMemory(std::span op_input_tensors, const size_t& op_idx){ +void RModel::CheckAndFlushIntermediateMemory(std::span op_input_tensors, const size_t& op_idx){ for (auto &it : op_input_tensors){ // last occurence of the tensor is reached => flush it from memory if (fIntermediateTensorFrequencyLookup[it] == fOperators[op_idx]->GetOpOrder()){ @@ -435,7 +438,8 @@ void RModel::CheckAndFuseOperators() { std::vector fusable_indices; std::string fusable_propagate_tensor_name; while (idx < fOperators.size()) { - if (fOperators[idx]->GetOpKind() != OperatorKind::GEMM) { + std::cout<<"\nop currently: "<GetOpKind()); + if (fOperators[idx]->GetOpKind() != OperatorKind::GEMM && fOperators[idx]->GetOpKind() != OperatorKind::CONV) { ++idx; continue; } @@ -447,33 +451,33 @@ void RModel::CheckAndFuseOperators() { size_t j = idx + 1; for (; j < fOperators.size()-1; ++j) { auto opKind = fOperators[j]->GetOpKind(); - + std::cout<<"\nchecking for fusion: "<GetFusableOutputTensorName(); auto freqIt = fIntermediateTensorFrequencyLookup.find(tensorName); // Propagate tensor name only if it's not used multiple times + fusable_indices.push_back(j); if (freqIt != fIntermediateTensorFrequencyLookup.end() && (freqIt->second != fOperators[j + 1]->GetOpOrder() || FusableKinds.count(fOperators[j + 1]->GetOpKind()) == 0)) { // std::cout << "\nBreaking here, second: " << freqIt->second << ", idx: " << fOperators[j+1]->GetOpOrder(); fusable_propagate_tensor_name = tensorName; break; - } else { - fusable_indices.push_back(j); } } // std::cout<<"\nstart fusing: "<GetOpKind())<<" , with: "<UpdateFusableTensorName(fusable_propagate_tensor_name); + std::cout<<"\nfusing op "<GetOpKind())<<" , with: "<UpdateFusableTensorName(fusable_propagate_tensor_name); } } @@ -915,11 +919,12 @@ void RModel::GenerateSessionCode() if (fOptimizationLevel == OptimizationLevel::kExtended) { // evaluate total intermediate memory and position intermediate tensor addresses + std::set allocated_tensors; std::string intermediate_memory_alloc_string = ""; intermediate_memory_alloc_string += "\n// --- Positioning intermediate tensor memory --"; for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { - std::cout<GetOpKind()); - intermediate_memory_alloc_string += AllocateIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors()); + // std::cout<GetOpKind()); + intermediate_memory_alloc_string += AllocateIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors(), allocated_tensors); CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); } diff --git a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx index ad0221d8664b5..1303d37150c28 100644 --- a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx +++ b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx @@ -90,7 +90,7 @@ extern ParserFuncSignature ParseScatterElements; // Declaration of fused operators extern ParserFuseFuncSignature ParseFuseConvAdd; extern ParserFuseFuncSignature ParseFuseGemmRelu; -extern ParserFuseFuncSignature ParseFuseBatchnormRelu; +// extern ParserFuseFuncSignature ParseFuseBatchnormRelu; extern ParserFuseFuncSignature ParseFuseConvTransposeAdd; extern ParserFuseFuncSignature ParseFuseMatMulAdd; @@ -320,12 +320,13 @@ RModelParser_ONNX::ParseOperator(const size_t i, const onnx::GraphProto &graphpr fFusedOperators[idx2] = true; return ParseFuseGemmRelu(*this, graphproto.node(idx), graphproto.node(idx2)); } - } else if (nodeproto.op_type() == "BatchNormalization") { - if (idx2 < graphproto.node_size() && graphproto.node(idx2).op_type() == "Relu") { - fFusedOperators[idx2] = true; - return ParseFuseBatchnormRelu(*this, graphproto.node(idx), graphproto.node(idx2)); - } - } + } + // else if (nodeproto.op_type() == "BatchNormalization") { + // if (idx2 < graphproto.node_size() && graphproto.node(idx2).op_type() == "Relu") { + // fFusedOperators[idx2] = true; + // return ParseFuseBatchnormRelu(*this, graphproto.node(idx), graphproto.node(idx2)); + // } + // } } From 6e1208875a0dc4ded0c3c442b09da19d8d7429bb Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sat, 7 Jun 2025 11:55:29 +0200 Subject: [PATCH 5/7] remove adding broadcasting tensors to memory optimization --- tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx | 1 - tmva/sofie/inc/TMVA/ROperator_Comparision.hxx | 2 -- tmva/sofie/inc/TMVA/ROperator_Conv.hxx | 1 - tmva/sofie/inc/TMVA/ROperator_ConvTranspose.icc | 1 - tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 1 - tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx | 1 - tmva/sofie/inc/TMVA/ROperator_Where.hxx | 2 -- tmva/sofie/src/RModel.cxx | 1 - 8 files changed, 10 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx index 14fb6ea948585..22cd0edcf75e4 100644 --- a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx @@ -137,7 +137,6 @@ public: std::string name = "Broadcasted" + fNInputs[i]; model.AddIntermediateTensor(name, model.GetTensorType(fNInputs[0]), fShapeY); fNBroadcastedInputs.emplace_back("tensor_" + name); - fOutputTensorNames.emplace_back(name); } else { fNBroadcastedInputs.emplace_back("tensor_" + fNInputs[i]); } diff --git a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx index ff7a4e69f526b..0d365ae517de5 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx @@ -135,7 +135,6 @@ public: // Add an intermediate tensor for broadcasting A fNBroadcastedX1 = "Broadcasted" + fNX1; model.AddIntermediateTensor(fNBroadcastedX1, model.GetTensorType(fNX1), fShapeY); - fOutputTensorNames.emplace_back(fNBroadcastedX1); } } // Broadcast B to Y @@ -152,7 +151,6 @@ public: // Add an intermediate tensor for broadcasting B fNBroadcastedX2 = "Broadcasted" + fNX2; model.AddIntermediateTensor(fNBroadcastedX2, model.GetTensorType(fNX2), fShapeY); - fOutputTensorNames.emplace_back(fNBroadcastedX2); } } } else { diff --git a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx index 1db741b6bb260..a85185567c060 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx @@ -296,7 +296,6 @@ public: // we need to add a new intermediate tensor for broadcasted bias tensor fNB2 = fNB + "bcast"; model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape); - fOutputTensorNames.push_back(fNB2); } } } diff --git a/tmva/sofie/inc/TMVA/ROperator_ConvTranspose.icc b/tmva/sofie/inc/TMVA/ROperator_ConvTranspose.icc index f3df2bf2aa827..316ab00f99d58 100644 --- a/tmva/sofie/inc/TMVA/ROperator_ConvTranspose.icc +++ b/tmva/sofie/inc/TMVA/ROperator_ConvTranspose.icc @@ -167,7 +167,6 @@ void ROperator_ConvTranspose::Initialize(RModel& model){ // we need to add a new intermediate tensor for broadcasted bias tensor fNBroadcastedB = "Broadcasted" + fNB; model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY); - fOutputTensorNames.emplace_back(fNBroadcastedB); } } else { diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index 16c09bdb98f60..63967f2b503be 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -251,7 +251,6 @@ namespace SOFIE{ // In case of session add broadcasting code in Session constructor and in GenerateInitCode // we need to add a new intermediate tensor for broadcasted bias tensor fNC2 = fNC + "bcast"; - fOutputTensorNames.emplace_back(fNC2); if (!fIsDynamic) { model.AddIntermediateTensor(fNC2, model.GetTensorType(fNC), shapeY); } diff --git a/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx b/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx index 8d4c144c83e5b..f0e49670ef993 100644 --- a/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx @@ -136,7 +136,6 @@ public: if (isDynamic || lengthB < static_cast(std::stoi(fLength))) { fNBroadcastedB = "Broadcasted" + fNB; model.AddIntermediateTensor(fNBroadcastedB, ConvertStringToType(fType), fShapeX); - fOutputTensorNames.emplace_back(fNBroadcastedB); } } model.AddNeededStdLib("cmath"); diff --git a/tmva/sofie/inc/TMVA/ROperator_Where.hxx b/tmva/sofie/inc/TMVA/ROperator_Where.hxx index 3757831e28a49..0735e820ff1ed 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Where.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Where.hxx @@ -113,7 +113,6 @@ public: } else { // Add an intermediate tensor for broadcasting A model.AddIntermediateTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY); - fOutputTensorNames.push_back(fNBroadcastedA); } } // Broadcast B to Y @@ -130,7 +129,6 @@ public: } else { // Add an intermediate tensor for broadcasting B model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY); - fOutputTensorNames.push_back(fNBroadcastedB); } } // Broadcast C to Y diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 00320312c2026..2e2c5ef7cc15e 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -1417,7 +1417,6 @@ void RModel::HeadInitializedTensors(std::string name, int n_print) { void RModel::OutputGenerated(std::string filename, bool append) { RModel_Base::OutputGenerated(filename, append); - // write weights in a text file if (fUseWeightFile) { if (!filename.empty()) { From e32b7ca86e4c56e3c623ad714d6464a7fcd9c865 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sat, 7 Jun 2025 21:49:15 +0200 Subject: [PATCH 6/7] fix: avoid allocating memory again to tensors of fused ops --- tmva/sofie/inc/TMVA/RModel.hxx | 4 ++++ tmva/sofie/inc/TMVA/ROperator.hxx | 2 +- .../inc/TMVA/ROperator_BatchNormalization.hxx | 19 +++++++-------- tmva/sofie/inc/TMVA/ROperator_Conv.hxx | 12 ++++------ tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 9 ++++---- .../inc/TMVA/ROperator_LayerNormalization.hxx | 23 +++++++------------ tmva/sofie/inc/TMVA/ROperator_Relu.hxx | 12 ++++++---- tmva/sofie/src/RModel.cxx | 15 +++++------- 8 files changed, 43 insertions(+), 53 deletions(-) diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx index e07ea21278488..8b521f2e6c03f 100644 --- a/tmva/sofie/inc/TMVA/RModel.hxx +++ b/tmva/sofie/inc/TMVA/RModel.hxx @@ -166,6 +166,10 @@ public: void SetOptimizationLevel(const OptimizationLevel &optim_level) { fOptimizationLevel = optim_level; } + void RemoveIntermediateTensor(const std::string& tensor_name){ + fIntermediateTensorInfos.erase(tensor_name); + } + protected: // internal functions // generate code for the initialized tensors diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx index 8f0338eb5e3fc..1c166491bf8b4 100644 --- a/tmva/sofie/inc/TMVA/ROperator.hxx +++ b/tmva/sofie/inc/TMVA/ROperator.hxx @@ -60,7 +60,7 @@ public: virtual std::string GenerateSessionMembersCode(std::string /*opName*/) { return ""; } virtual std::string Header() { return "";} virtual std::string GetFusableOutputTensorName() { return "";} - virtual void UpdateFusableTensorName(std::string){ return;}; + virtual void UpdateFusableTensorName(std::string, const std::function& removal_func){ return;}; //virtual void Forward_reference() = 0; diff --git a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx index 82c5015dd9751..a565ea5d314c3 100644 --- a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx @@ -238,18 +238,15 @@ public: return fNY; } - void UpdateFusableTensorName(std::string fusable_tensor_name){ - fNX = UTILITY::Clean_name(fusable_tensor_name); - fNY = UTILITY::Clean_name(fusable_tensor_name); - fInputTensorNames = { fNX, fNScale }; - if (!fNB.empty()){ - fInputTensorNames.emplace_back(fNB); - } - - fOutputTensorNames = { fNY }; - std::cout<<"\ncalled from gemm"; + void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function& removal_func){ + removal_func(fNX); + removal_func(fNY); + fNX = fusable_tensor_name; + fNY = fusable_tensor_name; + fInputTensorNames[0] = fNX; + fOutputTensorNames[0] = fNY; + } - } }; }//SOFIE diff --git a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx index a85185567c060..ee9a24c159ea2 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx @@ -574,15 +574,11 @@ public: std::string GetFusableOutputTensorName() override { return fNY; } - void UpdateFusableTensorName(std::string fusable_tensor_name) override { - std::cout<<"\ncalled from conv"; + void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function& removal_func) override { + removal_func(fNY); fNY = fusable_tensor_name; - fOutputTensorNames = { fNY }; - convK = fNX +"_f"; - imcol = fNX +"_xcol"; - fOutputTensorNames.emplace_back(convK); - fOutputTensorNames.emplace_back(imcol); - } + fOutputTensorNames[0] = fNY; + } }; } // namespace SOFIE diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index 63967f2b503be..00e3a9158d6d8 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -408,11 +408,12 @@ namespace SOFIE{ return fNY; } - void UpdateFusableTensorName(std::string fusable_tensor_name){ - fNY = UTILITY::Clean_name(fusable_tensor_name); - fOutputTensorNames = { fNY }; - std::cout<<"\ncalled from gemm"; + void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function& removal_func){ + removal_func(fNY); + fNY = fusable_tensor_name; + fOutputTensorNames[0] = fNY; } + }; diff --git a/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx b/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx index f0e49670ef993..d09db1a19979e 100644 --- a/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx @@ -342,22 +342,15 @@ public: return fNY; } - void UpdateFusableTensorName(std::string fusable_tensor_name){ - fNX = UTILITY::Clean_name(fusable_tensor_name); - fNY = UTILITY::Clean_name(fusable_tensor_name); - fInputTensorNames = { fNX, fNScale }; - if (!fNB.empty()){ - fInputTensorNames.emplace_back(fNB); - } - - fOutputTensorNames = { fNY }; - if (!fNMean.empty()){ - fOutputTensorNames.emplace_back(fNMean); - } - if (!fNInvStdDev.empty()){ - fOutputTensorNames.emplace_back(fNInvStdDev); - } + void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function& removal_func){ + removal_func(fNX); + removal_func(fNY); + fNX = fusable_tensor_name; + fNY = fusable_tensor_name; + fInputTensorNames[0] = fNX; + fOutputTensorNames[0] = fNY; } + }; } // namespace SOFIE diff --git a/tmva/sofie/inc/TMVA/ROperator_Relu.hxx b/tmva/sofie/inc/TMVA/ROperator_Relu.hxx index 6825d31e9c62c..dea69818e978e 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Relu.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Relu.hxx @@ -72,11 +72,13 @@ public: return fNY; } - void UpdateFusableTensorName(std::string fusable_tensor_name){ - fNX = fusable_tensor_name; - fNY = fusable_tensor_name; - fInputTensorNames = { fNX }; - fOutputTensorNames = { fNY }; + void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function& removal_func){ + removal_func(fNX); + removal_func(fNY); + fNX = fusable_tensor_name; + fNY = fusable_tensor_name; + fInputTensorNames[0] = fNX; + fOutputTensorNames[0] = fNY; } }; diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 2e2c5ef7cc15e..11a49f729b580 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -438,7 +438,6 @@ void RModel::CheckAndFuseOperators() { std::vector fusable_indices; std::string fusable_propagate_tensor_name; while (idx < fOperators.size()) { - std::cout<<"\nop currently: "<GetOpKind()); if (fOperators[idx]->GetOpKind() != OperatorKind::GEMM && fOperators[idx]->GetOpKind() != OperatorKind::CONV) { ++idx; continue; @@ -451,13 +450,11 @@ void RModel::CheckAndFuseOperators() { size_t j = idx + 1; for (; j < fOperators.size()-1; ++j) { auto opKind = fOperators[j]->GetOpKind(); - std::cout<<"\nchecking for fusion: "<GetFusableOutputTensorName(); auto freqIt = fIntermediateTensorFrequencyLookup.find(tensorName); @@ -472,21 +469,21 @@ void RModel::CheckAndFuseOperators() { break; } } - // std::cout<<"\nstart fusing: "<GetOpKind())<<" , with: "<UpdateFusableTensorName(fusable_propagate_tensor_name); + fOperators[index]->UpdateFusableTensorName(fusable_propagate_tensor_name, [this](const std::string& name) { + this->RemoveIntermediateTensor(name); + }); } + AddIntermediateTensor(fusable_propagate_tensor_name, fusable_tensor_type, fusable_tensor_shape); } idx = std::max(idx + 1, j); } } - - void RModel::Initialize(int batchSize, bool verbose) { std::map inputParams; if (batchSize > 0) { From fa5008b01d207d024e14fc07a8c5b29d123f9bf9 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 7 Aug 2025 18:41:19 +0200 Subject: [PATCH 7/7] fix: remove debug comments --- tmva/sofie/src/RModel.cxx | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 11a49f729b580..56dd6b336e37d 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -452,7 +452,6 @@ void RModel::CheckAndFuseOperators() { auto opKind = fOperators[j]->GetOpKind(); // Only consider operators with fusable kinds if (!FusableKinds.count(opKind)) { - // std::cout<<"\n op not fusable: "<second != fOperators[j + 1]->GetOpOrder() || FusableKinds.count(fOperators[j + 1]->GetOpKind()) == 0)) { - // std::cout << "\nBreaking here, second: " << freqIt->second << ", idx: " << fOperators[j+1]->GetOpOrder(); fusable_propagate_tensor_name = tensorName; break; } @@ -591,9 +589,6 @@ void RModel::Initialize(const std::map & inputParams, bool } fOperators[op_idx]->Initialize(*this); - // if(fOperators[op_idx]->GetOpName().length()==0){ - // std::cout<<"\nempty name for op, typeid: "<GetOpOutputTensors()) { std::string name{it}; if (fIntermediateTensorFrequencyLookup.find(it) == fIntermediateTensorFrequencyLookup.end() && @@ -920,7 +915,6 @@ void RModel::GenerateSessionCode() std::string intermediate_memory_alloc_string = ""; intermediate_memory_alloc_string += "\n// --- Positioning intermediate tensor memory --"; for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { - // std::cout<GetOpKind()); intermediate_memory_alloc_string += AllocateIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors(), allocated_tensors); CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); }