Skip to content

Commit 8639e94

Browse files
Olialmoneta
authored and committed
Time Profiler for Sofie
1 parent e520baf commit 8639e94

File tree

9 files changed

+255
-23
lines changed

9 files changed

+255
-23
lines changed

tmva/sofie/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
2222
TMVA/OperatorList.hxx
2323
TMVA/RModel_Base.hxx
2424
TMVA/RModel.hxx
25+
TMVA/RModelProfiler.hxx
2526
TMVA/ROperator.hxx
2627
TMVA/ROperator_BasicUnary.hxx
2728
TMVA/ROperator_BasicBinary.hxx
@@ -77,6 +78,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
7778
SOURCES
7879
src/RModel_Base.cxx
7980
src/RModel.cxx
81+
src/RModelProfiler.cxx
8082
src/RModel_GNN.cxx
8183
src/RModel_GraphIndependent.cxx
8284
src/RFunction.cxx

tmva/sofie/inc/TMVA/RModel.hxx

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,23 @@ namespace SOFIE {
1111

1212
class RModel final : public RModel_Base {
1313

14+
friend class RModelProfiler;
15+
1416
private:
1517
bool fIsInitialized = false;
1618
bool fIsSubGraph = false;
19+
bool fProfile = false;
20+
1721
int fVerbose = 0;
1822
int fBatchSize = -1;
1923
long fReadPos = 0; // reading file position
24+
2025
size_t fConstantTensorSize = 0; // size (in Bytes) of the allocated constant tensors
2126
size_t fWeightsTensorSize = 0; // size (in Bytes) of the allocated weight tensors
2227
size_t fOtherTensorSize = 0; // size (in Bytes) of intermediate tensors which are not managed by the memory pool
2328

29+
std::string fProfilerGC = "";
30+
2431
OptimizationLevel fOptimizationLevel = OptimizationLevel::kExtended;
2532

2633
std::unordered_map<std::string, InputTensorInfo> fInputTensorInfos; // input tensors where shape may not fully defined or other graph inputs?
@@ -152,7 +159,7 @@ public:
152159
void Initialize(int batchSize = -1, bool verbose = false);
153160
void Initialize(const std::map<std::string,size_t> & inputParams, bool verbose = false);
154161

155-
void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
162+
void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
156163
void Generate(Options options = Options::kDefault, int batchSize = -1, int pos = 0, bool verbose = false)
157164
{
158165
Generate(static_cast<std::underlying_type_t<Options>>(options), batchSize, pos, verbose);
tmva/sofie/inc/TMVA/RModelProfiler.hxx

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#ifndef TMVA_SOFIE_RMODELPROFILER
2+
#define TMVA_SOFIE_RMODELPROFILER
3+
4+
#include "TMVA/RModel.hxx"
5+
6+
namespace TMVA {
7+
namespace Experimental {
8+
namespace SOFIE {
9+
10+
/// \class RModelProfiler
11+
/// \brief A helper class to generate profiled inference code for an RModel.
12+
///
13+
/// This class instruments the generated C++ code to measure the execution
14+
/// time of each operator. It is invoked when the RModel::Generate is called
15+
/// with the Options::kProfile flag.
16+
class RModelProfiler {
17+
private:
18+
RModel &fModel;
19+
20+
void GenerateUtilityFunctions();
21+
22+
public:
23+
// The profiler must be constructed with a model to work on.
24+
RModelProfiler() = delete;
25+
RModelProfiler(RModel &model);
26+
~RModelProfiler() = default;
27+
28+
// There is no point in copying or moving an RModelProfiler
29+
RModelProfiler(const RModelProfiler &other) = delete;
30+
RModelProfiler(RModelProfiler &&other) = delete;
31+
RModelProfiler &operator=(const RModelProfiler &other) = delete;
32+
RModelProfiler &operator=(RModelProfiler &&other) = delete;
33+
34+
// Main function to generate the profiled code.
35+
void Generate();
36+
};
37+
38+
} // namespace SOFIE
39+
} // namespace Experimental
40+
} // namespace TMVA
41+
42+
#endif // TMVA_SOFIE_RMODELPROFILER

tmva/sofie/inc/TMVA/RModel_Base.hxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ enum class Options {
2626
kRootBinaryWeightFile = 0x4,
2727
kGNN = 0x8,
2828
kGNNComponent = 0x10,
29+
kProfile = 0x20,
2930
};
3031

3132
// Optimization levels inspired by ONNXRuntime.

tmva/sofie/inc/TMVA/ROperator.hxx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ public:
3737
//virtual void Forward_blas() = 0;
3838
virtual ~ROperator(){}
3939

40+
std::string name = "UnnamedOperator";
41+
const std::string &GetOperatorName() { return name; };
42+
4043
protected:
4144

4245
const std::string SP = " "; ///< space used to correctly indent the generated C++ code

tmva/sofie/src/RModel.cxx

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#endif
1010

1111
#include "TMVA/RModel.hxx"
12+
#include "TMVA/RModelProfiler.hxx"
1213
#include "TMVA/SOFIE_common.hxx"
1314

1415
namespace TMVA {
@@ -941,7 +942,7 @@ void RModel::GenerateSessionCode()
941942
CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx);
942943
}
943944

944-
// to check remaining unused fragments after memory allocation (lesser the better)
945+
// to check remaining unused fragments after memory allocation (lesser the better)
945946
// for (const auto &it: fIntermediateMemoryInfo.available_stack){
946947
// std::cout<<"chunk_idx: "<<it.first<<", chunk_size: "<<it.second<<"\n";
947948
// }
@@ -969,13 +970,13 @@ void RModel::GenerateSessionCode()
969970
// Generate code for Session constructor
970971
if (fUseSession) {
971972
std::string sessionName = "Session";
972-
if (fIsSubGraph)
973+
if (fIsSubGraph)
973974
sessionName += "_" + fName;
974975
// add here specific operator code that needs to define session data members
975976
fGC += "\n";
976977
for (size_t id = 0; id < fOperators.size(); id++) {
977978
std::string opName = std::to_string(id);
978-
fGC += fOperators[id]->GenerateSessionMembersCode(opName);
979+
fGC += fOperators[id]->GenerateSessionMembersCode(opName);
979980
}
980981
fGC += "\n";
981982
// here add initialization and reading of weight tensors
@@ -1021,23 +1022,28 @@ void RModel::GenerateSessionCode()
10211022
fGC += "}\n\n";
10221023
}
10231024

1024-
fGC += doInferSignature + "{\n";
1025-
fGC += "\n";
1025+
if (fProfile) {
1026+
RModelProfiler profiler(*this);
1027+
profiler.Generate();
1028+
fGC += fProfilerGC;
1029+
} else {
1030+
fGC += doInferSignature + "{\n";
1031+
fGC += "\n";
10261032

1027-
// generate the inference code
1028-
if (fVerbose)
1029-
std::cout << "Generating main inference code for " << fName << std::endl;
1033+
// generate the inference code
1034+
if (fVerbose)
1035+
std::cout << "Generating main inference code for " << fName << std::endl;
10301036

1031-
if (fOutputTensorNames.size() == 0)
1032-
throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");
1037+
if (fOutputTensorNames.size() == 0)
1038+
throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");
10331039

1034-
for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
1035-
if (fVerbose)
1040+
for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
1041+
if (fVerbose)
10361042
std::cout << "Generating code for operator .... " << op_idx << std::endl;
1037-
fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
1038-
}
1043+
fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
1044+
}
10391045

1040-
fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
1046+
fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
10411047

10421048
for (std::string const &name : fOutputTensorNames) {
10431049
// need to check is size is the same (don't want to return a vector with
@@ -1048,7 +1054,8 @@ void RModel::GenerateSessionCode()
10481054
fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
10491055
}
10501056

1051-
fGC += "}\n\n";
1057+
fGC += "}\n\n";
1058+
}
10521059

10531060
// generate the inference overload that returns an output struct
10541061
GenerateOutput();
@@ -1061,9 +1068,11 @@ void RModel::GenerateSessionCode()
10611068

10621069
void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, long pos, bool verbose)
10631070
{
1071+
bool profile = (options & static_cast<std::underlying_type_t<Options>>(Options::kProfile));
10641072
fVerbose = verbose;
10651073
fBatchSize = batchSize;
10661074
fReadPos = pos;
1075+
fProfile = profile;
10671076

10681077
// session flag is used in operator initialize
10691078
if (static_cast<std::underlying_type_t<Options>>(Options::kNoSession) & options) {
@@ -1083,9 +1092,9 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo
10831092
"TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class");
10841093
}
10851094

1086-
if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options)
1095+
if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options)
10871096
fIsGNN = true;
1088-
if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options)
1097+
if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options)
10891098
fIsGNNComponent = true;
10901099

10911100
// initialize the model including all operators and sub-graphs
@@ -1099,13 +1108,13 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo
10991108

11001109
// generate first code for the subgraphs
11011110
for (auto &graph : fSubGraphs) {
1102-
if (fVerbose)
1111+
if (fVerbose)
11031112
std::cout << "generate session code for subgraph " << graph->fName << std::endl;
11041113
graph->GenerateSessionCode();
11051114
fGC += graph->fGC;
11061115
}
11071116

1108-
if (fVerbose)
1117+
if (fVerbose)
11091118
std::cout << "generate Main session code - model " << fName << std::endl;
11101119

11111120
// generate main session code

tmva/sofie/src/RModelProfiler.cxx

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#include "TMVA/RModelProfiler.hxx"
#include "TMVA/SOFIE_common.hxx"

#include <stdexcept>
#include <string>
3+
4+
namespace TMVA {
5+
namespace Experimental {
6+
namespace SOFIE {
7+
8+
// The constructor now just registers the necessary C++ libraries.
9+
RModelProfiler::RModelProfiler(RModel &model) : fModel(model)
10+
{
11+
fModel.AddNeededStdLib("chrono"); // for timing operators
12+
fModel.AddNeededStdLib("vector"); // for storing profiling results
13+
fModel.AddNeededStdLib("string"); // for operator names
14+
fModel.AddNeededStdLib("map"); // for the results map
15+
fModel.AddNeededStdLib("iostream"); // for printing results
16+
fModel.AddNeededStdLib("iomanip"); // for printing results
17+
}
18+
19+
// This function generates the helper functions inside the Session struct.
20+
void RModelProfiler::GenerateUtilityFunctions()
21+
{
22+
auto &gc = fModel.fProfilerGC;
23+
24+
// Generate PrintProfilingResults function
25+
gc += " void PrintProfilingResults() const {\n";
26+
gc += " if (fProfilingResults.empty()) {\n";
27+
gc += " std::cout << \"No profiling results to display.\" << std::endl;\n";
28+
gc += " return;\n";
29+
gc += " }\n";
30+
gc += "\n";
31+
gc += " std::cout << \"\\n\" << std::string(50, '=') << std::endl;\n";
32+
gc += " std::cout << \" AVERAGE PROFILING RESULTS\" << std::endl;\n";
33+
gc += " std::cout << std::string(50, '=') << std::endl;\n";
34+
gc += " for (const auto& op : fProfilingResults) {\n";
35+
gc += " double sum = 0.0;\n";
36+
gc += " for (double time : op.second) {\n";
37+
gc += " sum += time;\n";
38+
gc += " }\n";
39+
gc += " double average = sum / op.second.size();\n";
40+
gc += " std::cout << \" \" << std::left << std::setw(20) << op.first\n";
41+
gc += " << \": \" << std::fixed << std::setprecision(6) << average << \" us\"\n";
42+
gc += " << \" (over \" << op.second.size() << \" runs)\" << std::endl;\n";
43+
gc += " }\n";
44+
gc += " std::cout << std::string(50, '=') << \"\\n\" << std::endl;\n";
45+
gc += " }\n";
46+
gc += "\n";
47+
48+
// Generate ResetProfilingResults function
49+
gc += " void ResetProfilingResults() {\n";
50+
gc += " fProfilingResults.clear();\n";
51+
gc += " }\n";
52+
gc += "\n";
53+
54+
// Generate GetOpAvgTime function
55+
gc += " std::map<std::string, double> GetOpAvgTime() const {\n";
56+
gc += " if (fProfilingResults.empty()) {\n";
57+
gc += " return {};\n";
58+
gc += " }\n";
59+
gc += "\n";
60+
gc += " std::map<std::string, double> avg;\n";
61+
gc += " for (const auto& op : fProfilingResults) {\n";
62+
gc += " double mean = 0.0;\n";
63+
gc += " for (double time : op.second) {\n";
64+
gc += " mean += time;\n";
65+
gc += " }\n";
66+
gc += " mean /= op.second.size();\n";
67+
gc += " avg[op.first] = mean;\n";
68+
gc += " }\n";
69+
gc += "\n";
70+
gc += " return avg;\n";
71+
gc += " }\n";
72+
gc += "\n";
73+
74+
// Generate GetOpVariance function
75+
gc += " std::map<std::string, double> GetOpVariance() const {\n";
76+
gc += " if (fProfilingResults.empty()) {\n";
77+
gc += " return {};\n";
78+
gc += " }\n";
79+
gc += "\n";
80+
gc += " std::map<std::string, double> variance;\n";
81+
gc += " for (const auto& op : fProfilingResults) {\n";
82+
gc += " // Var[X] = E[X^2] - E[X]^2\n";
83+
gc += " double mean = 0.0, mean2 = 0.0;\n";
84+
gc += " for (double time : op.second) {\n";
85+
gc += " mean += time;\n";
86+
gc += " mean2 += time * time;\n";
87+
gc += " }\n";
88+
gc += " mean /= op.second.size();\n";
89+
gc += " mean2 /= op.second.size();\n";
90+
gc += " variance[op.first] = mean2 - mean * mean;\n";
91+
gc += " }\n";
92+
gc += "\n";
93+
gc += " return variance;\n";
94+
gc += " }\n";
95+
}
96+
97+
// Main generation function for the profiler.
98+
void RModelProfiler::Generate()
99+
{
100+
// Clear the profiler's code string to start fresh.
101+
fModel.fProfilerGC.clear();
102+
auto &gc = fModel.fProfilerGC;
103+
104+
// 1. Add the data member to the Session struct to store results.
105+
gc += "public:\n";
106+
gc += " // Maps an operator name to a vector of its execution times (in microseconds).\n";
107+
gc += " std::map<std::string, std::vector<double>> fProfilingResults;\n\n";
108+
109+
// 2. Generate and add the utility functions like PrintProfilingResults.
110+
GenerateUtilityFunctions();
111+
112+
// 3. Generate the signature for the profiled doInfer method.
113+
std::string doInferSignature = fModel.GenerateInferSignature();
114+
if (!doInferSignature.empty()) doInferSignature += ", ";
115+
for (auto const &name : fModel.GetOutputTensorNames()) {
116+
doInferSignature += " std::vector<" + ConvertTypeToString(fModel.GetTensorType(name)) + "> &output_tensor_" + name + ",";
117+
}
118+
if (!fModel.GetOutputTensorNames().empty()) {
119+
doInferSignature.back() = ' ';
120+
}
121+
gc += "void doInfer(" + doInferSignature + ") {\n";
122+
123+
// 4. Generate the body of the doInfer method with timing instrumentation.
124+
gc += " // Timer variable for profiling\n";
125+
gc += " std::chrono::steady_clock::time_point tp_start, tp_overall_start;\n\n";
126+
gc += " tp_overall_start = std::chrono::steady_clock::now();\n\n";
127+
128+
for (size_t op_idx = 0; op_idx < fModel.fOperators.size(); ++op_idx) {
129+
const auto& op = fModel.fOperators[op_idx];
130+
gc += " // -- Profiling for operator " + op->name + " --\n";
131+
gc += " tp_start = std::chrono::steady_clock::now();\n\n";
132+
133+
// Add the actual operator inference code
134+
gc += op->Generate(std::to_string(op_idx));
135+
136+
// Add the code to stop the timer and store the result
137+
gc += "\n fProfilingResults[\"" + op->name + "\"].push_back(\n";
138+
gc += " std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
139+
gc += " std::chrono::steady_clock::now() - tp_start).count());\n\n";
140+
}
141+
142+
// 5. Generate the code to fill the output tensors.
143+
gc += " using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
144+
for (std::string const &name : fModel.GetOutputTensorNames()) {
145+
bool isIntermediate = fModel.fIntermediateTensorInfos.count(name) > 0;
146+
std::string n = isIntermediate ? std::to_string(ConvertShapeToLength(fModel.GetTensorShape(name)))
147+
: ConvertDynamicShapeToLength(fModel.GetDynamicTensorShape(name));
148+
gc += " FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
149+
}
150+
151+
gc += "\n // -- Record overall inference time --\n";
152+
gc += " fProfilingResults[\"Overall_Time\"].push_back(\n";
153+
gc += " std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
154+
gc += " std::chrono::steady_clock::now() - tp_overall_start).count());\n";
155+
156+
gc += "}\n\n"; // End of doInfer function
157+
}
158+
159+
} // namespace SOFIE
160+
} // namespace Experimental
161+
} // namespace TMVA

0 commit comments

Comments
 (0)