Skip to content

Commit c2f5fdf

Browse files
committed
Add profiler code
1 parent 75df976 commit c2f5fdf

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

scripts/cpu/gen-dense-cpu-ops.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,8 @@ class AtenIpexCPUDefault {{
140140
#include <ATen/CPUGenerator.h>
141141
#include <c10/util/Exception.h>
142142
#include <c10/util/Logging.h>
143+
#include <torch/csrc/autograd/function.h>
144+
#include <torch/csrc/autograd/record_function.h>
143145
144146
#include "aten_ipex_bridge.h"
145147
#include "utils.h"
@@ -515,9 +517,16 @@ def is_conv_overrideable_func(fname):
515517
# Gen definition code for cpp file
516518
code = '{} {{\n'.format(cpp_func_str_cpp)
517519

520+
# Gen debug trace (printf under _DEBUG) and profiler info (RECORD_FUNCTION)
518521
code += '#if defined(_DEBUG)\n'
519522
code += ' printf("{}::{}\\n");\n'.format(_IPEX_OP_FUNC_NS, cpp_sig.def_name)
520523
code += '#endif\n'
524+
profiler_inputs = []
525+
for param in cpp_sig.input_params:
526+
if param.core_type in ['Tensor', 'Scalar']:
527+
profiler_inputs.append(param.name)
528+
code += ' RECORD_FUNCTION("{ns}::{name}", std::vector<c10::IValue>({{{input_names}}}), torch::autograd::Node::peek_at_next_sequence_nr());\n'.format(ns=_IPEX_OP_FUNC_NS, name=cpp_sig.def_name, input_names=', '.join(profiler_inputs))
529+
521530

522531
if is_conv_overrideable_func(cpp_sig.def_name):
523532
code += ' return AtenIpexCPUDev::dil_{}({});\n'.format(cpp_sig.def_name, ', '.join([param.name for param in cpp_sig.input_params]))

0 commit comments

Comments (0)