Skip to content

Commit 77ea624

Browse files
mcr229 authored and facebook-github-bot committed
Initial print profiling for xnnpack ops (#227)
Summary: Pull Request resolved: #227

Currently debugging mv2 q8 vs. mv2 fp32 (there is not much difference in performance). This adds some general utilities for grabbing per-op profiling data from XNNPACK.

Private functions that interact with the XNNPACK runtime to grab op info:
```
Error get_runtime_operator_names(std::vector<char>& operator_names);
Error get_runtime_num_operators(size_t& num_operators);
Error get_runtime_operator_timings(std::vector<uint64_t>& timing_stats);
```
These get the operator names, the number of operators, and the per-operator timings.

Public delegate functions to initialize, log, and print profiling data:
```
Error init_profiler()
Error log_op_timings()
Error print_avg_op_timings()
```

Reviewed By: digantdesai

Differential Revision: D48992397

fbshipit-source-id: a89a3a323ada1e8ab5ea4f2f028cf5d9355dc5a8
1 parent 060f4b3 commit 77ea624

File tree

5 files changed

+123
-2
lines changed

5 files changed

+123
-2
lines changed

backends/xnnpack/runtime/XNNCompiler.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1485,12 +1485,17 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
14851485
return err;
14861486
}
14871487
}
1488+
uint32_t runtime_flags = 0;
1489+
1490+
#ifdef ENABLE_XNNPACK_PROFILING
1491+
runtime_flags |= XNN_FLAG_BASIC_PROFILING;
1492+
#endif
14881493

14891494
xnn_runtime_t runtime_ptr = nullptr;
14901495
status = xnn_create_runtime_v2(
14911496
subgraph.get(),
14921497
torch::executorch::threadpool::get_pthreadpool(),
1493-
0,
1498+
runtime_flags,
14941499
&runtime_ptr);
14951500
ET_CHECK_OR_RETURN_ERROR(
14961501
xnn_status_success == status,
@@ -1502,6 +1507,10 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
15021507
std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)>(
15031508
runtime_ptr, xnn_delete_runtime);
15041509

1510+
#ifdef ENABLE_XNNPACK_PROFILING
1511+
executor->init_profiler();
1512+
#endif
1513+
15051514
for (auto old_id : *flatbuffer_graph->input_ids()) {
15061515
executor->input_ids_.emplace_back(remapped_ids.at(old_id));
15071516
}

backends/xnnpack/runtime/XNNExecutor.cpp

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,91 @@ Error XNNExecutor::set_external_input(uint32_t id, Tensor* input) {
7373
return Error::Ok;
7474
}
7575

76+
inline void XNNExecutor::get_runtime_operator_names(
77+
std::vector<char>& operator_names) {
78+
size_t required_size = 0;
79+
// First call returns xnn_status_out_of_memory, but sets required_size to
80+
// the correct size of the buffer to store the result
81+
xnn_status status = xnn_get_runtime_profiling_info(
82+
runtime_.get(), // runtime
83+
xnn_profile_info_operator_name, // param_name
84+
0, // param_value_size
85+
nullptr, // param_value
86+
&required_size // param_value_size_ret
87+
);
88+
89+
if (status == xnn_status_out_of_memory) {
90+
operator_names.resize(required_size);
91+
status = xnn_get_runtime_profiling_info(
92+
runtime_.get(),
93+
xnn_profile_info_operator_name,
94+
operator_names.size(),
95+
operator_names.data(),
96+
&required_size);
97+
}
98+
if (status != xnn_status_success) {
99+
ET_LOG(Error, "Failed to get XNNPACK Operator Timings");
100+
}
101+
}
102+
103+
inline void XNNExecutor::get_runtime_num_operators(size_t& num_operators) {
104+
size_t required_size = 0;
105+
xnn_status status = xnn_get_runtime_profiling_info(
106+
runtime_.get(),
107+
xnn_profile_info_num_operators,
108+
sizeof(num_operators),
109+
&num_operators,
110+
&required_size);
111+
if (status != xnn_status_success) {
112+
ET_LOG(Error, "Failed to get XNNPACK Operator Timings");
113+
}
114+
}
115+
116+
inline void XNNExecutor::get_runtime_operator_timings(
117+
std::vector<uint64_t>& timing_stats) {
118+
size_t required_size;
119+
// Get number of runtime operators for timing_stats.size
120+
timing_stats.resize(num_ops_);
121+
xnn_status status = xnn_get_runtime_profiling_info(
122+
runtime_.get(),
123+
xnn_profile_info_operator_timing,
124+
timing_stats.size() * sizeof(uint64_t),
125+
timing_stats.data(),
126+
&required_size);
127+
if (status != xnn_status_success) {
128+
ET_LOG(Error, "Failed to get XNNPACK Operator Timings");
129+
}
130+
}
131+
132+
// Prepares the profiling state: caches the operator names in `op_names_` and
// the operator count in `num_ops_`. Failures are logged by the helpers.
void XNNExecutor::init_profiler() {
  // `num_ops_` has no default initializer; start from a known value so that a
  // failed count query cannot leave later loops iterating over garbage.
  num_ops_ = 0;
  get_runtime_operator_names(op_names_);
  get_runtime_num_operators(num_ops_);
}
136+
137+
// Records the per-operator timings of the most recent run as a new row in
// `op_timings_` (one row per call).
void XNNExecutor::log_op_timings() {
  // Append an empty row first and let the query fill it in place; this avoids
  // copying a temporary timing vector on every call.
  op_timings_.emplace_back();
  get_runtime_operator_timings(op_timings_.back());
}
142+
143+
void XNNExecutor::print_avg_op_timings() {
144+
size_t num_iterations = op_timings_.size();
145+
size_t name_len = 0;
146+
const char* op_name = nullptr;
147+
float avg_total = 0;
148+
for (size_t xnn_node_idx = 0; xnn_node_idx < num_ops_; xnn_node_idx++) {
149+
op_name = &op_names_[name_len];
150+
name_len += strlen(op_name) + 1;
151+
float total_op_time = 0;
152+
for (size_t it = 0; it < num_iterations; it++) {
153+
total_op_time += op_timings_[it][xnn_node_idx];
154+
}
155+
float avg_op_time = total_op_time / static_cast<float>(num_iterations);
156+
ET_LOG(Info, ">>, %s, %f", op_name, avg_op_time);
157+
avg_total += avg_op_time;
158+
}
159+
ET_LOG(Info, ">>, Total Time, %f", avg_total);
160+
}
76161
} // namespace delegate
77162
} // namespace xnnpack
78163
} // namespace executor

backends/xnnpack/runtime/XNNExecutor.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,28 @@ class XNNExecutor {
3838

3939
Error set_external_input(uint32_t id, Tensor* input);
4040

41+
// XNNPACK Profiling
42+
// Used to hold profiling data
43+
// * To hold op names and duration (in usec) for each operator execution
44+
// * Both indexed with xnn_node_idx (0.. node_id)
45+
using microsecond_t = uint64_t;
46+
size_t num_ops_;
47+
std::vector<char> op_names_;
48+
// op_timings[i][j] represents the runtime of operator j on the ith run
49+
std::vector<std::vector<microsecond_t>> op_timings_;
50+
51+
void get_runtime_operator_names(std::vector<char>& operator_names);
52+
void get_runtime_num_operators(size_t& num_operators);
53+
void get_runtime_operator_timings(std::vector<uint64_t>& timing_stats);
54+
4155
public:
4256
XNNExecutor() = default;
4357

58+
// XNNPACK Profiling public fn
59+
void init_profiler();
60+
void log_op_timings();
61+
void print_avg_op_timings();
62+
4463
inline void append_arg(uint32_t id) {
4564
external_id_args_.push_back(id);
4665
// Insertion order is not guaranteed here.

backends/xnnpack/runtime/XNNPACKBackend.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ class XnnpackBackend final : public PyTorchBackendInterface {
8888
}
8989

9090
err = executor->forward();
91+
#ifdef ENABLE_XNNPACK_PROFILING
92+
executor->log_op_timings(); // Log the op execution time.
93+
#endif
9194

9295
for (int i = executor->getNumInputs();
9396
i < executor->getNumInputs() + executor->getNumOutputs();
@@ -113,6 +116,9 @@ class XnnpackBackend final : public PyTorchBackendInterface {
113116
void destroy(DelegateHandle* handle) const override {
114117
if (handle != nullptr) {
115118
auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
119+
#ifdef ENABLE_XNNPACK_PROFILING
120+
executor->print_avg_op_timings();
121+
#endif
116122
// XNNExecutor is not trivially destructible. Since this was constructed
117123
// manually in init(), we must destroy it manually here.
118124
executor->~XNNExecutor();

backends/xnnpack/targets.bzl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ def define_common_targets():
6565
"//executorch/extension/pybindings/...",
6666
"@EXECUTORCH_CLIENTS",
6767
],
68-
preprocessor_flags = [] if runtime.is_oss else ["-DENABLE_DYNAMIC_QUANTIZATION"],
68+
preprocessor_flags = [
69+
# "-DENABLE_XNNPACK_PROFILING",
70+
] + ([] if runtime.is_oss else ["-DENABLE_DYNAMIC_QUANTIZATION"]),
6971
deps = [
7072
third_party_dep("XNNPACK"),
7173
":xnnpack_schema",

0 commit comments

Comments
 (0)