
Commit 015c6d6

anijain2305 authored and pytorchmergebot committed
[dynamo][guards] Turn on profiling of guard manager (pytorch#145420)
Pull Request resolved: pytorch#145420
Approved by: https://github.com/ezyang
ghstack dependencies: pytorch#145351
1 parent fef92c9 commit 015c6d6

3 files changed: +16, -22 lines
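The latency line emitted by torch/_dynamo/guards.py in this commit goes through the dynamo "guards" logging artifact, so it only appears when that artifact is enabled. A minimal sketch of how a developer might surface it, assuming the standard TORCH_LOGS mechanism; the compiled function below is illustrative and not part of the commit:

```python
# Illustrative only: run with the guards logging artifact enabled, e.g.
#   TORCH_LOGS="guards" python repro.py
# so the "Guard eval latency = ... us" debug line added in this commit is printed.
import torch

@torch.compile
def f(x):
    return x * 2 + 1

f(torch.randn(4))  # first call compiles, installs guards, and profiles them
f(torch.randn(4))  # later calls evaluate the installed guards
```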

torch/_C/_dynamo/guards.pyi

Lines changed: 1 addition & 0 deletions
@@ -158,6 +158,7 @@ def install_symbolic_shape_guard(
 def profile_guard_manager(
     guard_manager: GuardManager,
     f_locals: dict[str, Any],
+    n_iters: int,
 ) -> float: ...

 class TensorGuards:

torch/_dynamo/guards.py

Lines changed: 8 additions & 5 deletions
@@ -2472,11 +2472,14 @@ def cleanup_builder(weak_b):
             self.guard_manager, output_graph.local_scope
         )

-        if guards_log.isEnabledFor(logging.DEBUG):
-            latency = profile_guard_manager(
-                self.guard_manager.root, output_graph.local_scope
-            )
-            guards_log.debug("Guard eval latency = %s us", f"{latency:.2f}")
+        # NB for developers: n_iters is chosen to be 50 to achieve
+        # statistical significance. If you are working on a guard
+        # optimization, it might be a good idea to increase this number for
+        # more stability during development.
+        latency = profile_guard_manager(
+            self.guard_manager.root, output_graph.local_scope, 50
+        )
+        guards_log.debug("Guard eval latency = %s us", f"{latency:.2f}")

         # NB - We have to be very careful of cleaning up here. Because of the
         # invalidate function, we can create a weakref finalizer that keeps
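The NB comment above suggests raising the iteration count while iterating on a guard optimization. A hedged sketch of that kind of local experiment, assuming the binding declared in torch/_C/_dynamo/guards.pyi; `root_guard_manager` and `frame_locals` are placeholders for the `self.guard_manager.root` and `output_graph.local_scope` objects used in the hunk:

```python
# Hypothetical developer-only experiment (not part of this commit): re-run the
# profiler with a larger n_iters for a steadier estimate while tuning guards.
from torch._C._dynamo.guards import profile_guard_manager

def measure_guard_latency(root_guard_manager, frame_locals, n_iters=500):
    # Same call shape as the production code above, just with more iterations.
    latency_us = profile_guard_manager(root_guard_manager, frame_locals, n_iters)
    print(f"Guard eval latency = {latency_us:.2f} us over {n_iters} iters")
    return latency_us
```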

torch/csrc/dynamo/guards.cpp

Lines changed: 7 additions & 17 deletions
@@ -5060,36 +5060,26 @@ void install_storage_overlapping_guard(
       /* overlapping= */ false);
 }

-double profile_guard_manager(RootGuardManager* root, py::object f_locals) {
+double profile_guard_manager(
+    RootGuardManager* root,
+    py::object f_locals,
+    int n_iters) {
   PyObject* locals = f_locals.ptr();

   // Warmup
-  for (int i = 0; i < 10; i++) {
+  for (int i = 0; i < 5; i++) {
     root->check_nopybind(locals);
   }

-  int count = 0;
   auto start = std::chrono::high_resolution_clock::now();
-  float profile_duration = 1.0;
-
-  // Run the loop for profile_duration seconds
-  while (true) {
+  for (int i = 0; i < n_iters; i++) {
     root->check_nopybind(locals);
-    count++;
-    auto end = std::chrono::high_resolution_clock::now();
-    std::chrono::duration<double> elapsed = end - start;
-
-    // Break the loop if 1 second has passed
-    if (elapsed.count() >= 1.0) {
-      break;
-    }
   }
-
   auto end = std::chrono::high_resolution_clock::now();
   std::chrono::duration<double> total_elapsed = end - start;

   // Calculate the average time per iteration in microseconds
-  return (total_elapsed.count() * profile_duration * 1e6) / count;
+  return (total_elapsed.count() * 1e6) / n_iters;
 }

 } // namespace
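The C++ change swaps the old "loop until roughly one second has elapsed" strategy for a short warmup followed by a fixed number of timed iterations, reporting the mean in microseconds. The same pattern, sketched in Python for illustration (the timed callable and counts below are placeholders, not from the commit):

```python
import time

def mean_latency_us(fn, n_iters=50, warmup=5):
    """Average wall-clock latency of fn() in microseconds, mirroring the
    warmup + fixed-iteration structure of the new profile_guard_manager."""
    for _ in range(warmup):       # warmup: amortize caches, lazy init, etc.
        fn()
    start = time.perf_counter()
    for _ in range(n_iters):      # fixed iteration count instead of a 1 s time budget
        fn()
    elapsed = time.perf_counter() - start
    return (elapsed * 1e6) / n_iters  # microseconds per iteration

# Illustrative stand-in for a guard check:
print(f"{mean_latency_us(lambda: isinstance(3, int)):.3f} us")
```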
