File tree Expand file tree Collapse file tree 3 files changed +16
-22
lines changed
Expand file tree Collapse file tree 3 files changed +16
-22
lines changed Original file line number Diff line number Diff line change @@ -158,6 +158,7 @@ def install_symbolic_shape_guard(
158158def profile_guard_manager (
159159 guard_manager : GuardManager ,
160160 f_locals : dict [str , Any ],
161+ n_iters : int ,
161162) -> float : ...
162163
163164class TensorGuards :
Original file line number Diff line number Diff line change @@ -2472,11 +2472,14 @@ def cleanup_builder(weak_b):
24722472 self .guard_manager , output_graph .local_scope
24732473 )
24742474
2475- if guards_log .isEnabledFor (logging .DEBUG ):
2476- latency = profile_guard_manager (
2477- self .guard_manager .root , output_graph .local_scope
2478- )
2479- guards_log .debug ("Guard eval latency = %s us" , f"{ latency :.2f} " )
2475+ # NB for developers: n_iters is chosen to be 50 to achieve
2476+ # statistical significance. If you are working on a guard
2477+ # optimization, it might be a good idea to increase this number for
2478+ # more stability during development.
2479+ latency = profile_guard_manager (
2480+ self .guard_manager .root , output_graph .local_scope , 50
2481+ )
2482+ guards_log .debug ("Guard eval latency = %s us" , f"{ latency :.2f} " )
24802483
24812484 # NB - We have to be very careful when cleaning up here. Because of the
24822485 # invalidate function, we can create a weakref finalizer that keeps
Original file line number Diff line number Diff line change @@ -5060,36 +5060,26 @@ void install_storage_overlapping_guard(
50605060 /* overlapping= */ false );
50615061}
50625062
5063- double profile_guard_manager (RootGuardManager* root, py::object f_locals) {
5063+ double profile_guard_manager (
5064+ RootGuardManager* root,
5065+ py::object f_locals,
5066+ int n_iters) {
50645067 PyObject* locals = f_locals.ptr ();
50655068
50665069 // Warmup
5067- for (int i = 0 ; i < 10 ; i++) {
5070+ for (int i = 0 ; i < 5 ; i++) {
50685071 root->check_nopybind (locals);
50695072 }
50705073
5071- int count = 0 ;
50725074 auto start = std::chrono::high_resolution_clock::now ();
5073- float profile_duration = 1.0 ;
5074-
5075- // Run the loop for profile_duration seconds
5076- while (true ) {
5075+ for (int i = 0 ; i < n_iters; i++) {
50775076 root->check_nopybind (locals);
5078- count++;
5079- auto end = std::chrono::high_resolution_clock::now ();
5080- std::chrono::duration<double > elapsed = end - start;
5081-
5082- // Break the loop if 1 second has passed
5083- if (elapsed.count () >= 1.0 ) {
5084- break ;
5085- }
50865077 }
5087-
50885078 auto end = std::chrono::high_resolution_clock::now ();
50895079 std::chrono::duration<double > total_elapsed = end - start;
50905080
50915081 // Calculate the average time per iteration in microseconds
5092- return (total_elapsed.count () * profile_duration * 1e6 ) / count ;
5082+ return (total_elapsed.count () * 1e6 ) / n_iters ;
50935083}
50945084
50955085} // namespace
You can’t perform that action at this time.
0 commit comments