Skip to content

Commit 824b3a5

Browse files
HannaMao and facebook-github-bot
authored and committed
Make the training benchmark work with multiple GPUs
Summary: 1. For `tensorboard_trace_handler`, `worker_name` should be unique for each worker in a distributed scenario. 2. Turn off saving `profiler-trace-iter{}.json` in the output dir when `save_tensorboard=True`, since it seems both outputs cannot be enabled at the same time right now (discussion: https://fb.workplace.com/groups/289111155699012/permalink/640990860511038/). Reviewed By: ppwwyyxx Differential Revision: D31312437 fbshipit-source-id: b50d80c9f288a35483d9a41ce07e0307c15c297f
1 parent 9f8d35e commit 824b3a5

File tree

1 file changed

+18
-16
lines changed

1 file changed

+18
-16
lines changed

detectron2/engine/hooks.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,8 @@ def before_step(self):
414414
self._output_dir,
415415
"log",
416416
"profiler-tensorboard-iter{}".format(self.trainer.iter),
417-
)
417+
),
418+
f"worker{comm.get_rank()}",
418419
)
419420
else:
420421
on_trace_ready = None
@@ -434,21 +435,22 @@ def after_step(self):
434435
if self._profiler is None:
435436
return
436437
self._profiler.__exit__(None, None, None)
437-
PathManager.mkdirs(self._output_dir)
438-
out_file = os.path.join(
439-
self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter)
440-
)
441-
if "://" not in out_file:
442-
self._profiler.export_chrome_trace(out_file)
443-
else:
444-
# Support non-posix filesystems
445-
with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d:
446-
tmp_file = os.path.join(d, "tmp.json")
447-
self._profiler.export_chrome_trace(tmp_file)
448-
with open(tmp_file) as f:
449-
content = f.read()
450-
with PathManager.open(out_file, "w") as f:
451-
f.write(content)
438+
if not self._save_tensorboard:
439+
PathManager.mkdirs(self._output_dir)
440+
out_file = os.path.join(
441+
self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter)
442+
)
443+
if "://" not in out_file:
444+
self._profiler.export_chrome_trace(out_file)
445+
else:
446+
# Support non-posix filesystems
447+
with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d:
448+
tmp_file = os.path.join(d, "tmp.json")
449+
self._profiler.export_chrome_trace(tmp_file)
450+
with open(tmp_file) as f:
451+
content = f.read()
452+
with PathManager.open(out_file, "w") as f:
453+
f.write(content)
452454

453455

454456
class AutogradProfiler(TorchProfiler):

0 commit comments

Comments
 (0)