Add export_stacks option to pipeline benchmarking (#3234)

SSYernar · facebook-github-bot · commit 7fc8d112522f · 2025-07-25T13:03:49.000-07:00
Summary: Pull Request resolved: #3234. Added a new boolean configuration option `export_stacks` (`True` or `False`, default `False`) to the `RunOptions` dataclass in benchmark_train_pipeline.py; its value is forwarded to the benchmark call as `export_stacks=run_option.export_stacks`. This option enables stack trace export during benchmark runs, which helps with performance analysis by providing detailed execution flow information. The exported stack traces can be used to identify bottlenecks and optimize critical paths in the training pipeline. Example stack trace output: {F1980460020}. Reviewed By: aliafzal. Differential Revision: D78749183. fbshipit-source-id: 0638105a522888c7929d53a3d4c31d16bbda7f11
diff --git a/torchrec/distributed/benchmark/benchmark_train_pipeline.py b/torchrec/distributed/benchmark/benchmark_train_pipeline.py
@@ -110,6 +110,7 @@ class RunOptions:
     sparse_lr: float = 0.1
     sparse_momentum: Optional[float] = None
     sparse_weight_decay: Optional[float] = None
+    export_stacks: bool = False
 
 
 @dataclass
@@ -390,6 +391,7 @@ def _func_to_benchmark(
                 func_to_benchmark=_func_to_benchmark,
                 benchmark_func_kwargs={"model": sharded_model, "pipeline": pipeline},
                 rank=rank,
+                export_stacks=run_option.export_stacks,
             )
             results.append(result)