reduce batch size and re-enable perf tracker (#278)

felipemello1 · Felipe Mello · web-flow · commit 1f748698c38d · 2025-10-01T13:47:51.000-04:00
Co-authored-by: Felipe Mello <felipemello@fb.com>
diff --git a/apps/grpo/qwen3_8b.yaml b/apps/grpo/qwen3_8b.yaml
@@ -4,8 +4,8 @@
 # Global configuration
 group_size: 8
 batch_size: 16
-max_req_tokens: 512
-max_res_tokens: 512
+max_req_tokens: 468
+max_res_tokens: 468
 model: "Qwen/Qwen3-8B"
 off_by_n: 1 # Off by one by default
 
diff --git a/src/forge/observability/perf_tracker.py b/src/forge/observability/perf_tracker.py
@@ -106,7 +106,7 @@ def __init__(
 
         self.prefix = prefix
         self.track_memory = track_memory
-        self.time_with_gpu = False  # timer == "gpu"
+        self.time_with_gpu = timer == "gpu"
         self._disable = os.getenv(DISABLE_PERF_METRICS, "false") == "true"
         self._active = False
 
@@ -297,7 +297,7 @@ def _compute_elapsed(start_event, end_event):
         index = len(self._futures)
         self._futures.append((name, future, index))
 
-        if len(self._futures) >= 20:  # clean up every 20 to avoid memory leak
+        if len(self._futures) >= 5:  # clean up every 5
             self._collect_completed_futures()
 
         self._chain_start = end_event