
Commit 4d62783

feat: add gpu mem and util logging to wandb/tensorboard (#37)
Signed-off-by: Terry Kong <terryk@nvidia.com>
Co-authored-by: Parth Chadha <pchadha@nvidia.com>
1 parent 43ace69 commit 4d62783

File tree

16 files changed: +790 −33 lines

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ dist/
 # Cache
 uv_cache/
 hf_home/
+hf_datasets_cache/
 *logs/
 datasets/
 docker/

docs/design_docs/gpu_logger.md

Whitespace-only changes.

docs/design_docs/logger.md

Lines changed: 28 additions & 0 deletions
@@ -78,3 +78,31 @@ When enabled, the pretty logging will generate formatted text similar to:
 
 ![Validation Pretty Logging Example](../assets/val-log.png)
 
+## GPU Metric Logging
+
+Reinforcer monitors GPU memory and utilization through [system metrics](https://docs.ray.io/en/latest/ray-observability/reference/system-metrics.html#system-metrics) exposed by Ray nodes. While Ray makes these metrics available to tools such as Prometheus, Reinforcer polls the GPU memory and utilization data directly and logs it to TensorBoard and/or Weights & Biases.
+
+This approach lets us offer the same GPU metric tracking across all loggers (not just wandb) and greatly simplifies the implementation.
+
+This feature is enabled with the `monitor_gpus` configuration parameter; the frequency of collection and of flushing to the loggers is controlled by `gpu_monitoring.collection_interval` and `gpu_monitoring.flush_interval` (both in seconds), respectively:
+
+```yaml
+logger:
+  wandb_enabled: false
+  tensorboard_enabled: false
+  monitor_gpus: true
+  gpu_monitoring:
+    collection_interval: 10
+    flush_interval: 10
+```
+
+:::{note}
+While monitoring through the remote workers is possible, it requires some delicate implementation details to ensure that:
+* sending logs back to the driver does not incur a large overhead
+* metrics remain easily interpretable, since colocated workers can lead to double counting
+* workers gracefully flush their logs in the event of failure
+* the logging is the same for tensorboard and wandb
+* workers that spawn other workers correctly report the total usage of their grandchild workers
+
+These considerations led us to the simpler approach of collecting on the driver.
+:::
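
The commit does not inline the collector itself here, but a driver-side poller along these lines would match the design above. This is a minimal sketch, assuming `pynvml` (the NVIDIA Management Library bindings) as the metrics source and a user-supplied `log_metrics` callback; both the class name `GPUMonitor` and the callback are illustrative, and the actual implementation reads Ray's per-node system metrics instead.

```python
# Minimal sketch of a driver-side GPU monitor. Assumes pynvml as the
# metrics source and a log_metrics(metrics, step) callback; the real
# implementation polls Ray's system metrics instead.
import threading
import time

import pynvml


class GPUMonitor:
    def __init__(self, log_metrics, collection_interval=10, flush_interval=10):
        self.log_metrics = log_metrics
        self.collection_interval = collection_interval
        self.flush_interval = flush_interval
        self._samples = []
        self._stop = threading.Event()
        self._thread = threading.Thread(target=self._run, daemon=True)

    def start(self):
        pynvml.nvmlInit()
        self._thread.start()

    def stop(self):
        self._stop.set()
        self._thread.join()
        self._flush()  # flush remaining samples so none are lost on shutdown
        pynvml.nvmlShutdown()

    def _collect(self):
        # One sample per GPU: memory used (bytes) and utilization (%).
        sample = {}
        for i in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
            util = pynvml.nvmlDeviceGetUtilizationRates(handle)
            sample[f"gpu{i}/memory_used"] = mem.used
            sample[f"gpu{i}/utilization"] = util.gpu
        self._samples.append((time.time(), sample))

    def _flush(self):
        for timestamp, sample in self._samples:
            self.log_metrics(sample, step=int(timestamp))
        self._samples.clear()

    def _run(self):
        last_flush = time.time()
        while not self._stop.is_set():
            self._collect()
            if time.time() - last_flush >= self.flush_interval:
                self._flush()
                last_flush = time.time()
            self._stop.wait(self.collection_interval)
```

Collecting on the driver keeps the sketch free of the worker-side pitfalls listed in the note: there is one poller, one flush path, and no cross-process log traffic.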

examples/configs/grpo_math_1B.yaml

Lines changed: 4 additions & 0 deletions
@@ -77,10 +77,14 @@ logger:
   num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal
   wandb_enabled: false
   tensorboard_enabled: false
+  monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard
   wandb:
     project: "grpo-dev"
     name: "grpo-dev-logger"
   tensorboard: {}
+  gpu_monitoring:
+    collection_interval: 10 # How often to collect GPU usage metrics (in seconds)
+    flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds)
 
 cluster:
   gpus_per_node: 1
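
For reference, the new keys map naturally onto a small typed schema. The sketch below is illustrative only; `GPUMonitoringConfig` and `LoggerConfig` are hypothetical names, not the repository's actual config classes, and the fields simply mirror the YAML above.

```python
# Illustrative schema for the new logger keys; the repository's real
# config classes may differ.
from typing import TypedDict


class GPUMonitoringConfig(TypedDict):
    collection_interval: int  # seconds between metric samples
    flush_interval: int  # seconds between flushes to the loggers


class LoggerConfig(TypedDict):
    wandb_enabled: bool
    tensorboard_enabled: bool
    monitor_gpus: bool
    gpu_monitoring: GPUMonitoringConfig
```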

examples/configs/sft.yaml

Lines changed: 5 additions & 1 deletion
@@ -44,13 +44,17 @@ data:
 
 logger:
   log_dir: "logs" # Base directory for all logs
-  wandb_enabled: true
+  wandb_enabled: false
   tensorboard_enabled: false
+  monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard
   wandb:
     project: "sft-dev"
     name: "sft-dev-logger"
   tensorboard:
     log_dir: "tb_logs"
+  gpu_monitoring:
+    collection_interval: 10 # How often to collect GPU usage metrics (in seconds)
+    flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds)
 
 cluster:
   gpus_per_node: 8

examples/run_grpo_math.py

Lines changed: 4 additions & 1 deletion
@@ -195,7 +195,10 @@ def setup_data(data_config: DataConfig, policy_config: PolicyConfig, env_configs
     task_data_processors["math"] = (math_task_spec, openinstructmath2_data_processor)
 
     math_env = MathEnvironment.options(
-        runtime_env={"py_executable": MathEnvironment.DEFAULT_PY_EXECUTABLE}
+        runtime_env={
+            "py_executable": MathEnvironment.DEFAULT_PY_EXECUTABLE,
+            "env_vars": dict(os.environ),  # Pass through all user environment variables
+        }
     ).remote(env_configs["math"])
     dataset = AllTaskProcessedDataset(
         data.formatted_ds["train"],

nemo_reinforcer/algorithms/grpo.py

Lines changed: 6 additions & 2 deletions
@@ -137,6 +137,12 @@ def setup(
     logger_config = master_config["logger"]
     cluster_config = master_config["cluster"]
 
+    # ==========================
+    # Logger
+    # ==========================
+    logger = Logger(logger_config)
+    logger.log_hyperparams(master_config)
+
     # ==========================
     # Checkpointing
     # ==========================
@@ -238,8 +244,6 @@ def setup(
     )
 
     loss_fn = ClippedPGLossFn(loss_config)
-    logger = Logger(logger_config)
-    logger.log_hyperparams(master_config)
 
     print("\n" + "=" * 60)
     print(" " * 18 + "SETUP COMPLETE")

nemo_reinforcer/algorithms/loss_functions.py

Lines changed: 5 additions & 1 deletion
@@ -166,4 +166,8 @@ def __call__(
             num_unmasked_tokens = torch.tensor(1)
         loss = -torch.sum(token_logprobs * mask) / num_unmasked_tokens
 
-        return loss, {"loss": loss.item(), "num_unmasked_tokens": num_unmasked_tokens.item(), "total_tokens": mask.numel()}
+        return loss, {
+            "loss": loss.item(),
+            "num_unmasked_tokens": num_unmasked_tokens.item(),
+            "total_tokens": mask.numel(),
+        }

nemo_reinforcer/algorithms/sft.py

Lines changed: 7 additions & 3 deletions
@@ -61,6 +61,7 @@ class SFTConfig(TypedDict):
     val_at_start: bool
     seed: int
 
+
 class MasterConfig(TypedDict):
     policy: PolicyConfig
     data: DataConfig
@@ -102,6 +103,12 @@ def setup(
     cluster_config = master_config["cluster"]
     sft_config = master_config["sft"]
 
+    # ==========================
+    # Logger
+    # ==========================
+    logger = Logger(logger_config)
+    logger.log_hyperparams(master_config)
+
     # ==========================
     # Checkpointing
     # ==========================
@@ -179,9 +186,6 @@ def setup(
     loss_fn = NLLLoss()
     print(f" ✓ Model initialized")
 
-    logger = Logger(logger_config)
-    logger.log_hyperparams(master_config)
-
     print("\n" + "=" * 60)
     print(" " * 18 + "SETUP COMPLETE")
     print("=" * 60 + "\n")

nemo_reinforcer/algorithms/utils.py

Lines changed: 1 addition & 0 deletions
@@ -123,6 +123,7 @@ def masked_mean(values, mask, dim=None):
         return values[mask.bool()].mean()
     return as_masked_tensor(values, mask.bool()).mean(dim=dim).to_tensor(torch.nan)
 
+
 def set_seed(seed: int):
     """Sets the seed for python, numpy, and pytorch."""
     random.seed(seed)
