Commit fd1d38b

Merge branch 'meta-pytorch:main' into main
2 parents 02d77c6 + 28aa995

File tree

8 files changed: +576 -35 lines

apps/grpo/main.py

Lines changed: 530 additions & 0 deletions
Large diffs are not rendered by default.

apps/rl/llama3_8b.yaml

Lines changed: 2 additions & 2 deletions
@@ -46,7 +46,7 @@ trainer:
   disable_loss_parallel: false

 checkpoint:
-  enable_checkpoint: true
+  enable: true
   folder: /tmp/Meta-Llama-3.1-8B-Instruct/saved_checkpoints
   initial_load_path: /tmp/Meta-Llama-3.1-8B-Instruct/
   initial_load_in_hf: true
@@ -119,7 +119,7 @@ replay_buffer:
 # disable_loss_parallel: false
 #
 # checkpoint:
-#   enable_checkpoint: true
+#   enable: true
 # folder: /tmp/Meta-Llama-3.1-8B-Instruct/
 # interval: 500
 # async_mode: "disabled"

apps/sft/llama3_8b.yaml

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ parallelism:
   disable_loss_parallel: false

 checkpoint:
-  enable_checkpoint: true
+  enable: true
   folder: /tmp/Meta-Llama-3.1-8B-Instruct/saved_checkpoints
   initial_load_path: /tmp/Meta-Llama-3.1-8B-Instruct/
   initial_load_in_hf: true

apps/sft_v2/llama3_8b.yaml

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ parallelism:
   disable_loss_parallel: false

 checkpoint:
-  enable_checkpoint: true
+  enable: true
   folder: /tmp/Meta-Llama-3.1-8B-Instruct/saved_checkpoints
   initial_load_path: /tmp/Meta-Llama-3.1-8B-Instruct/
   initial_load_in_hf: true
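
The three YAML diffs above (plus the commented-out block in apps/rl/llama3_8b.yaml) make the same breaking rename: the checkpoint section's `enable_checkpoint` key becomes `enable`. For configs maintained outside this repo, a migration could look like the sketch below. This helper is hypothetical, not part of this commit, and it assumes PyYAML is available:

import yaml  # assumes PyYAML; not a dependency introduced by this commit


def migrate_checkpoint_key(path: str) -> None:
    """Rename `checkpoint.enable_checkpoint` to `checkpoint.enable` in place."""
    with open(path) as f:
        cfg = yaml.safe_load(f)
    checkpoint = cfg.get("checkpoint", {})
    if "enable_checkpoint" in checkpoint:
        # Preserve the original boolean value under the new key name.
        checkpoint["enable"] = checkpoint.pop("enable_checkpoint")
    with open(path, "w") as f:
        yaml.safe_dump(cfg, f, sort_keys=False)


# e.g. migrate_checkpoint_key("apps/sft/llama3_8b.yaml")

Note that yaml.safe_dump drops comments, so hand-editing remains safer for heavily annotated configs.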

src/forge/actors/policy.py

Lines changed: 8 additions & 2 deletions
@@ -60,6 +60,7 @@ class SamplingOverrides:

     num_samples: int
     guided_decoding: bool = False
+    max_tokens: int = 512


 @dataclass
@@ -87,6 +88,7 @@ class PolicyConfig:
     num_workers: int
     worker_params: WorkerConfig
     sampling_params: SamplingOverrides
+    available_devices: str = None


 @dataclass
@@ -102,6 +104,11 @@ class Policy(PolicyInterface):
     @endpoint
     async def setup(self):
         # Set up policy_worker
+        self.available_devices = (
+            self.config.available_devices
+            if self.config.available_devices is not None
+            else ",".join(str(i) for i in range(torch.cuda.device_count()))
+        )
         await self.spawn_workers()

         self.request_id = 0
@@ -157,6 +164,7 @@ async def spawn_workers(self):
             env={
                 "MASTER_ADDR": str(get_loopback_ip()),
                 "MASTER_PORT": str(get_open_port()),
+                "CUDA_VISIBLE_DEVICES": self.available_devices,
             },
         )
         self.policy_worker = await self.worker_mesh.spawn(
@@ -200,7 +208,6 @@ async def generate(self, prompt: str, priority: int = 0) -> List[CompletionOutput]:
         if (num_samples := self.sampling_params.n) == 1:
             self.output_processor.add_request(request, prompt_str, None, 0)
             request, _ = self.preprocess_add_request(request)
-
             request_fut = asyncio.Future()
             self.requests[request_id] = (None, request_fut)

@@ -456,7 +463,6 @@ def convert_input(prompt=None, prompt_token_ids=None) -> Dict:

 def get_default_sampling_params(vllm_config, overrides=None) -> SamplingParams:
     default_params = vllm_config.model_config.get_diff_sampling_param()
-    default_params["max_tokens"] = 512
     if overrides is not None:
         default_params |= overrides
     if default_params:
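
Two behavioral changes stand out in this diff: the hard-coded `default_params["max_tokens"] = 512` is replaced by a configurable `SamplingOverrides.max_tokens` field with the same default, and the new `PolicyConfig.available_devices` lets callers pin workers to specific GPUs, falling back to every visible device and exported to each worker as `CUDA_VISIBLE_DEVICES`. A standalone sketch of that fallback logic (the function name is illustrative, not from the diff):

import torch


def resolve_available_devices(available_devices: str | None = None) -> str:
    """Mirror the fallback added in Policy.setup: use the configured string
    if present, otherwise enumerate every visible CUDA device."""
    if available_devices is not None:
        return available_devices
    return ",".join(str(i) for i in range(torch.cuda.device_count()))


print(resolve_available_devices())       # on a 4-GPU host: "0,1,2,3"
print(resolve_available_devices("0,2"))  # pin spawned workers to GPUs 0 and 2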

src/forge/actors/replay_buffer.py

Lines changed: 13 additions & 17 deletions
@@ -11,7 +11,6 @@
 from monarch.actor import endpoint

 from forge.controller import ForgeActor
-from forge.types import Trajectory


 @dataclass
@@ -24,50 +23,47 @@ class ReplayBuffer(ForgeActor):

     @endpoint
     async def setup(self) -> None:
-        self.buffer: list[Trajectory] = []
+        self.buffer: list = []
         if self.seed is None:
             self.seed = random.randint(0, 2**32)
         random.seed(self.seed)
         self.sampler = random.sample

     @endpoint
-    async def add(self, trajectory: Trajectory) -> None:
-        self.buffer.append(trajectory)
+    async def add(self, episode) -> None:
+        self.buffer.append(episode)

     @endpoint
-    async def sample(
-        self, curr_policy_version: int, batch_size: int | None = None
-    ) -> list[Trajectory] | None:
+    async def sample(self, curr_policy_version: int, batch_size: int | None = None):
         """Sample from the replay buffer.

         Args:
             curr_policy_version (int): The current policy version.
-            batch_size (int, optional): Number of trajectories to sample. If none, defaults to batch size
+            batch_size (int, optional): Number of episodes to sample. If none, defaults to batch size
                 passed in at initialization.

         Returns:
-            A list of sampled trajectories or None if there are not enough trajectories in the buffer.
+            A list of sampled episodes or None if there are not enough episodes in the buffer.
         """
         bsz = batch_size if batch_size is not None else self.batch_size

-        # Evict old trajectories
+        # Evict old episodes
         self._evict(curr_policy_version)

         if bsz > len(self.buffer):
-            print("Not enough trajectories in the buffer.")
             return None

         # TODO: Make this more efficient
         idx_to_sample = self.sampler(range(len(self.buffer)), k=bsz)
         sorted_idxs = sorted(
             idx_to_sample, reverse=True
         )  # Sort in desc order to avoid shifting idxs
-        sampled_trajectories = [self.buffer.pop(i) for i in sorted_idxs]
-        return sampled_trajectories
+        sampled_episodes = [self.buffer.pop(i) for i in sorted_idxs]
+        return sampled_episodes

     @endpoint
     async def evict(self, curr_policy_version: int) -> None:
-        """Evict trajectories from the replay buffer if they are too old based on the current policy version
+        """Evict episodes from the replay buffer if they are too old based on the current policy version
         and the max policy age allowed.

         Args:
@@ -83,17 +79,17 @@ def _evict(self, curr_policy_version: int) -> None:
         ]

     @endpoint
-    async def _getitem(self, idx: int) -> Trajectory:
+    async def _getitem(self, idx: int):
         return self.buffer[idx]

     @endpoint
     async def _numel(self) -> int:
-        """Number of elements (trajectories) in the replay buffer."""
+        """Number of elements (episodes) in the replay buffer."""
         return len(self.buffer)

     @endpoint
     async def clear(self) -> None:
-        """Clear the replay buffer immediately - dropping all trajectories."""
+        """Clear the replay buffer immediately - dropping all episodes."""
         self.buffer.clear()

     @endpoint
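
Beyond the trajectory-to-episode rename (and `sample` now returning None silently instead of printing when the buffer is too small), the pattern `sample` relies on is worth spelling out: popping sampled indices in descending order keeps the remaining indices valid, since popping a low index first would shift every index behind it. A self-contained sketch of the same pattern (generic names, not the actor code itself):

import random


def sample_and_remove(buffer: list, k: int, rng: random.Random) -> list | None:
    """Sample k items without replacement, removing them from the buffer."""
    if k > len(buffer):
        return None
    idxs = rng.sample(range(len(buffer)), k=k)
    # Pop from the highest index down so earlier pops don't shift later ones.
    return [buffer.pop(i) for i in sorted(idxs, reverse=True)]


buf = ["ep0", "ep1", "ep2", "ep3", "ep4"]
picked = sample_and_remove(buf, 2, random.Random(0))
print(picked, buf)  # two episodes removed; three remain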

src/forge/controller/replica.py

Lines changed: 2 additions & 2 deletions
@@ -13,11 +13,11 @@
 from enum import Enum
 from typing import Optional

-from monarch.actor import Actor, ActorError, ProcMesh
-
 from forge.controller import get_proc_mesh
 from forge.types import ProcessConfig

+from monarch.actor import Actor, ActorError, ProcMesh
+
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)

src/forge/util/metric_logging.py

Lines changed: 19 additions & 10 deletions
@@ -6,7 +6,7 @@
 import os
 import sys
 import time
-from typing import Mapping, Optional
+from typing import Mapping, Optional, Union

 from forge.interfaces import MetricLogger
 from forge.types import Scalar
@@ -21,11 +21,12 @@ class StdoutLogger(MetricLogger):
     """Logger to standard output.

     Args:
-        freq (Mapping[str, int]):
-            calls to `log` and `log_dict` will be ignored if `step % freq[metric_name] != 0`
+        freq (Union[int, Mapping[str, int]]):
+            If int, all metrics will be logged at this frequency.
+            If Mapping, calls to `log` and `log_dict` will be ignored if `step % freq[metric_name] != 0`
     """

-    def __init__(self, freq: Mapping[str, int]):
+    def __init__(self, freq: Union[int, Mapping[str, int]]):
         self._freq = freq

     def is_log_step(self, name: str, step: int) -> bool:
@@ -35,6 +36,8 @@ def is_log_step(self, name: str, step: int) -> bool:
             name (str): metric name (for checking the freq for this metric)
             step (int): current step
         """
+        if isinstance(self._freq, int):
+            return step % self._freq == 0
         return step % self._freq[name] == 0

     def log(self, name: str, data: Scalar, step: int) -> None:
@@ -77,8 +80,9 @@ class TensorBoardLogger(MetricLogger):
     """Logger for use w/ PyTorch's implementation of TensorBoard (https://pytorch.org/docs/stable/tensorboard.html).

     Args:
-        freq (Mapping[str, int]):
-            calls to `log` and `log_dict` will be ignored if `step % freq[metric_name] != 0`
+        freq (Union[int, Mapping[str, int]]):
+            If int, all metrics will be logged at this frequency.
+            If Mapping, calls to `log` and `log_dict` will be ignored if `step % freq[metric_name] != 0`
         log_dir (str): torch.TensorBoard log directory
         organize_logs (bool): If `True`, this class will create a subdirectory within `log_dir` for the current
             run. Having sub-directories allows you to compare logs across runs. When TensorBoard is
@@ -103,7 +107,7 @@ class TensorBoardLogger(MetricLogger):

     def __init__(
         self,
-        freq: Mapping[str, int],
+        freq: Union[int, Mapping[str, int]],
         log_dir: str = "metrics_log",
         organize_logs: bool = True,
         **kwargs,
@@ -133,6 +137,8 @@ def is_log_step(self, name: str, step: int) -> bool:
             name (str): metric name (for checking the freq for this metric)
             step (int): current step
         """
+        if isinstance(self._freq, int):
+            return step % self._freq == 0
         return step % self._freq[name] == 0

     def log(self, name: str, data: Scalar, step: int) -> None:
@@ -168,8 +174,9 @@ class WandBLogger(MetricLogger):
     For more information about arguments expected by WandB, see https://docs.wandb.ai/ref/python/init.

     Args:
-        freq (Mapping[str, int]):
-            calls to `log` and `log_dict` will be ignored if `step % freq[metric_name] != 0`
+        freq (Union[int, Mapping[str, int]]):
+            If int, all metrics will be logged at this frequency.
+            If Mapping, calls to `log` and `log_dict` will be ignored if `step % freq[metric_name] != 0`
         log_dir (Optional[str]): WandB log directory.
         project (str): WandB project name. Default is `torchtune`.
         entity (Optional[str]): WandB entity name. If you don't specify an entity,
@@ -197,7 +204,7 @@ class WandBLogger(MetricLogger):

     def __init__(
         self,
-        freq: Mapping[str, int],
+        freq: Union[int, Mapping[str, int]],
         project: str,
         log_dir: str = "metrics_log",
         entity: Optional[str] = None,
@@ -241,6 +248,8 @@ def is_log_step(self, name: str, step: int) -> bool:
             name (str): metric name (for checking the freq for this metric)
             step (int): current step
         """
+        if isinstance(self._freq, int):
+            return step % self._freq == 0
         return step % self._freq[name] == 0

     def log(self, name: str, data: Scalar, step: int) -> None:
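
The net effect across StdoutLogger, TensorBoardLogger, and WandBLogger: `freq` may now be a single int (one frequency for every metric) or, as before, a per-metric mapping. The shared check is small enough to show standalone (a minimal sketch of the same logic, not the module verbatim):

from typing import Mapping, Union


def is_log_step(freq: Union[int, Mapping[str, int]], name: str, step: int) -> bool:
    """Return True when metric `name` should be logged at `step`."""
    if isinstance(freq, int):
        return step % freq == 0    # one global frequency for every metric
    return step % freq[name] == 0  # per-metric frequency lookup


assert is_log_step(10, "loss", 20)               # int: log every 10 steps
assert not is_log_step({"loss": 7}, "loss", 20)  # mapping: 20 % 7 != 0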
