meta-pytorch
diff --git a/‎.github/workflows/docs.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/docs.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/unit_test.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/unit_test.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎apps/grpo/main.py‎
Lines changed: 28 additions & 2 deletions b/‎apps/grpo/main.py‎
Lines changed: 28 additions & 2 deletions
diff --git a/‎apps/grpo/qwen3_1_7b.yaml‎
Lines changed: 7 additions & 0 deletions b/‎apps/grpo/qwen3_1_7b.yaml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎apps/grpo/qwen3_32b.yaml‎
Lines changed: 7 additions & 0 deletions b/‎apps/grpo/qwen3_32b.yaml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎apps/grpo/qwen3_8b.yaml‎
Lines changed: 7 additions & 0 deletions b/‎apps/grpo/qwen3_8b.yaml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎assets/ci/monarch_no_torch-0.1.0.dev20250826-py3-none-any.whl‎ renamed to ‎assets/ci/monarch_no_torch-0.1.0.dev20251010-py3-none-any.whl‎
23.1 MB b/‎assets/ci/monarch_no_torch-0.1.0.dev20250826-py3-none-any.whl‎ renamed to ‎assets/ci/monarch_no_torch-0.1.0.dev20251010-py3-none-any.whl‎
23.1 MB
diff --git a/‎assets/wheels/monarch-0.0.1-cp310-cp310-linux_x86_64.whl‎
758 KB b/‎assets/wheels/monarch-0.0.1-cp310-cp310-linux_x86_64.whl‎
758 KB
diff --git a/‎src/forge/actors/policy.py‎
Lines changed: 5 additions & 2 deletions b/‎src/forge/actors/policy.py‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎src/forge/actors/trainer.py‎
Lines changed: 4 additions & 1 deletion b/‎src/forge/actors/trainer.py‎
Lines changed: 4 additions & 1 deletion
@@ -40,7 +40,7 @@ jobs:
         run: python -m pip install torch==2.9.0 --index-url https://download.pytorch.org/whl/test/cu130
       - name: Install monarch
         shell: bash -l {0}
-        run: python -m pip install monarch-no-torch==0.1.0.dev20250826 --find-links assets/ci
+        run: pip install assets/ci/monarch_no_torch-0.1.0.dev20251010-py3-none-any.whl
       - name: Install torchforge
         shell: bash -l {0}
         env:
 
@@ -26,7 +26,7 @@ jobs:
       - name: Install pytorch
         run: python -m pip install torch==2.9.0.dev20250826 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
       - name: Install monarch
-        run: python -m pip install monarch-no-torch==0.1.0.dev20250826 --find-links assets/ci
+        run: pip install assets/ci/monarch_no_torch-0.1.0.dev20251010-py3-none-any.whl
       - name: Install torchstore
         run: pip install assets/wheels/torchstore-0.1.0-py3-none-any.whl
       - name: Install torchtitan
 
@@ -28,6 +28,7 @@
 from forge.controller.actor import ForgeActor
 from forge.controller.provisioner import init_provisioner, shutdown
 from forge.data.rewards import MathReward, ThinkingReward
+from forge.env import MONARCH_HOSTMESH_V1
 from forge.observability.metric_actors import get_or_create_metric_logger
 from forge.observability.metrics import record_metric, Reduce
 from forge.observability.perf_tracker import Tracer
@@ -314,14 +315,23 @@ async def main(cfg: DictConfig):
     max_res_tokens = cfg.max_res_tokens
 
     # ---- Global setups ---- #
+    provisioner = None
     if cfg.get("provisioner", None) is not None:
-        await init_provisioner(
+        provisioner = await init_provisioner(
             ProvisionerConfig(launcher_config=LauncherConfig(**cfg.provisioner))
         )
+    else:
+        provisioner = await init_provisioner()
+
     metric_logging_cfg = cfg.get("metric_logging", {"console": {"log_per_rank": False}})
     mlogger = await get_or_create_metric_logger()
     await mlogger.init_backends.call_one(metric_logging_cfg)
-    await ts.initialize(strategy=ts.ControllerStorageVolumes())
+
+    # In the HostMesh v0 case, actors on remote hosts are not able to communicate
+    # with one another. Therefore we use the controller as our storage volume.
+    if not MONARCH_HOSTMESH_V1.get_value():
+        await ts.initialize(strategy=ts.ControllerStorageVolumes())
+        print("Torchstore successfully initialized with controller storage strategy")
 
     # ---- Setup services ---- #
 
@@ -351,6 +361,22 @@ async def main(cfg: DictConfig):
 
     print("All services initialized successfully!")
 
+    # In the HostMesh v1 case, we spawn a torchstore storage volume
+    # per trainer process.
+    # We initialize after service initialization because torchstore currently
+    # requires access to the underlying proc meshes in the local rank strategy.
+    # We should be able to hide this in the future.
+    if MONARCH_HOSTMESH_V1.get_value():
+        # TODO: support multiple host meshes
+        trainer_num_procs = cfg.actors.trainer["procs"]
+        trainer_host_mesh_name = cfg.actors.trainer["mesh_name"]
+        trainer_hosts = provisioner.get_host_mesh(trainer_host_mesh_name)
+        await ts.initialize(
+            mesh=trainer_hosts.spawn_procs(per_host={"procs": trainer_num_procs}),
+            strategy=ts.LocalRankStrategy(),
+        )
+        print("Torchstore successfully initialized with local rank strategy")
+
     # ---- Core RL loops ---- #
     async def continuous_rollouts():
         rollout_count = 0
 
@@ -117,26 +117,33 @@ services:
   policy:
     procs: ${policy.engine_args.tensor_parallel_size}
     num_replicas: 1
+    mesh_name: policy
     with_gpus: true
   ref_model:
     procs: 1
     num_replicas: 1
+    mesh_name: ref_model
     with_gpus: true
   reward_actor:
     procs: 1
     num_replicas: 1
+    mesh_name: reward_actor
     with_gpus: false
 
 actors:
   dataset:
     procs: 1
     with_gpus: false
+    mesh_name: dataset
   trainer:
     procs: 1
     with_gpus: true
+    mesh_name: trainer
   replay_buffer:
     procs: 1
     with_gpus: false
+    mesh_name: replay_buffer
   compute_advantages:
     procs: 1
     with_gpus: false
+    mesh_name: compute_advantages
@@ -122,26 +122,33 @@ services:
     num_replicas: 1
     hosts: 1
     with_gpus: true
+    mesh_name: policy
   ref_model:
     procs: ${ref_model.parallelism.tensor_parallel_degree}
     num_replicas: 1
     with_gpus: true
+    mesh_name: ref_model
   reward_actor:
     procs: 1
     num_replicas: 1
     with_gpus: false
+    mesh_name: reward_actor
 
 actors:
   dataset:
     procs: 1
     with_gpus: false
+    mesh_name: dataset
   trainer:
     procs: 8
     hosts: 1
     with_gpus: true
+    mesh_name: trainer
   replay_buffer:
     procs: 1
     with_gpus: false
+    mesh_name: replay_buffer
   compute_advantages:
     procs: 1
     with_gpus: false
+    mesh_name: compute_advantages
@@ -117,25 +117,32 @@ services:
     procs: ${policy.engine_args.tensor_parallel_size}
     num_replicas: 1
     with_gpus: true
+    mesh_name: policy
   ref_model:
     procs: 1
     num_replicas: 1
     with_gpus: true
+    mesh_name: ref_model
   reward_actor:
     procs: 1
     num_replicas: 1
     with_gpus: false
+    mesh_name: reward_actor
 
 actors:
   dataset:
     procs: 1
     with_gpus: false
+    mesh_name: dataset
   trainer:
     procs: 2
     with_gpus: true
+    mesh_name: trainer
   replay_buffer:
     procs: 1
     with_gpus: false
+    mesh_name: replay_buffer
   compute_advantages:
     procs: 1
     with_gpus: false
+    mesh_name: compute_advantages
@@ -53,6 +53,7 @@
 from forge.data.sharding import VLLMSharding
 from forge.data_models.completion import Completion
 from forge.data_models.prompt import to_prompt
+from forge.env import TORCHSTORE_USE_RDMA
 from forge.interfaces import Policy as PolicyInterface
 from forge.observability.metrics import record_metric, Reduce
 from forge.observability.perf_tracker import Tracer
@@ -67,7 +68,9 @@ class Policy(PolicyInterface):
     engine_args: EngineArgs | Mapping = field(default_factory=EngineArgs)
     sampling_params: SamplingParams | Mapping = field(default_factory=SamplingParams)
     available_devices: str | None = None
-    use_dcp: bool = True
+    use_dcp: bool = (
+        TORCHSTORE_USE_RDMA.get_value() == 0
+    )  # use DCP iff flag is 0; torchstore currently only accepts 0 or 1
     # Gets set up by setup
     lora_request: LoRARequest | None = None
     tokenization_kwargs: dict = field(default_factory=dict)
@@ -83,7 +86,7 @@ def __post_init__(self):
 
         if isinstance(self.engine_args, Mapping):
             self.engine_args = EngineArgs(**self.engine_args)
-            self.engine_args._is_v1_supported_oracle = lambda *_: True
+        self.engine_args._is_v1_supported_oracle = lambda *_: True
 
         if isinstance(self.sampling_params, Mapping):
             self.sampling_params = SamplingParams.from_optional(**self.sampling_params)
 
@@ -46,6 +46,7 @@
 
 from forge.controller import ForgeActor
 from forge.data.utils import batch_to_device
+from forge.env import TORCHSTORE_USE_RDMA
 from forge.observability.metrics import record_metric, Reduce
 from forge.observability.perf_tracker import Tracer
 
@@ -111,7 +112,9 @@ class RLTrainer(ForgeActor):
     # Non JobConfig-related fields
     loss: Callable = lambda logits, **targets: logits
     state_dict_key: str = "model_state_dict"
-    use_dcp: bool = True
+    use_dcp: bool = (
+        TORCHSTORE_USE_RDMA.get_value() == 0
+    )  # use DCP iff flag is 0; torchstore currently only accepts 0 or 1
     dcp_path: str = "forge_dcp_tmp"
 
     def __post_init__(self):