Skip to content

Commit 9c3bf46

Browse files
committed
more updates for testing
1 parent b559831 commit 9c3bf46

File tree

6 files changed

+16
-7
lines changed

6 files changed

+16
-7
lines changed
758 KB
Binary file not shown.

scripts/build_wheels.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ NC='\033[0m'
 # Configuration
 PYTORCH_VERSION="2.9.0.dev20250905"
 VLLM_BRANCH="v0.10.0"
-MONARCH_COMMIT="6ca383aca99480aa1bf5853478d4d09fcb224035"
+MONARCH_COMMIT="d1c5ea4732704454efad82db678d4e66a4131bb2"
 TORCHTITAN_COMMIT="0cfbd0b3c2d827af629a107a77a9e47229c31663"
 TORCHSTORE_COMMIT="eed96eb55ce87d4a9880597dd7dfd0d291e9ac81"
 BUILD_DIR="$HOME/forge-build"

src/forge/actors/policy.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
 from forge.data.sharding import VLLMSharding
 from forge.data_models.completion import Completion
 from forge.data_models.prompt import to_prompt
+from forge.env import TORCHSTORE_USE_RDMA
 from forge.interfaces import Policy as PolicyInterface
 from forge.observability.metrics import record_metric, Reduce
 from forge.observability.perf_tracker import Tracer
@@ -140,7 +141,7 @@ class Policy(PolicyInterface):
     engine_config: EngineConfig | Mapping = field(default_factory=EngineConfig)
     sampling_config: SamplingConfig | Mapping = field(default_factory=SamplingConfig)
     available_devices: str | None = None
-    use_dcp: bool = True
+    use_dcp: bool = not TORCHSTORE_USE_RDMA.get_value()
     # Gets set up by setup
     sampling_params: SamplingParams | None = None
     lora_request: LoRARequest | None = None

src/forge/actors/trainer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@

 from forge.controller import ForgeActor
 from forge.data.utils import batch_to_device
+from forge.env import TORCHSTORE_USE_RDMA
 from forge.observability.metrics import record_metric, Reduce
 from forge.observability.perf_tracker import Tracer

@@ -111,7 +112,7 @@ class RLTrainer(ForgeActor):
     # Non JobConfig-related fields
     loss: Callable = lambda logits, **targets: logits
     state_dict_key: str = "model_state_dict"
-    use_dcp: bool = True
+    use_dcp: bool = not TORCHSTORE_USE_RDMA.get_value()
     dcp_path: str = "forge_dcp_tmp"

     def __post_init__(self):

src/forge/env.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,13 @@ def get_value(self) -> Any:

 MONARCH_HOSTMESH_V1 = EnvVar(
     name="MONARCH_HOSTMESH_V1",
-    default=True,
+    default=False,
     description="Whether or not to use Monarch's experimental hostmesh v1 APIs",
 )

 TORCHSTORE_USE_RDMA = EnvVar(
     name="TORCHSTORE_RDMA_ENABLED",
-    default=True,
+    default=False,
     description="Whether or not to use RDMA in TorchStore.",
 )

src/forge/observability/metric_actors.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
 import logging
 from typing import Any, Union

-from monarch.actor import Actor, endpoint, get_or_spawn_controller, ProcMesh, this_proc
+from monarch.actor import Actor, endpoint, ProcMesh

-from forge.env import FORGE_DISABLE_METRICS
+from forge.env import FORGE_DISABLE_METRICS, MONARCH_HOSTMESH_V1
 from forge.observability.metrics import (
     BackendRole,
     get_logger_backend_class,
@@ -19,6 +19,13 @@
     reduce_metrics_states,
 )

+if MONARCH_HOSTMESH_V1.get_value():
+    from monarch._src.actor.v1.host_mesh import this_proc
+    from monarch._src.actor.v1.proc_mesh import get_or_spawn_controller
+else:
+    from monarch.actor import get_or_spawn_controller, this_proc
+
+
 logger = logging.getLogger(__name__)

 _global_logger = None

0 commit comments

Comments
 (0)