diff --git a/.meta/mast/main.py b/.meta/mast/main.py index 513d96fc6..d867db1c2 100644 --- a/.meta/mast/main.py +++ b/.meta/mast/main.py @@ -63,8 +63,6 @@ async def main(cfg: DictConfig, mode: str = "detached", extra_args: list = None) extra_args=extra_args or [], ) await launcher.launch_mast_job() - print(f"MAST job {launcher.job_name} launched successfully with client role.") - print("The client is running inside MAST and will execute the training.") else: # In remote mode, we're already running inside MAST, so mount directory, init provisioner and run training mount_mnt_directory("/mnt/wsfuse") @@ -97,7 +95,6 @@ def _main(cfg): # Override job name from CLI if args.job_name: cfg[JOB_NAME_KEY] = args.job_name - print(f"Using job name: {args.job_name}") asyncio.run(main(cfg, mode=args.mode, extra_args=remaining)) _main() # @parse grabs the cfg from CLI diff --git a/src/forge/controller/launcher.py b/src/forge/controller/launcher.py index 333acbe32..a11ab50be 100644 --- a/src/forge/controller/launcher.py +++ b/src/forge/controller/launcher.py @@ -17,6 +17,8 @@ import monarch import torchx.specs as specs + +from forge.types import Launcher, LauncherConfig from monarch._rust_bindings.monarch_hyperactor.alloc import AllocConstraints from monarch._rust_bindings.monarch_hyperactor.channel import ChannelTransport @@ -24,12 +26,9 @@ from monarch._src.actor.allocator import RemoteAllocator, TorchXRemoteAllocInitializer from monarch.actor import Actor, endpoint, ProcMesh from monarch.tools import commands -from monarch.tools.commands import info -from monarch.tools.components import hyperactor +from monarch.tools.commands import create, info from monarch.tools.config import Config, Workspace -from forge.types import Launcher, LauncherConfig - _MAST_AVAILABLE = False try: @@ -259,8 +258,12 @@ async def launch_mast_job(self): ), ) - await commands.get_or_create(self.job_name, config) - return server_spec + job_handle = create(config, name=self.job_name) + print( + f"MAST job launched successfully:\n" + f"\033[92mhttps://www.internalfb.com/mlhub/pipelines/runs/mast/{self.job_name}\033[0m" + ) + return job_handle def add_additional_packages(self, packages: "Packages") -> "Packages": packages.add_package("oil.oilfs:stable")