Commit 32ac7f3: refactor
1 parent 5f7cf3c

File tree: 2 files changed (+29, -40 lines)

src/forge/actors/policy.py (2 additions, 4 deletions)
```diff
@@ -107,7 +107,6 @@ class Policy(PolicyInterface):
     lora_request: LoRARequest | None = None
     tokenization_kwargs: dict = field(default_factory=dict)
     policy_worker: "PolicyWorker" = None
-    store = None
 
     def __post_init__(self):
         self._run_task: asyncio.Task | None = None
@@ -121,7 +120,6 @@ async def launch( # pyright: ignore[reportIncompatibleMethodOverride]
         *,
         process_config: ProcessConfig,
         config: PolicyConfig,
-        store=None,
         **kwargs,
     ) -> "Policy":
         # Note - get_proc_mesh will set MASTER_ADDR, MASTER_PORT and CUDA_VISIBLE_DEVICES
@@ -172,7 +170,7 @@ async def shutdown( # pyright: ignore[reportIncompatibleMethodOverride]
     async def setup(self):
         # Set up policy_worker
         assert self.policy_worker is not None, "Policy worker should not be None"
-        await self.policy_worker.setup.call(store=self.store)
+        await self.policy_worker.setup.call()
 
         self.request_id = 0
         self.requests: Dict[str, tuple[None | ParentRequest, asyncio.Future]] = {}
@@ -395,7 +393,7 @@ def __post_init__(self):
         self.vllm_args = self.vllm_args.create_engine_config(UsageContext.LLM_CLASS)
 
     @endpoint
-    async def setup(self, store=None):
+    async def setup(self):
         # TODO: remove ["gpus"] when monarch implements a flat rank
         self.rank = current_rank()["gpus"]
         self.worker = self.setup_worker()
```
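The shape of this change: the `store` handle is no longer threaded through `launch()` and `setup()` as an explicit argument; instead the process initializes torchstore once (the test calls `await ts.initialize()`) and workers resolve it implicitly. Below is a self-contained toy sketch of that pattern, not the real torchstore or forge API; `_STORE`, `initialize`, and this `PolicyWorker` are stand-ins for illustration only.

```python
import asyncio

_STORE: dict | None = None  # stand-in for the process-wide torchstore singleton


async def initialize() -> None:
    # Mirrors `await ts.initialize()`: set up the store once per process.
    global _STORE
    _STORE = {}


class PolicyWorker:
    async def setup(self) -> None:
        # Post-refactor: setup() takes no `store` argument and reads the
        # process-wide singleton instead of a handle passed by the caller.
        assert _STORE is not None, "initialize() must run before setup()"
        self.store = _STORE


async def main() -> None:
    await initialize()
    worker = PolicyWorker()
    await worker.setup()        # no store= kwarg, as in the new signature
    worker.store["w"] = [0.0]   # reads/writes go through the singleton


asyncio.run(main())
```

The trade-off is less plumbing at every spawn site in exchange for an implicit dependency on process-level initialization order.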

tests/integration_tests/test_policy_update.py (27 additions, 36 deletions)
```diff
@@ -186,15 +186,12 @@ def get_configs(
 
 
 @pytest_asyncio.fixture(scope="session")
-async def llama3_torchstore_setup():
+async def setup_test():
     """
-    Pytest fixture to load Llama 3.1 8B-Instruct. We use the loaded state dict as SOT for validation.
-    Uses session scope so it's only called once when both tests are run.
+    Pytest fixture to load Llama 3.1 8B-Instruct. We use the loaded state dict
+    as the SOT for validation. Uses session scope so it's only called once
+    across UT.
     """
-    print("=== PHASE 1: Writing Llama 3.1 8B-Instruct to TorchStore ===")
-
-    store = await ts.initialize()
-
     model_path = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 
     # Load the model from local path - using device_map="auto" for efficient loading
@@ -207,76 +204,70 @@ async def llama3_torchstore_setup():
 
     original_state_dict = model.state_dict()
     print(f"Original state dict has {len(original_state_dict)} parameters")
-    converted_state_dict = convert_state_dict(original_state_dict)
-    print(f"Converted state dict has {len(converted_state_dict)} parameters")
+    hf_state_dict = convert_state_dict(original_state_dict)
+    print(f"Converted state dict has {len(hf_state_dict)} parameters")
 
-    return store, converted_state_dict
+    return hf_state_dict
 
 
 async def run_rl_trainer(worker_size) -> None:
     """
-    1. Spawn the trainer.
-    2. Inject torchstore references via setup call.
-    2. Call push weights.
+    Spawn the RL trainer
+    Args:
+        worker_size: Number of workers/procs.
     """
     cfg: DictConfig = OmegaConf.load("apps/rl/llama3_8b.yaml")
     rl_trainer = await spawn_service(
-        ServiceConfig(procs_per_replica=1, with_gpus=True, num_replicas=1),
+        ServiceConfig(procs_per_replica=worker_size, with_gpus=True, num_replicas=1),
         RLTrainer,
         **cfg.trainer,
     )
     # Push the weights to torchstore
     await rl_trainer.push_weights.choose()
 
 
-async def run_policy_integration(store, worker_size) -> Dict[str, torch.Tensor]:
+async def run_policy_integration(worker_size) -> Dict[str, torch.Tensor]:
     """
-    Common helper function to test Policy integration with different GPU configurations.
+    Launch the policy service.
 
     Args:
         store: TorchStore instance
-        original_state_dict: Original state dict for validation
-        num_gpus: Number of GPUs to use (1 for single GPU, 2+ for tensor parallel)
+        worker_size: Number of workers/procs (2+ for tensor parallel)
     """
-    print(f"=== PHASE 2: Testing Policy Integration (Workers: {worker_size}) ===")
+    print(f"=== PHASE 2: Launching Policy Engine (Workers: {worker_size}) ===")
 
     policy_config, service_config = get_configs(
         worker_size=worker_size, model_name="meta-llama/Llama-3.1-8B-Instruct"
     )
-    policy = await spawn_service(
-        service_config, Policy, config=policy_config, store=store
-    )
+    policy = await spawn_service(service_config, Policy, config=policy_config)
 
     # Policy engine start with default version 0 that gets incremented.
     print("Calling Policy.update() to load weights from torchstore...")
     await policy.update_weights.call()
     print(
         "Successfully called Policy.update_weights() to load weights from torchstore!"
     )
-    # We get the result as a list.
-    #results = await policy._get_model_params.call()
-    #assert len(results) == 1
-    #print("Successfully got model state dict after update")
-    #return results[0]
-    return {}
+    results = await policy._get_model_params.call()
+    assert len(results) == 1
+    print("Successfully got model state dict after update")
+    return results[0]
 
 
 @pytest.mark.asyncio
 @requires_cuda
-async def test_llama3_policy_update_single():
+async def test_llama3_policy_update_single(setup_test):
     print("Starting Llama 3 8B torchstore test (single GPU)...")
 
-    # store, original_state_dict = llama3_torchstore_setup
     await ts.initialize()
+    expected_state_dict = setup_test
     await run_rl_trainer(worker_size=1)
-    loaded_state_dict = await run_policy_integration(None, worker_size=1)
-    assert False, "Planned failure"
+    loaded_state_dict = await run_policy_integration(worker_size=1)
 
     # validating for single resource case.
-    # validate_loaded_tensors_equals_original(
-    #     loaded_state_dict, original_state_dict, tensor_parallel_size=0, rank=0
-    # )
-
+    validate_loaded_tensors_equals_original(
+        loaded_state_dict, expected_state_dict, tensor_parallel_size=0, rank=0
+    )
     print(
         "Single GPU test passed! Llama 3.1 8B-Instruct model successfully loaded into Policy via TorchStore!"
    )
+    assert False, "Planned failure"
```
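For reference, here is a self-contained sketch of the session-scoped async fixture pattern the test now relies on (pytest-asyncio). The 8B model load is replaced by a cheap stand-in dict; `setup_test` and `test_llama3_policy_update_single` mirror the names in the diff, while everything else is hypothetical.

```python
# test_fixture_sketch.py - minimal sketch of a session-scoped async fixture
# injected into a test by parameter name, as in the diff above.
import pytest
import pytest_asyncio


@pytest_asyncio.fixture(scope="session")
async def setup_test():
    # Runs once per session; the real fixture loads Llama 3.1 8B-Instruct
    # and returns its converted state dict as the source of truth.
    return {"layer.weight": [1.0, 2.0]}  # stand-in for hf_state_dict


@pytest.mark.asyncio
async def test_llama3_policy_update_single(setup_test):
    # Pytest injects the fixture's return value via the parameter name.
    expected_state_dict = setup_test
    assert "layer.weight" in expected_state_dict
```

Because the fixture is session-scoped, the expensive checkpoint load happens once and is shared by every test that requests `setup_test`, while the `ts.initialize()` call now lives inside each test rather than in the fixture.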
