@@ -470,7 +470,7 @@ async def simple_rl_step():
470470 if batch is not None :
471471 print (" Training on batch..." )
472472 inputs, targets = batch # GRPO returns (inputs, targets) tuple
473- loss = await trainer.train_step.call(inputs, targets) # RLTrainer is an actor
473+ loss = await trainer.train_step.call(inputs, targets) # TitanTrainer is an actor
474474 print (f " Training loss: { loss} " )
475475 return loss
476476 else :
@@ -507,7 +507,7 @@ reward_actor = await RewardActor.options(
507507)
508508
509509# Training needs fewer but more powerful replicas
510- trainer = await RLTrainer .options(
510+ trainer = await TitanTrainer .options(
511511 procs = 1 , with_gpus = True # Fewer but GPU-heavy
512512).as_actor( # Trainer typically uses .as_actor() not .as_service()
513513 model = {" name" : " qwen3" , " flavor" : " 1.7B" },
@@ -580,7 +580,7 @@ import torch
580580from forge.actors.generator import Generator as Policy
581581from forge.actors.reference_model import ReferenceModel
582582from forge.actors.replay_buffer import ReplayBuffer
583- from forge.actors.trainer import RLTrainer
583+ from forge.actors.trainer import TitanTrainer
584584from apps.grpo.main import DatasetActor, RewardActor, ComputeAdvantages
585585from forge.data.rewards import MathReward, ThinkingReward
586586
@@ -603,7 +603,7 @@ print("Initializing all services...")
603603 engine_config = {" model" : " Qwen/Qwen3-1.7B" , " tensor_parallel_size" : 1 },
604604 sampling_config = {" n" : 1 , " max_tokens" : 512 }
605605 ),
606- RLTrainer .options(procs = 1 , with_gpus = True ).as_actor(
606+ TitanTrainer .options(procs = 1 , with_gpus = True ).as_actor(
607607 model = {" name" : " qwen3" , " flavor" : " 1.7B" , " hf_assets_path" : " hf://Qwen/Qwen3-1.7B" },
608608 optimizer = {" name" : " AdamW" , " lr" : 1e-5 },
609609 training = {" local_batch_size" : 2 , " seq_len" : 2048 }
@@ -667,7 +667,7 @@ print("Shutting down services...")
667667await asyncio.gather(
668668 DatasetActor.shutdown(dataloader),
669669 policy.shutdown(),
670- RLTrainer .shutdown(trainer),
670+ TitanTrainer .shutdown(trainer),
671671 ReplayBuffer.shutdown(replay_buffer),
672672 ComputeAdvantages.shutdown(compute_advantages),
673673 ReferenceModel.shutdown(ref_model),
0 commit comments