forked from microsoft/agent-lightning
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain_rag.py
More file actions
200 lines (161 loc) · 6.84 KB
/
train_rag.py
File metadata and controls
200 lines (161 loc) · 6.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# Copyright (c) Microsoft. All rights reserved.
"""Train a RAG agent using Agent-lightning.
Usage:
python train_rag.py fast # Fast training for CI/testing
python train_rag.py single_gpu # Optimized for Single GPU (1.5B/7B models)
"""
from __future__ import annotations
import argparse
import os
import uuid
from copy import deepcopy
from datetime import datetime
from typing import Any, Dict, List, Optional
import pandas as pd
from rag_agent import RAGAgent # Make sure to import your RAGAgent class
import agentlightning as agl
# Base configuration (default configuration, can be overridden)
RL_TRAINING_CONFIG: Dict[str, Any] = {
"algorithm": {
"adv_estimator": "grpo", # Use GRPO algorithm
"use_kl_in_reward": False,
},
"data": {
"train_batch_size": 16, # Default configuration for multi-GPU
"max_prompt_length": 8192,
"max_response_length": 2048,
"truncation": "error",
},
"actor_rollout_ref": {
"rollout": {
"tensor_model_parallel_size": 1,
"n": 4, # Generate 4 responses per sampling
"log_prob_micro_batch_size_per_gpu": 4,
"multi_turn": {"format": "hermes"}, # Ensure using template format matching the model
"name": "vllm",
"gpu_memory_utilization": 0.6, # vLLM GPU memory utilization
"engine_kwargs": {
"vllm": {
"enable_auto_tool_choice": True,
"tool_call_parser": "hermes",
}
},
},
"actor": {
"ppo_mini_batch_size": 16,
"ppo_micro_batch_size_per_gpu": 4,
"optim": {"lr": 1e-6},
"use_kl_loss": False,
"kl_loss_coef": 0.0,
"entropy_coeff": 0,
"clip_ratio_low": 0.2,
"clip_ratio_high": 0.3,
"fsdp_config": {
"param_offload": True, # Enable parameter offloading to save GPU memory
"optimizer_offload": True,
},
},
"ref": {
"log_prob_micro_batch_size_per_gpu": 8,
"fsdp_config": {"param_offload": True},
},
"model": {
"path": "Qwen/Qwen2.5-1.5B-Instruct", # Default model
"use_remove_padding": True,
"enable_gradient_checkpointing": True,
},
},
"trainer": {
"n_gpus_per_node": 1,
"val_before_train": True,
"critic_warmup": 0,
"logger": ["console"], # Disable wandb for easier local debugging, add back when needed
"project_name": "AgentLightning",
"experiment_name": "rag_agent",
"nnodes": 1,
"test_freq": 10,
"total_epochs": 200,
},
}
def config_train_fast() -> Dict[str, Any]:
"""Fast training configuration for CI/testing"""
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
random_suffix = uuid.uuid4().hex[:8]
EXPERIMENT_NAME = f"rag_fast_{timestamp}_{random_suffix}"
PROJECT_NAME = "AgentLightningCI"
# Simulate writing to $GITHUB_OUTPUT if it’s set
github_output = os.getenv("GITHUB_OUTPUT")
if github_output:
with open(github_output, "a") as f:
f.write(f"project_name={PROJECT_NAME}\n")
f.write(f"run_name={EXPERIMENT_NAME}\n")
print("Set environment variables:")
print(f"PROJECT_NAME={PROJECT_NAME}")
print(f"EXPERIMENT_NAME={EXPERIMENT_NAME}")
config = deepcopy(RL_TRAINING_CONFIG)
# Keep it tiny/light without adding new knobs
config["actor_rollout_ref"]["rollout"]["gpu_memory_utilization"] = 0.8
config["trainer"]["total_epochs"] = 2
config["trainer"]["test_freq"] = 5
config["trainer"]["experiment_name"] = EXPERIMENT_NAME
config["trainer"]["project_name"] = PROJECT_NAME
config["trainer"]["logger"] = ["console", "wandb"]
return config
def config_train_single_gpu() -> Dict[str, Any]:
"""Single GPU training optimized configuration (optimized for 24GB GPU memory)"""
config = deepcopy(RL_TRAINING_CONFIG)
# 1. Reduce vLLM memory usage to leave space for training
config["actor_rollout_ref"]["rollout"]["gpu_memory_utilization"] = 0.4
# 2. Reduce Batch Size to prevent OOM
config["data"]["train_batch_size"] = 4
config["actor_rollout_ref"]["actor"]["ppo_mini_batch_size"] = 4
config["actor_rollout_ref"]["actor"]["ppo_micro_batch_size_per_gpu"] = 1
config["actor_rollout_ref"]["rollout"]["log_prob_micro_batch_size_per_gpu"] = 2
# 3. Ensure Offload is enabled
config["actor_rollout_ref"]["actor"]["fsdp_config"]["param_offload"] = True
config["actor_rollout_ref"]["actor"]["fsdp_config"]["optimizer_offload"] = True
return config
def train(config: Dict[str, Any], active_agent: Optional[str]) -> None:
"""Train the RAG agent with the given configuration."""
# 1. Instantiate your Agent
agent = RAGAgent()
# 2. Initialize algorithm (VERL)
algorithm = agl.VERL(config)
# 3. Initialize Trainer
# n_runners=4 means 4 concurrent rollout runners (can be reduced if insufficient memory, or managed internally by VERL)
trainer = agl.Trainer(n_runners=4, algorithm=algorithm, adapter={"agent_match": active_agent})
# 4. Load data
# NOTE: Fill in the path to your previously converted parquet file here
# For demo purposes, we use the same dataset for training and validation,
# which should be avoided in production.
train_df: pd.DataFrame = pd.read_parquet("data/dataset_tiny.parquet") # type: ignore
val_df: pd.DataFrame = pd.read_parquet("data/dataset_tiny.parquet") # type: ignore
# Keep the rest of the code unchanged
train_data: List[Dict[str, Any]] = train_df.to_dict(orient="records") # type: ignore
val_data: List[Dict[str, Any]] = val_df.to_dict(orient="records") # type: ignore
# 5. Start training
trainer.fit(agent, train_dataset=train_data, val_dataset=val_data)
def main() -> None:
parser = argparse.ArgumentParser(description="Train a RAG agent using different configurations")
parser.add_argument(
"config",
choices=["fast", "single_gpu"],
default="single_gpu",
nargs="?",
help="Training configuration name",
)
parser.add_argument("--active-agent", type=str, help="Override the active agent name")
args = parser.parse_args()
config_functions = {
"fast": config_train_fast,
"single_gpu": config_train_single_gpu,
}
config = config_functions[args.config]()
# Print key information for confirmation
print(f"Starting training with '{args.config}' configuration...")
print(f"Model: {config['actor_rollout_ref']['model']['path']}")
print(f"Batch Size: {config['data']['train_batch_size']}")
print(f"GPU Mem Util: {config['actor_rollout_ref']['rollout']['gpu_memory_utilization']}")
train(config, args.active_agent)
if __name__ == "__main__":
main()