feat(verl): add unexpected tool call filtering #467
```diff
@@ -40,6 +40,12 @@
 import agentlightning as agl
 from agentlightning.env_var import LightningEnvVar, resolve_bool_env_var, resolve_str_env_var

+# Ensure venv bin is in PATH (needed for uvx/mcp-server-calculator in Ray workers)
+_script_dir = os.path.dirname(os.path.abspath(__file__))
+_venv_bin = os.path.join(_script_dir, "..", "..", ".venv", "bin")
+if os.path.isdir(_venv_bin):
+    os.environ["PATH"] = os.path.abspath(_venv_bin) + ":" + os.environ.get("PATH", "")
+

 def verl_default_config() -> Dict[str, Any]:
     config = {
```

Review comment (on the venv PATH block): Some unnecessary changes to this file. Only related config should be included here, I think.
```diff
@@ -123,6 +129,11 @@ def train(
     trajectory_level: bool = False,
     weave: bool,
     mongo_uri: Optional[str],
+    filter_unexpected_tool_calls: bool = False,
+    experiment_name: Optional[str] = None,
+    n_gpus: int = 1,
+    checkpoint_dir: str = "/home/jovyan/msra/experiments/checkpoints",
+    resume: bool = False,
 ):
     """The training entrypoint function for Calc-X agent with VERL algorithm.
```

Review comment (on the `checkpoint_dir` default): Could you please explain this line? It seems that this path belongs to someone else?
```diff
@@ -141,6 +152,7 @@ def train(
         trajectory_level: Whether to enable trajectory level in trace aggregator.
         weave: Whether to enable Weave tracing.
         mongo_uri: MongoDB URI to use for the store.
+        experiment_name: Custom experiment name for W&B logging.
     """
     # Load datasets (respect CLI file paths)
     train_dataset = cast(agl.Dataset[MathProblem], HuggingFaceDataset.from_parquet(train_file).to_list())  # type: ignore
```
```diff
@@ -156,6 +168,26 @@ def train(
     if model:
         config["actor_rollout_ref"]["model"]["path"] = model

+    # Override experiment name if provided (for W&B logging)
+    if experiment_name:
+        config["trainer"]["experiment_name"] = experiment_name
+        print(f"Using custom experiment name: {experiment_name}")
+
+    # Override n_gpus_per_node for multi-GPU training
+    if n_gpus > 1:
+        config["trainer"]["n_gpus_per_node"] = n_gpus
+        print(f"Multi-GPU training enabled: n_gpus_per_node={n_gpus}")
+
+    # Set checkpoint directory and conversation dump directory
+    config["trainer"]["default_local_dir"] = checkpoint_dir
+    config["trainer"]["resume_mode"] = "auto" if resume else "disable"
+    conversations_dir = checkpoint_dir.replace("checkpoints", "conversations")
+    config["trainer"]["rollout_data_dir"] = conversations_dir
+    os.makedirs(conversations_dir, exist_ok=True)
+    print(f"Checkpoint directory: {checkpoint_dir}")
+    print(f"Conversations directory: {conversations_dir}")
+    print(f"Resume mode: {config['trainer']['resume_mode']}")
+
     # Enable LoRA configuration if requested
     if lora:
         config["actor_rollout_ref"]["model"]["lora_rank"] = lora_rank
```
```diff
@@ -175,6 +207,19 @@ def train(
         }
         print("Trajectory level enabled in trace aggregator.")

+    # =========================================================================
+    # Tool Call Filtering (Youtu-Agent style)
+    # Filters out turns where the model generates unexpected content after
+    # a tool call (hallucinated tool responses). Helps prevent entropy explosion.
+    # =========================================================================
+    if filter_unexpected_tool_calls:
+        if "agentlightning" not in config:
+            config["agentlightning"] = {"trace_aggregator": {}}
+        if "trace_aggregator" not in config["agentlightning"]:
+            config["agentlightning"]["trace_aggregator"] = {}
+        config["agentlightning"]["trace_aggregator"]["filter_unexpected_tool_calls"] = True
+        print("Tool call filtering enabled (Youtu-Agent style).")
+
     # CI toggle keeps everything else the same but you can tweak the lightweight bits here if desired
     if ci or ci_fast:
         # Config the experiment name and project name so that they are available to CI
```
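This hunk only flips a config flag; the actual filtering presumably happens inside the trace aggregator. As a rough, library-agnostic sketch of the idea described in the comment block (turns with hallucinated tool responses are dropped), not the agentlightning implementation:

```python
# Rough sketch of the filtering idea only; this is NOT the agentlightning
# trace-aggregator code. The message shape follows the OpenAI-style chat
# format, and the function name is made up for illustration.
from typing import Any, Dict, List


def drop_unexpected_tool_call_turns(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Drop assistant turns that emit free-form text alongside a tool call.

    Such turns usually mean the model "answered for the tool" (a hallucinated
    tool response), which is what the filter is meant to keep out of training.
    """
    kept: List[Dict[str, Any]] = []
    for msg in messages:
        has_tool_call = msg.get("role") == "assistant" and msg.get("tool_calls")
        extra_text = bool((msg.get("content") or "").strip())
        if has_tool_call and extra_text:
            continue  # unexpected content after a tool call: skip this turn
        kept.append(msg)
    return kept


if __name__ == "__main__":
    conversation = [
        {"role": "user", "content": "What is 12 * 7?"},
        {"role": "assistant", "content": "", "tool_calls": [{"name": "calculator", "arguments": '{"expr": "12*7"}'}]},
        {"role": "assistant", "content": "The tool returned 84.", "tool_calls": [{"name": "calculator", "arguments": "{}"}]},
        {"role": "tool", "content": "84"},
    ]
    # Expect 3 messages: the mixed tool-call + text turn is filtered out.
    print(len(drop_unexpected_tool_call_turns(conversation)))
```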
```diff
@@ -290,6 +335,35 @@ def main():
         default=None,
         help="MongoDB URI to use for the store.",
     )
+    parser.add_argument(
+        "--filter-unexpected-tool-calls",
+        action="store_true",
+        help="Enable Youtu-Agent style tool call filtering. "
+        "Filters out turns where the model generates unexpected content after a tool call.",
+    )
+    parser.add_argument(
+        "--experiment-name",
+        type=str,
+        default=None,
+        help="Custom experiment name for W&B logging (default: calc_x or auto-generated for CI)",
+    )
+    parser.add_argument(
+        "--n-gpus",
+        type=int,
+        default=1,
+        help="Number of GPUs per node for distributed training (default: 1)",
+    )
+    parser.add_argument(
+        "--checkpoint-dir",
+        type=str,
+        default="/home/jovyan/msra/experiments/checkpoints",
+        help="Directory to save checkpoints (default: /home/jovyan/msra/experiments/checkpoints)",
+    )
+    parser.add_argument(
+        "--resume",
+        action="store_true",
+        help="Resume training from the latest checkpoint in checkpoint-dir",
+    )

     args = parser.parse_args()
```

Review comment (on the `--checkpoint-dir` default): also here.

Author reply: Thank you for your careful review and for raising this question. To clarify, /home/jovyan is not a specific person's directory; it is the default home directory name on the OpenHPC server provided by my university (GIST). The msra folder is my personal working directory that I created specifically for this project, which is also linked to my GitHub repository. I have attached screenshots of my university's HPC-AI Service Portal as evidence. As you can see, /home/jovyan is the default home directory automatically assigned when a workspace is created on this server.
```diff
@@ -321,6 +395,11 @@ def main():
         trajectory_level=args.trajectory_level,
         weave=args.weave,
         mongo_uri=args.mongo_uri,
+        filter_unexpected_tool_calls=args.filter_unexpected_tool_calls,
+        experiment_name=args.experiment_name,
+        n_gpus=args.n_gpus,
+        checkpoint_dir=args.checkpoint_dir,
+        resume=args.resume,
     )
```
Review comment: Small comment: only set the logging metrics visible when `self.tool_parser` is not None.
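A minimal sketch of how that suggestion could be applied, assuming a hypothetical aggregator-like class; the class, attribute, and metric names here are illustrative placeholders, not the actual code touched by this PR:

```python
# Hypothetical illustration of the reviewer's suggestion: only expose the
# tool-call filtering metrics when `self.tool_parser` is not None. Names are assumed.
from typing import Any, Dict, Optional


class TraceAggregatorSketch:
    def __init__(self, tool_parser: Optional[Any] = None) -> None:
        self.tool_parser = tool_parser
        self.num_filtered_turns = 0

    def collect_metrics(self) -> Dict[str, float]:
        metrics: Dict[str, float] = {"aggregated_traces": 1.0}
        # Guard the filtering metrics: without a tool parser the counters are
        # meaningless and would only add noise to the logged metrics.
        if self.tool_parser is not None:
            metrics["filtered_unexpected_tool_call_turns"] = float(self.num_filtered_turns)
        return metrics
```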