Commit ae13326

feat: implement TinkerScript server functionality and enhance configuration syncing
1 parent 81e2b7d commit ae13326

File tree

10 files changed: +658 -114 lines changed


ajet/copilot/job.py

Lines changed: 6 additions & 5 deletions
@@ -54,13 +54,14 @@ def __init__(
         self.config.ajet.trainer_common.algorithm.adv_estimator = algorithm
         if n_gpu_for_infer is None and backbone == "trinity":
             raise ValueError("Please specify `n_gpu_for_infer` (n_gpu_for_infer < n_gpu) for trinity backbone.")
-        if n_gpu_for_infer is not None and backbone == "verl":
+        if (n_gpu_for_infer is not None) and backbone == "verl":
             raise ValueError("n_gpu_for_infer is only for trinity backbone, please set it to `None`.")
         else:
-            assert isinstance(n_gpu_for_infer, int)
-            assert n_gpu_for_infer < n_gpu, "`n_gpu_for_infer` should be less than `n_gpu`."
-            self.config.ajet.rollout.n_vllm_engine = n_gpu_for_infer
-            self.config.ajet.rollout.tensor_model_parallel_size = 1
+            if backbone == "trinity":
+                assert isinstance(n_gpu_for_infer, int), f"`n_gpu_for_infer` should be int, got {type(n_gpu_for_infer)}."
+                assert n_gpu_for_infer < n_gpu, "`n_gpu_for_infer` should be less than `n_gpu`."
+                self.config.ajet.rollout.n_vllm_engine = n_gpu_for_infer
+                self.config.ajet.rollout.tensor_model_parallel_size = 1
 
     def build_job_from_yaml(self, yaml_path: str | None) -> dict:
         self.exp_name = datetime.now().strftime("ajet_job_%Y%m%d_%H%M%S")
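
For reference, a minimal standalone sketch of the validation logic that this hunk ends up with. The checks themselves mirror the diff above; the free function and the example values are hypothetical, not part of the project's API.

# Hypothetical standalone version of the post-commit check; only the logic is taken from the diff.
def check_infer_gpus(backbone: str, n_gpu: int, n_gpu_for_infer: int | None) -> None:
    if n_gpu_for_infer is None and backbone == "trinity":
        raise ValueError("Please specify `n_gpu_for_infer` (n_gpu_for_infer < n_gpu) for trinity backbone.")
    if (n_gpu_for_infer is not None) and backbone == "verl":
        raise ValueError("n_gpu_for_infer is only for trinity backbone, please set it to `None`.")
    else:
        if backbone == "trinity":
            # trinity now gets an explicit type check and a clearer assertion message
            assert isinstance(n_gpu_for_infer, int), f"`n_gpu_for_infer` should be int, got {type(n_gpu_for_infer)}."
            assert n_gpu_for_infer < n_gpu, "`n_gpu_for_infer` should be less than `n_gpu`."

check_infer_gpus("trinity", n_gpu=8, n_gpu_for_infer=2)   # passes: 2 of 8 GPUs reserved for inference
check_infer_gpus("verl", n_gpu=8, n_gpu_for_infer=None)   # passes: verl does not take this argument

The practical effect of the hunk is that the assertions (and the rollout engine overrides) now run only for the trinity backbone, so a `debug` or `verl` run with `n_gpu_for_infer=None` no longer trips `assert isinstance(None, int)`.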

ajet/default_config/ajet_default.yaml

Lines changed: 2 additions & 0 deletions
@@ -86,6 +86,7 @@ ajet:
 
 
   task_reader:
+    # how to read dataset / environment
     type: huggingface_dat_repo # `env_service` or `jsonl_dataset_file` or `huggingface_dat_repo` or `data_generation` or `random_dummy`
 
     # when `type == jsonl_dataset_file`
@@ -284,6 +285,7 @@ ajet:
   enable_tinkerscript_mode: False
   # both tinkerscript / oai share the same interchange server
   enable_experimental_interchange_server: False
+  # interchange server configuration
   interchange_server:
     interchange_method: 'ipc' # options: 'tcp' (multi-nodes) or 'ipc' (1 node)
     interchange_server_port: 'auto'
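
The two added lines only document keys that already exist in the defaults. A quick sketch of reading them, assuming OmegaConf (the config backend Hydra builds on; Hydra is referenced by the new config file in this commit), with an illustrative path and prints:

# Illustrative only: load the defaults as plain YAML via OmegaConf and peek at the documented keys.
from omegaconf import OmegaConf

cfg = OmegaConf.load("ajet/default_config/ajet_default.yaml")
print(cfg.ajet.task_reader.type)                         # huggingface_dat_repo
print(cfg.ajet.enable_experimental_interchange_server)   # False (off by default)
print(cfg.ajet.interchange_server.interchange_method)    # 'ipc'
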
Lines changed: 323 additions & 0 deletions
@@ -0,0 +1,323 @@
+# ------------------ main configuration ------------------
+ajet:
+  project_name: "ajet_default_project"
+  experiment_name: "read_yaml_name"
+  experiment_dir: "auto" # {exp-dir}/{experiment_name}
+  backbone: debug # `debug` or `trinity` or `verl`
+
+
+  model:
+    # which model should be trained
+    path: /path/to/model/such/as/Qwen/Qwen2___5-14B-Instruct
+
+  data:
+    # max number of tokens for prompt
+    max_prompt_length: 3000
+    # max number of tokens for response
+    max_response_length: 15000
+    # how many tasks per training batch
+    train_batch_size: 32
+    # [Hint]: The final number of samples per update will be: N_{sample} = (data.train_batch_size * rollout.num_repeat * rollout.multi_turn.expected_steps)
+
+
+  rollout:
+
+    # the path to the workflow class
+    user_workflow: tutorial.example_appworld.appworld->ExampleAgentScopeWorkflow
+
+    # whether or not to disable all tool calls
+    force_disable_toolcalls: False
+
+    # maximum number of parallel environments / simulation workers
+    max_env_worker: 64
+
+    # step reward gamma (experimental, do not change)
+    gamma: 1.0
+
+    # monitor the LLM's abnormal behaviors during rollout
+    compute_madness_checklist:
+      - "nonsense"
+    # send signal to terminate context tracing when the LLM is losing control
+    agent_madness_termination: True # terminate_after_gone_mad
+    # punish the LLM when it is detected as having lost control
+    agent_madness_reward: -1.0
+
+    # max response length in one turn
+    max_response_length_in_one_turn: 4096
+
+    # max token length allowed for the model during rollout
+    max_model_len: 18000
+
+    multi_turn:
+      # how many samples should be collected for each task run
+      max_sample_per_task: 30
+      # limit the maximum steps for each task
+      max_steps: 30
+      # the expected steps for each task, used to calculate the training batch size for trinity
+      expected_steps: 1
+
+    # TP size for rollout engine
+    tensor_model_parallel_size: 1
+
+    # the number of vllm engines; the number of gpus for infer is `n_vllm_engine*tensor_model_parallel_size`; this argument is NOT effective when NOT using trinity
+    n_vllm_engine: 1
+
+    # how many sequences are allowed to be processed in parallel by each vllm engine
+    max_num_seqs: 10
+
+    # which infer engine to use, options: (vllm, sglang)
+    name: vllm
+
+    # how many times a task should be repeated
+    num_repeat: 4
+
+    # rollout kwargs
+    temperature: 0.9
+    top_p: 1.0
+
+    # validation kwargs
+    val_kwargs:
+      # the sampling settings used when generating responses during validation
+      temperature: 0.0
+      top_k: -1
+      top_p: 1.0
+      do_sample: False
+      num_repeat: 1
+
+
+  task_reader:
+    # how to read dataset / environment
+    type: huggingface_dat_repo # `env_service` or `jsonl_dataset_file` or `huggingface_dat_repo` or `data_generation` or `random_dummy`
+
+    # when `type == jsonl_dataset_file`
+    jsonl_dataset_file:
+      training:
+        file_path: "/path/to/training/data.jsonl"
+      validation:
+        file_path: "/path/to/validation/data.jsonl"
+
+    # when `type == env_service`
+    env_service:
+      env_type: "appworld"
+      env_url: "http://127.0.0.1:8080"
+      env_action_preference: code # code, text, box
+      training_split: train
+      validation_split: dev
+
+    # when `type == huggingface_dat_repo`
+    huggingface_dat_repo:
+      dataset_path: "gsm8k"
+      training_split: "train"
+      validation_split: "validation"
+
+    # when `type == data_generation`
+    data_generation:
+      document_reader:
+        document_path:
+          - 'dataset/document/your-document1.pdf'
+          - 'dataset/document/your-document2.pdf'
+        languages:
+          - eng
+        chunk_size: 5120
+        split_by: "sentence"
+        cache_enabled: true
+      query_reader:
+        type: jsonl_dataset_file
+        jsonl_dataset_file:
+          training:
+            file_path: 'dataset/jsonl/your-queries.jsonl'
+      task_num: 10
+      llm_model: qwen-long
+      llm_response_length: 8192
+      num_workers: 32
+      sampling_params:
+        temperature: 0
+      deduplication_filter:
+        enabled: true
+        params:
+          similarity_threshold: 0.8
+          db_path: ./.similarity_db
+          model: text-embedding-v4
+          api_key: null # load from the env
+          base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
+
+
+  task_judge:
+    judge_type: customized_protocol # Options: 'customized_protocol', 'rubrics_auto_grader'
+
+    # when `judge_type == customized_protocol`
+    judge_protocol: ajet.task_judge.env_service_as_judge->EnvServiceJudge
+
+    # the helper LLM model used for LLM-as-Judge
+    alien_llm_model: qwen3-235b-a22b-instruct-2507
+    alien_llm_response_length: 512
+
+    # when `judge_type == rubrics_auto_grader`
+    rubrics_auto_grader:
+      model_name: qwen-max
+      grader_mode: pointwise
+      language: en
+      query_specific_generate_number: 1
+      enable_categorization: false
+      categories_number: 5
+      grader_name: "auto_grader"
+      query_field: main_query
+      answer_field: final_answer
+      reference_field: answer
+      custom_evaluation_prompt: null # dict or PromptTemplate or None
+      input_data_type: jsonl_dataset_file # `env_service` or `jsonl_dataset_file` or `huggingface_dat_repo`
+      jsonl_dataset_file:
+        training:
+          file_path: "tutorial/example_rm_auto_grader/rubrics_train.jsonl"
+      # Pointwise mode settings
+      min_score: 0
+      max_score: 1
+
+
+  # context tracker protocol is valid ONLY when `use_agentscope_protocol=False`
+  context_tracker:
+
+    # timeline merging policy used in Context Tracker
+    timeline_merging_policy:
+
+      # compare_level = "text": relaxed comparison by text; easier to match, at very little cost
+      # compare_level = "token": strict comparison by token; causes less aggressive merging
+      timeline_compare_level: "text" # options: "text", "token"
+
+      # whether or not to ignore tool calls when comparing steps; defaults to `True` to make merging more aggressive
+      ignore_tools: True
+
+    # Fix Retokenization Drift: inconsistencies between training and inference token arrays
+    # Related reading: https://github.com/vllm-project/vllm/pull/22587 (note that the implementation is very different)
+    fix_retokenization_drift: True
+
+    # log tool format check results
+    log_tool_format_check: False
+
+    # log tool format check error details
+    log_tool_format_error_detail: False
+
+    # detect at which point the timeline stops growing linearly and causes a snap during an episode: this adds additional computation.
+    detect_timeline_snap: False
+
+    # deprecated
+    alien_llm_model: qwen3-235b-a22b-instruct-2507
+
+    # deprecated
+    alien_llm_response_length: 512
+
+
+  # when backbone is `debug`, debug related configurations
+  debug:
+
+    # max parallel runners in debug mode
+    debug_max_parallel: 4
+
+    # how many tasks to sample from the training set
+    debug_first_n_tasks: 2
+
+    # the port of the background vllm engine
+    debug_vllm_port: 18000
+
+    # the seed of the background vllm engine
+    debug_vllm_seed: 12345
+
+    # the TP size in debug mode
+    debug_tensor_parallel_size: 4
+
+
+  # trainer common configurations
+  trainer_common:
+
+    # validation before training
+    val_before_train: False
+    val_pass_n: 4
+
+    # save and test frequency (in steps)
+    save_freq: 20
+    test_freq: 20
+
+    # total training epochs
+    total_epochs: 50
+
+    nnodes: 1
+    n_gpus_per_node: 8
+
+    # logger selection
+    logger: swanlab
+
+    # algorithm setting
+    algorithm:
+      adv_estimator: grpo
+      use_kl_in_reward: False
+
+    # number of optimizer.step per big batch
+    mini_batch_num: 1
+
+    # verl offload configs
+    fsdp_config:
+      param_offload: True
+      optimizer_offload: True
+
+    # learning rate
+    optim:
+      lr: 1e-6
+
+    # enable KL loss regularization
+    use_kl_loss: True
+
+    # kl divergence loss coefficient
+    kl_loss_coef: 0.002
+    kl_loss_type: low_var_kl
+
+    # Ulysses specific configs
+    ulysses_sequence_parallel_size: 1
+
+    # base directory to save checkpoints
+    checkpoint_base_dir: ./saved_checkpoints
+
+    # whether to save train/eval trajectories to JSON files
+    save_trajectory_as_json_file: False
+
+
+  # the experimental ZeroMQ interchange server feature that enables the `tuner.as_oai_baseurl_apikey` feature
+  enable_tinkerscript_mode: True
+  # both tinkerscript / oai share the same interchange server
+  enable_experimental_interchange_server: True
+  # interchange server configuration
+  interchange_server:
+    interchange_method: 'ipc' # options: 'tcp' (multi-nodes) or 'ipc' (1 node)
+    interchange_server_port: 10086
+    num_fastapi_process: 4 # 1, 2 or 4 is fine
+    max_fastapi_threads: 128 # 64 or 128 is fine
+    max_inference_tracker_threads: 64 # recommended to be equal to `ajet.rollout.max_env_worker`
+
+
+  task_runner:
+    # llm infer submit method
+    llm_infer_submit_method: "async" # options: "sync", "async"
+
+    # how to wrap the user-defined workflow
+    wrapper_type: "asyncio-with-gc"
+    # - wrapper_type: "asyncio-with-gc": safe, with periodic garbage collection to prevent event loop leaks (recommended)
+    # - wrapper_type: "asyncio": fast, but may cause event loop leaks in the long run
+    # - wrapper_type: "multi-processing": safe, but resource consuming
+
+    # when `wrapper_type` is `multi-processing`, the timeout for each task
+    wrapper_multiprocessing_timeout: 3600 # in seconds
+
+    # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN.
+    execute_test: False # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN.
+    execute_testing_lambda: "" # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN.
+
+
+# ------------------ do not edit ------------------
+hydra:
+  searchpath:
+    - file://ajet/default_config/verl
+
+# ------------------ do not edit ------------------
+defaults:
+  - verl_default # verl inherit 1/1
+  - _self_
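
This new config enables both `enable_tinkerscript_mode` and the experimental interchange server, which is what backs the `tuner.as_oai_baseurl_apikey` feature mentioned in the comment above. A hedged sketch of how a client might talk to such an OpenAI-compatible endpoint: the port 10086, the model path, and the temperature come from the config above, while the `/v1` route, the host, and the placeholder API key are assumptions, not the project's documented interface.

# Sketch only: point a standard OpenAI client at the interchange server.
# base_url route and api_key are assumptions; port 10086 comes from `interchange_server_port` above.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:10086/v1", api_key="EMPTY")
resp = client.chat.completions.create(
    model="/path/to/model/such/as/Qwen/Qwen2___5-14B-Instruct",  # `ajet.model.path` placeholder
    messages=[{"role": "user", "content": "hello"}],
    temperature=0.9,  # matches `ajet.rollout.temperature`
)
print(resp.choices[0].message.content)

Per the comment on `interchange_method`, the default 'ipc' setting keeps the server and rollout workers on a single node; 'tcp' is the multi-node option.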
