|
| 1 | +# ------------------ main configuration ------------------ |
| 2 | +ajet: |
| 3 | + project_name: "ajet_default_project" |
| 4 | + experiment_name: "read_yaml_name" |
| 5 | + experiment_dir: "auto" # {exp-dir}/{experiment_name} |
| 6 | + backbone: debug # `debug` or `trinity` or `verl` |
| 7 | + |
| 8 | + |
| 9 | + model: |
| 10 | + # which model should be trained |
| 11 | + path: /path/to/model/such/as/Qwen/Qwen2___5-14B-Instruct |
| 12 | + |
| 13 | + data: |
| 14 | + # max number of tokens for prompt |
| 15 | + max_prompt_length: 3000 |
| 16 | + # max number of tokens for response |
| 17 | + max_response_length: 15000 |
| 18 | + # how many tasks per training batch |
| 19 | + train_batch_size: 32 |
| 20 | + # [Hint]: The final number of samples per update will be: N_{sample} = (data.train_batch_size * rollout.num_repeat * rollout.multi_turn.expected_steps) |
| 21 | + |
| 22 | + |
| 23 | + rollout: |
| 24 | + |
| 25 | + # the path to the workflow class |
| 26 | + user_workflow: tutorial.example_appworld.appworld->ExampleAgentScopeWorkflow |
| 27 | + |
| 28 | + # whether or not to disable all tool calls |
| 29 | + force_disable_toolcalls: False |
| 30 | + |
| 31 | + # maximum number of parallel environments / simulate workers |
| 32 | + max_env_worker: 64 |
| 33 | + |
| 34 | + # step reward gamma (experimental, do not change) |
| 35 | + gamma: 1.0 |
| 36 | + |
| 37 | + # monitor LLM's abnormal behaviors during rollout
| 38 | + compute_madness_checklist: |
| 39 | + - "nonsense" |
| 40 | + # send signal to terminate context tracing when LLM is losing control |
| 41 | + agent_madness_termination: True # terminate_after_gone_mad |
| 42 | + # reward penalty applied when the LLM is detected as having lost control
| 43 | + agent_madness_reward: -1.0 |
| 44 | + |
| 45 | + # max response length in one turn |
| 46 | + max_response_length_in_one_turn: 4096 |
| 47 | + |
| 48 | + # max token length allowed for the model during rollout |
| 49 | + max_model_len: 18000 |
| 50 | + |
| 51 | + multi_turn: |
| 52 | + # how many samples should be collected for each task run |
| 53 | + max_sample_per_task: 30 |
| 54 | + # limit the maximum steps for each task |
| 55 | + max_steps: 30 |
| 56 | + # the expected steps for each task, used to calculate the training batch size for trinity |
| 57 | + expected_steps: 1 |
| 58 | + |
| 59 | + # TP size for rollout engine |
| 60 | + tensor_model_parallel_size: 1 |
| 61 | + |
| 62 | + # the number of vllm engines; the number of GPUs for inference is `n_vllm_engine*tensor_model_parallel_size`; this argument is only effective when using trinity
| 63 | + n_vllm_engine: 1 |
| 64 | + |
| 65 | + # how many sequences are allowed to be processed in parallel by each vllm engine |
| 66 | + max_num_seqs: 10 |
| 67 | + |
| 68 | + # the usage of infer engine, options: (vllm, sglang) |
| 69 | + name: vllm |
| 70 | + |
| 71 | + # how many times a task should be repeated |
| 72 | + num_repeat: 4 |
| 73 | + |
| 74 | + # rollout kwargs |
| 75 | + temperature: 0.9 |
| 76 | + top_p: 1.0 |
| 77 | + |
| 78 | + # validation kwargs |
| 79 | + val_kwargs: |
| 80 | + # sampling settings used when generating responses during validation
| 81 | + temperature: 0.0 |
| 82 | + top_k: -1 |
| 83 | + top_p: 1.0 |
| 84 | + do_sample: False |
| 85 | + num_repeat: 1 |
| 86 | + |
| 87 | + |
| 88 | + task_reader: |
| 89 | + # how to read dataset / environment |
| 90 | + type: huggingface_dat_repo # `env_service` or `jsonl_dataset_file` or `huggingface_dat_repo` or `data_generation` or `random_dummy` |
| 91 | + |
| 92 | + # when `type == jsonl_dataset_file` |
| 93 | + jsonl_dataset_file: |
| 94 | + training: |
| 95 | + file_path: "/path/to/training/data.jsonl" |
| 96 | + validation: |
| 97 | + file_path: "/path/to/validation/data.jsonl" |
| 98 | + |
| 99 | + # when `type == env_service` |
| 100 | + env_service: |
| 101 | + env_type: "appworld" |
| 102 | + env_url: "http://127.0.0.1:8080" |
| 103 | + env_action_preference: code # code, text, box |
| 104 | + training_split: train |
| 105 | + validation_split: dev |
| 106 | + |
| 107 | + # when `type == huggingface_dat_repo` |
| 108 | + huggingface_dat_repo: |
| 109 | + dataset_path: "gsm8k" |
| 110 | + training_split: "train" |
| 111 | + validation_split: "validation" |
| 112 | + |
| 113 | + # when `type == data_generation` |
| 114 | + data_generation: |
| 115 | + document_reader: |
| 116 | + document_path: |
| 117 | + - 'dataset/document/your-document1.pdf' |
| 118 | + - 'dataset/document/your-document2.pdf' |
| 119 | + languages: |
| 120 | + - eng |
| 121 | + chunk_size: 5120 |
| 122 | + split_by: "sentence" |
| 123 | + cache_enabled: true |
| 124 | + query_reader: |
| 125 | + type: jsonl_dataset_file |
| 126 | + jsonl_dataset_file: |
| 127 | + training: |
| 128 | + file_path: 'dataset/jsonl/your-queries.jsonl' |
| 129 | + task_num: 10 |
| 130 | + llm_model: qwen-long |
| 131 | + llm_response_length: 8192 |
| 132 | + num_workers: 32 |
| 133 | + sampling_params: |
| 134 | + temperature: 0 |
| 135 | + deduplication_filter: |
| 136 | + enabled: true |
| 137 | + params: |
| 138 | + similarity_threshold: 0.8 |
| 139 | + db_path: ./.similarity_db |
| 140 | + model: text-embedding-v4 |
| 141 | + api_key: null # load from the env |
| 142 | + base_url: https://dashscope.aliyuncs.com/compatible-mode/v1 |
| 143 | + |
| 144 | + |
| 145 | + task_judge: |
| 146 | + judge_type: customized_protocol # Options: 'customized_protocol', 'rubrics_auto_grader' |
| 147 | + |
| 148 | + # when `judge_type == customized_protocol` |
| 149 | + judge_protocol: ajet.task_judge.env_service_as_judge->EnvServiceJudge |
| 150 | + |
| 151 | + # the helper LLM model used for LLM-AS-Judge |
| 152 | + alien_llm_model: qwen3-235b-a22b-instruct-2507 |
| 153 | + alien_llm_response_length: 512 |
| 154 | + |
| 155 | + # when `judge_type == rubrics_auto_grader` |
| 156 | + rubrics_auto_grader: |
| 157 | + model_name: qwen-max |
| 158 | + grader_mode: pointwise |
| 159 | + language: en |
| 160 | + query_specific_generate_number: 1 |
| 161 | + enable_categorization: false |
| 162 | + categories_number: 5 |
| 163 | + grader_name: "auto_grader" |
| 164 | + query_field: main_query |
| 165 | + answer_field: final_answer |
| 166 | + reference_field: answer |
| 167 | + custom_evaluation_prompt: null # dict or PromptTemplate or None |
| 168 | + input_data_type: jsonl_dataset_file # `env_service` or `jsonl_dataset_file` or `huggingface_dat_repo` |
| 169 | + jsonl_dataset_file: |
| 170 | + training: |
| 171 | + file_path: "tutorial/example_rm_auto_grader/rubrics_train.jsonl" |
| 172 | + # Pointwise mode settings |
| 173 | + min_score: 0 |
| 174 | + max_score: 1 |
| 175 | + |
| 176 | + |
| 177 | + |
| 178 | + # context tracker protocol is valid ONLY when `use_agentscope_protocol=False` |
| 179 | + context_tracker: |
| 180 | + |
| 181 | + # timeline merging policy used in Context Tracker |
| 182 | + timeline_merging_policy: |
| 183 | + |
| 184 | + # compare_level = "text": relaxed comparison using text; easier to match, at very little cost
| 185 | + # compare_level = "token": strict comparison using tokens; results in less aggressive merging
| 186 | + timeline_compare_level: "text" # options: "text", "token" |
| 187 | + |
| 188 | + # whether or not to ignore tool calls when comparing steps, default to `True` to make merging more aggressive |
| 189 | + ignore_tools: True |
| 190 | + |
| 191 | + # Fix Retokenization Drift: inconsistencies between training and inference token array |
| 192 | + # Related reading: https://github.com/vllm-project/vllm/pull/22587 (note that the implementation is very different) |
| 193 | + fix_retokenization_drift: True |
| 194 | + |
| 195 | + # log tool format check results |
| 196 | + log_tool_format_check: False |
| 197 | + |
| 198 | + # log detailed tool format error information
| 199 | + log_tool_format_error_detail: False |
| 200 | + |
| 201 | + # detect at which point the timeline stops growing linearly and causes a snap during an episode: this incurs additional computation.
| 202 | + detect_timeline_snap: False |
| 203 | + |
| 204 | + # deprecated |
| 205 | + alien_llm_model: qwen3-235b-a22b-instruct-2507 |
| 206 | + |
| 207 | + # deprecated |
| 208 | + alien_llm_response_length: 512 |
| 209 | + |
| 210 | + |
| 211 | + # when backbone is `debug`, debug related configurations |
| 212 | + debug: |
| 213 | + |
| 214 | + # max parallel runners in debug mode |
| 215 | + debug_max_parallel: 4 |
| 216 | + |
| 217 | + # how many tasks to sample from the training set
| 218 | + debug_first_n_tasks: 2 |
| 219 | + |
| 220 | + # what is the vllm engine port in the background |
| 221 | + debug_vllm_port: 18000 |
| 222 | + |
| 223 | + # what is the seed of the vllm engine in the background |
| 224 | + debug_vllm_seed: 12345 |
| 225 | + |
| 226 | + # what is the TP size in debug mode |
| 227 | + debug_tensor_parallel_size: 4 |
| 228 | + |
| 229 | + |
| 230 | + # trainer common configurations |
| 231 | + trainer_common: |
| 232 | + |
| 233 | + # validation before training |
| 234 | + val_before_train: False |
| 235 | + val_pass_n: 4 |
| 236 | + |
| 237 | + # save and test frequency (in step) |
| 238 | + save_freq: 20 |
| 239 | + test_freq: 20 |
| 240 | + |
| 241 | + # total training epochs |
| 242 | + total_epochs: 50 |
| 243 | + |
| 244 | + nnodes: 1 |
| 245 | + n_gpus_per_node: 8 |
| 246 | + |
| 247 | + # logger selection |
| 248 | + logger: swanlab |
| 249 | + |
| 250 | + # algorithm setting |
| 251 | + algorithm: |
| 252 | + adv_estimator: grpo |
| 253 | + use_kl_in_reward: False |
| 254 | + |
| 255 | + # number of optimizer.step per big batch |
| 256 | + mini_batch_num: 1 |
| 257 | + |
| 258 | + # verl offload configs |
| 259 | + fsdp_config: |
| 260 | + param_offload: True |
| 261 | + optimizer_offload: True |
| 262 | + |
| 263 | + # learning rate |
| 264 | + optim: |
| 265 | + lr: 1e-6 |
| 266 | + |
| 267 | + # enable KL loss regularization |
| 268 | + use_kl_loss: True |
| 269 | + |
| 270 | + # kl divergence loss coefficient |
| 271 | + kl_loss_coef: 0.002 |
| 272 | + kl_loss_type: low_var_kl |
| 273 | + |
| 274 | + # Ulysses specific configs |
| 275 | + ulysses_sequence_parallel_size: 1 |
| 276 | + |
| 277 | + # base directory to save checkpoints |
| 278 | + checkpoint_base_dir: ./saved_checkpoints |
| 279 | + |
| 280 | + # whether to save train/eval trajectories to JSON files |
| 281 | + save_trajectory_as_json_file: False |
| 282 | + |
| 283 | + |
| 284 | + # the experimental ZeroMQ interchange server feature that allows `tuner.as_oai_baseurl_apikey` feature |
| 285 | + enable_tinkerscript_mode: True |
| 286 | + # both tinkerscript / oai share the same interchange server |
| 287 | + enable_experimental_interchange_server: True |
| 288 | + # interchange server configuration |
| 289 | + interchange_server: |
| 290 | + interchange_method: 'ipc' # options: 'tcp' (multi-nodes) or 'ipc' (1 node) |
| 291 | + interchange_server_port: 10086 |
| 292 | + num_fastapi_process: 4 # 1, 2 or 4 is fine |
| 293 | + max_fastapi_threads: 128 # 64 or 128 is fine |
| 294 | + max_inference_tracker_threads: 64 # recommend to be equal to `ajet.rollout.max_env_worker` |
| 295 | + |
| 296 | + |
| 297 | + task_runner: |
| 298 | + # method used to submit LLM inference requests
| 299 | + llm_infer_submit_method: "async" # options: "sync", "async" |
| 300 | + |
| 301 | + # how to wrap the user-defined workflow |
| 302 | + wrapper_type: "asyncio-with-gc" |
| 303 | + # - wrapper_type: "asyncio-with-gc": safe, with periodic garbage collection to prevent event loop leaks (recommended) |
| 304 | + # - wrapper_type: "asyncio": fast, but may cause event loop leak in long run |
| 305 | + # - wrapper_type: "multi-processing": safe, but resource consuming |
| 306 | + |
| 307 | + # when `wrapper_type` is `multi-processing`, the timeout for each task |
| 308 | + wrapper_multiprocessing_timeout: 3600 # in seconds |
| 309 | + |
| 310 | + # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN. |
| 311 | + execute_test: False # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN. |
| 312 | + execute_testing_lambda: "" # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN. |
| 313 | + |
| 314 | + |
| 315 | +# ------------------ do not edit ------------------ |
| 316 | +hydra: |
| 317 | + searchpath: |
| 318 | + - file://ajet/default_config/verl |
| 319 | + |
| 320 | +# ------------------ do not edit ------------------ |
| 321 | +defaults: |
| 322 | + - verl_default # verl inherit 1/1 |
| 323 | + - _self_ |
0 commit comments