chenyushuo
diff --git a/‎examples/learn2ask/data_prepare/2_build_dataset.py‎
Lines changed: 0 additions & 44 deletions b/‎examples/learn2ask/data_prepare/2_build_dataset.py‎
Lines changed: 0 additions & 44 deletions
diff --git a/‎examples/learn2ask/README.md‎ ‎examples/learn_to_ask/README.md‎examples/learn2ask/README.md renamed to examples/learn_to_ask/README.md
Lines changed: 10 additions & 10 deletions b/‎examples/learn2ask/README.md‎ ‎examples/learn_to_ask/README.md‎examples/learn2ask/README.md renamed to examples/learn_to_ask/README.md
Lines changed: 10 additions & 10 deletions
diff --git a/‎…/data_prepare/1_info_extract_pipeline.py‎ ‎…/data_prepare/1_info_extract_pipeline.py‎examples/learn2ask/data_prepare/1_info_extract_pipeline.py renamed to examples/learn_to_ask/data_prepare/1_info_extract_pipeline.py
Lines changed: 17 additions & 11 deletions b/‎…/data_prepare/1_info_extract_pipeline.py‎ ‎…/data_prepare/1_info_extract_pipeline.py‎examples/learn2ask/data_prepare/1_info_extract_pipeline.py renamed to examples/learn_to_ask/data_prepare/1_info_extract_pipeline.py
Lines changed: 17 additions & 11 deletions
diff --git a/‎examples/learn_to_ask/data_prepare/2_build_dataset.py‎
Lines changed: 54 additions & 0 deletions b/‎examples/learn_to_ask/data_prepare/2_build_dataset.py‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎…/data_prepare/3_rollout_then_evaluate.py‎ ‎…/data_prepare/3_rollout_then_evaluate.py‎examples/learn2ask/data_prepare/3_rollout_then_evaluate.py renamed to examples/learn_to_ask/data_prepare/3_rollout_then_evaluate.py
Lines changed: 45 additions & 21 deletions b/‎…/data_prepare/3_rollout_then_evaluate.py‎ ‎…/data_prepare/3_rollout_then_evaluate.py‎examples/learn2ask/data_prepare/3_rollout_then_evaluate.py renamed to examples/learn_to_ask/data_prepare/3_rollout_then_evaluate.py
Lines changed: 45 additions & 21 deletions
diff --git a/‎…2ask/data_prepare/llm_info_extraction.py‎ ‎…_ask/data_prepare/llm_info_extraction.py‎examples/learn2ask/data_prepare/llm_info_extraction.py renamed to examples/learn_to_ask/data_prepare/llm_info_extraction.py b/‎…2ask/data_prepare/llm_info_extraction.py‎ ‎…_ask/data_prepare/llm_info_extraction.py‎examples/learn2ask/data_prepare/llm_info_extraction.py renamed to examples/learn_to_ask/data_prepare/llm_info_extraction.py
diff --git a/‎…arn2ask/data_prepare/message_splitter.py‎ ‎…_to_ask/data_prepare/message_splitter.py‎examples/learn2ask/data_prepare/message_splitter.py renamed to examples/learn_to_ask/data_prepare/message_splitter.py b/‎…arn2ask/data_prepare/message_splitter.py‎ ‎…_to_ask/data_prepare/message_splitter.py‎examples/learn2ask/data_prepare/message_splitter.py renamed to examples/learn_to_ask/data_prepare/message_splitter.py
diff --git a/‎examples/learn2ask/train.yaml‎ ‎examples/learn_to_ask/train.yaml‎examples/learn2ask/train.yaml renamed to examples/learn_to_ask/train.yaml
Lines changed: 1 addition & 3 deletions b/‎examples/learn2ask/train.yaml‎ ‎examples/learn_to_ask/train.yaml‎examples/learn2ask/train.yaml renamed to examples/learn_to_ask/train.yaml
Lines changed: 1 addition & 3 deletions
diff --git a/‎…es/learn2ask/plugins/prompt_learn2ask.py‎ ‎…earn_to_ask/workflow/prompt_learn2ask.py‎examples/learn2ask/plugins/prompt_learn2ask.py renamed to examples/learn_to_ask/workflow/prompt_learn2ask.py b/‎…es/learn2ask/plugins/prompt_learn2ask.py‎ ‎…earn_to_ask/workflow/prompt_learn2ask.py‎examples/learn2ask/plugins/prompt_learn2ask.py renamed to examples/learn_to_ask/workflow/prompt_learn2ask.py
diff --git a/‎…/learn2ask/plugins/workflow_learn2ask.py‎ ‎…rn_to_ask/workflow/workflow_learn2ask.py‎examples/learn2ask/plugins/workflow_learn2ask.py renamed to examples/learn_to_ask/workflow/workflow_learn2ask.py b/‎…/learn2ask/plugins/workflow_learn2ask.py‎ ‎…rn_to_ask/workflow/workflow_learn2ask.py‎examples/learn2ask/plugins/workflow_learn2ask.py renamed to examples/learn_to_ask/workflow/workflow_learn2ask.py
@@ -3,10 +3,10 @@
 This guide demonstrates how to train a proactive LLM using the **Learn2Ask** framework from [Grounded in Reality: Learning and Deploying Proactive LLM from Offline Logs](https://arxiv.org/abs/2510.25441).
 **Hardware requirement**: ≥32 H20 (or equivalent) GPUs for full-scale reproduction.
 
-All relevant files are located under `examples/learn2ask/`:
-- Workflow & prompts: `examples/learn2ask/workflow/`
-- Training config: `examples/learn2ask/train.yaml`
-- Data preparation scripts: `examples/learn2ask/data_prepare/`
+All relevant files are located under `examples/learn_to_ask/`:
+- Workflow & prompts: `examples/learn_to_ask/workflow/`
+- Training config: `examples/learn_to_ask/train.yaml`
+- Data preparation scripts: `examples/learn_to_ask/data_prepare/`
 
 ---
 
@@ -21,15 +21,15 @@ Download the [RealMedConv](https://huggingface.co/datasets/datajuicer/RealMedCon
   "messages": [{"role": "user", "content": "Sore throat, phlegm, red eyes, cough, hoarse voice"}, {"role": "user", "content": "I took Amoxicillin"}, {"role": "user", "content": "But I still don't feel well"}, {"role": "user", "content": "Mainly it's a respiratory infection, sore throat, phlegm, hoarse voice, red eyes"}, {"role": "user", "content": "When I wake up, there is a lot of eye discharge, and a lot of phlegm"}, {"role": "assistant", "content": "How long have the symptoms been present?"}, {"role": "user", "content": "About 2 days"}, {"role": "user", "content": "My eyes are very red"}, {"role": "assistant", "content": "Is there any discharge?"}, {"role": "user", "content": "Yes"}, {"role": "user", "content": "Please check my description, I wrote all the details"}, {"role": "assistant", "content": "Sure"}, {"role": "assistant", "content": "The internet was down just now"}, {"role": "user", "content": "Okay"}, {"role": "assistant", "content": "Is the discharge thick, thin, or stringy?"}, {"role": "user", "content": "It's thick"}, {"role": "user", "content": "Yellowish"}, {"role": "user", "content": "Mainly a lot in the morning, and a lot of phlegm"}, {"role": "assistant", "content": "Does it affect your vision? Do you have eye pain? Itchy eyes? Foreign body sensation? Tears?"}, {"role": "user", "content": "No"}, {"role": "user", "content": "Mainly still sore throat"}, {"role": "user", "content": "The eyes are just red and have discharge"}, {"role": "user", "content": "Sore throat, a lot of phlegm, mild cough, hoarse voice"}, {"role": "assistant", "content": "Okay"}, {"role": "assistant", "content": "Have you had any medical examinations or medication history? Any history of drug allergies or chronic diseases?"}, {"role": "user", "content": "No"}, {"role": "user", "content": "Please help as soon as possible, it's getting late"}, {"role": "assistant", "content": "<med_search>"}]
 }
 ```
-You need to perform the following preprocessing steps to turn the log in to training/testing samples for our learn2ask framework, there are two simple steps:
+You need to perform the following preprocessing steps to turn the log into training/testing samples for our `learn_to_ask` framework. There are two simple steps:
 - Segment the original conversation log (session) into context–future pairs, then extract `info_truth` labels from the `remaining_chat` field.
 ```bash
-python examples/learn2ask/workflow/data_prepare/1_info_extract_pipeline.py
+python examples/learn_to_ask/data_prepare/1_info_extract_pipeline.py --input_file /path/to/RealMedConv/train.jsonl --output_file examples/learn_to_ask/data_raw/train_processed.jsonl
 ```
 
 - Convert these samples into final training/testing datasets.
 ```bash
-examples/learn2ask/workflow/data_prepare/2_build_dataset.py
+python examples/learn_to_ask/data_prepare/2_build_dataset.py --input_file examples/learn_to_ask/data_raw/train_processed.jsonl --output_file examples/learn_to_ask/data/train.jsonl
 ```
 
 These scripts are implementations of the following procedures.
@@ -68,14 +68,14 @@ These ground truth are used to evaluate the rewards in training, e.g., $R_a$ and
 ---
 
 ## Step 2. Configure and Train
-Update `examples/learn2ask/train.yaml` with paths to:
+Update `examples/learn_to_ask/train.yaml` with paths to:
 - Your processed datasets,
 - Base model,
 - Checkpoint output directory.
 
 Then, launch training:
 ```bash
-trinity run --config examples/learn2ask/train.yaml --plugin-dir examples/learn2ask/workflow
+trinity run --config examples/learn_to_ask/train.yaml --plugin-dir examples/learn_to_ask/workflow
 ````
 ---
 
@@ -86,5 +86,5 @@ Use the rollout-n-evaluate pipeline:
 
 You may configure the settings then run the pipeline by launching:
 ```bash
-python examples/learn2ask/workflow/data_prepare/3_rollout_then_evaluate.py
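+python examples/learn_to_ask/data_prepare/3_rollout_then_evaluate.py --test_file_path /path/to/test.jsonl --rollout_file_path /path/to/rollout.jsonl --eval_model_path /path/to/ckpt --grader_model_path /path/to/grader_model --eval_file_path /path/to/rollout_eval_result.jsonl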
 ```
@@ -1,3 +1,4 @@
+import argparse
 import json
 import time
 
@@ -9,13 +10,11 @@ def process_jsonl_file(
     input_file, output_file, model_call_mode="online_api", max_retries=3, **kwargs
 ):
     """
-    Process all sessions in a JSONL file and save results based on specified output mode.
+    Process all sessions in a JSONL file and save results to output file.
 
     Args:
         input_file (str): Path to input JSONL file
-        output_mode (str): Either "single_file" or "multiple_files"
-        output_file (str): Path to output file (required if output_mode="single_file")
-        output_dir (str): Path to output directory (required if output_mode="multiple_files")
+        output_file (str): Path to output JSONL file
         model_call_mode (str): Either "online_api" or "local_vllm"
         max_retries (int): Maximum number of retries for LLM calls
         **kwargs: Additional parameters for API calls
@@ -25,7 +24,9 @@ def process_jsonl_file(
     """
     try:
         # Read and process each session
-        with open(input_file, "r", encoding="utf-8") as infile:
+        with open(input_file, "r", encoding="utf-8") as infile, open(
+            output_file, "w", encoding="utf-8"
+        ) as outfile:
             for line_num, line in enumerate(infile, 1):
                 if line.strip():
                     try:
@@ -38,9 +39,8 @@ def process_jsonl_file(
                         processed_lines = process_session(
                             session, model_call_mode, max_retries, **kwargs
                         )
-                        for line in processed_lines:
-                            with open(output_file, "a", encoding="utf-8") as outfile:
-                                outfile.write(line + "\n")
+                        for processed_line in processed_lines:
+                            outfile.write(processed_line + "\n")
 
                     except json.JSONDecodeError as e:
                         print(f"Warning: Skipping invalid JSON at line {line_num}: {e}")
@@ -109,6 +109,12 @@ def process_session(session, model_call_mode="online_api", max_retries=3, **kwar
 
 # Example usage:
 if __name__ == "__main__":
-    input_file_path = "data_prepare_learn2ask/test_origin.jsonl"
-    output_file_path = "data_prepare_learn2ask/test_processed.jsonl"
-    process_jsonl_file(input_file=input_file_path, output_file=output_file_path)
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--input_file", type=str, default="examples/learn_to_ask/data_raw/train_origin.jsonl"
+    )
+    parser.add_argument(
+        "--output_file", type=str, default="examples/learn_to_ask/data_raw/train_processed.jsonl"
+    )
+    args = parser.parse_args()
+    process_jsonl_file(input_file=args.input_file, output_file=args.output_file)
@@ -0,0 +1,54 @@
+import argparse
+import json
+
+
+def process_message(json_obj):
+    """Derive the keep flag and the two ground-truth labels for one sample.
+
+    Args:
+        json_obj (dict): A record providing ``info_set`` (an iterable of
+            strings) and ``remaining_chat`` (a string).
+
+    Returns:
+        tuple: ``(if_keep, info_set_str, decision_str)`` where
+            ``info_set_str`` is ``info_set`` joined with ", ",
+            ``decision_str`` is "continue" when "user: " occurs in
+            ``remaining_chat`` and "stop" otherwise, and ``if_keep`` is
+            False only for a "continue" sample whose joined info string
+            is empty.
+    """
+    joined_info = ", ".join(json_obj["info_set"])
+    decision = "continue" if "user: " in json_obj["remaining_chat"] else "stop"
+    # Drop only the combination "continue" + empty info string.
+    keep = not (joined_info == "" and decision == "continue")
+    return keep, joined_info, decision
+
+
+def main(input_file_path, output_file_path):
+    """Convert extracted samples into the final training/testing JSONL file.
+
+    Each non-blank line of ``input_file_path`` is parsed as a JSON object and
+    passed to ``process_message``. Samples it flags as not-to-keep are
+    dropped; every other sample is written to ``output_file_path`` as one
+    JSON line holding ``cid``, ``session_id``, ``diagn``, ``messages``,
+    ``decision_truth`` and ``info_truth``.
+
+    Args:
+        input_file_path (str): Path to the input JSONL file.
+        output_file_path (str): Path to the output JSONL file; it is opened
+            in "w" mode, so existing content is overwritten.
+
+    Raises:
+        json.JSONDecodeError: If a non-blank line is not valid JSON.
+        KeyError: If a record lacks one of the fields read here.
+    """
+    with open(input_file_path, "r", encoding="utf-8") as infile, open(
+        output_file_path, "w", encoding="utf-8"
+    ) as outfile:
+        print("data processing started...")
+        for line in infile:
+            line = line.strip()
+            # Blank lines (e.g. a trailing empty line) are not records;
+            # json.loads("") would raise, so skip them.
+            if not line:
+                continue
+            data = json.loads(line)
+            if_keep, info_set, decision = process_message(data)
+            if not if_keep:
+                continue
+
+            new_item = {
+                "cid": data["cid"],
+                "session_id": data["session_id"],
+                "diagn": data["diagn"],
+                "messages": data["messages"],
+                "decision_truth": decision,
+                "info_truth": info_set,
+            }
+            outfile.write(json.dumps(new_item, ensure_ascii=False) + "\n")
+    print("job done!")
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the two file paths and run the build.
+    arg_parser = argparse.ArgumentParser()
+
+    # Input: the file generated by 1_info_extract_pipeline.py
+    arg_parser.add_argument(
+        "--input_file",
+        type=str,
+        default="examples/learn_to_ask/data_raw/train_processed.jsonl",
+    )
+
+    # Output: the final file for training or testing
+    arg_parser.add_argument(
+        "--output_file",
+        type=str,
+        default="examples/learn_to_ask/data/train.jsonl",
+    )
+
+    cli_args = arg_parser.parse_args()
+    main(cli_args.input_file, cli_args.output_file)
@@ -3,22 +3,20 @@
 The associated submit_rollout.sh script is used to submit the job to Nebula.
 """
 
+import argparse
 import copy
 import gc
 import json
+import os
 import re
 import time
-from datetime import datetime
 
 import torch
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 
-# from prompt_eval import deploy_prompt_v3a0 as sys_prompt
-from trinity.plugins.prompt_learn2ask import reward_prompt_med as grader_prompt
-from trinity.plugins.prompt_learn2ask import rollout_prompt_med as rollout_prompt
-
-today = datetime.now().strftime("%Y%m%d")
+from trinity.common.constants import PLUGIN_DIRS_ENV_VAR
+from trinity.utils.plugin_loader import load_plugins
 
 
 def init_llm(model_path):
@@ -40,14 +38,16 @@ def init_llm(model_path):
 
 
 def rollout(llm, tokenizer, sampling_params, input_file_path, output_file_path, rollout_repeat=3):
+    from trinity.plugins.prompt_learn2ask import rollout_prompt_med as rollout_prompt
+
     with open(input_file_path, "r") as lines:
         sample_list = [json.loads(line.strip()) for line in lines]
     print(f"loaded samples: {len(sample_list)}")
 
-    for index, sample in enumerate(sample_list[:700]):
+    for index, sample in enumerate(sample_list):
         record = copy.deepcopy(sample)
         print(f"index: {index}, session_id: {sample['session_id']}")
-        user_content = "# 对话记录\n" + sample["input"]
+        user_content = "# Dialog History\n" + sample["input"]
         print(f"user_content: {user_content}")
         messages = [
             {"role": "system", "content": rollout_prompt},
@@ -75,6 +75,8 @@ def rollout(llm, tokenizer, sampling_params, input_file_path, output_file_path,
 
 
 def eval_sample(llm, tokenizer, sampling_params, input_file_path, output_file_path):
+    from trinity.plugins.prompt_learn2ask import reward_prompt_med as grader_prompt
+
     print(f"input_file_path: {input_file_path}")
     print(f"output_file_path: {output_file_path}")
 
@@ -135,7 +137,7 @@ def msg2str(msg_list):
                     try:
                         format_score = float(res_dict.get("format_score", 0.0))
                         content_score = float(res_dict.get("content_score", 0.0))
-                        res_think = res_dict.get("think", "无")
+                        res_think = res_dict.get("think", "None")
                     except Exception as e:
                         print(e)
                 else:
@@ -158,21 +160,43 @@ def msg2str(msg_list):
 
 
 if __name__ == "__main__":
-    rollout_repeat = 3
-    test_file_path = "path/to/your/input_file.jsonl"  # <<< Your test sample path
-    rollout_file_path = "path/to/your/rollout_file.jsonl"  # <<< rollout results given test samples
-    eval_model_path = "path/to/your/ckpt/or/model"  # <<< ckpt for testing
-    grader_model_path = "path/to/your/qwen2.5-32b-instruct"  # <<< model to empower the grading
-    eval_file_path = (
-        "path/to/your/rollout_eval_result_file.jsonl"  # <<< final output given rollout results
-    )
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--rollout_repeat", type=int, default=3)
+
+    # Your test sample path
+    parser.add_argument("--test_file_path", type=str, required=True)
+
+    # Rollout results given test samples
+    parser.add_argument("--rollout_file_path", type=str, required=True)
+
+    # Ckpt for testing
+    parser.add_argument("--eval_model_path", type=str, required=True)
+
+    # Model to empower the grading, Qwen2.5-32b-instruct is recommended
+    parser.add_argument("--grader_model_path", type=str, required=True)
+
+    # Final output given rollout results
+    parser.add_argument("--eval_file_path", type=str, required=True)
+
+    args = parser.parse_args()
+
+    os.environ[PLUGIN_DIRS_ENV_VAR] = os.path.join(os.path.dirname(__file__), "..", "workflow")
+    load_plugins()
+
     # rollout stage
-    llm, tokenizer, sampling_params = init_llm(eval_model_path)
-    rollout(llm, tokenizer, sampling_params, test_file_path, rollout_file_path, rollout_repeat)
+    llm, tokenizer, sampling_params = init_llm(args.eval_model_path)
+    rollout(
+        llm,
+        tokenizer,
+        sampling_params,
+        args.test_file_path,
+        args.rollout_file_path,
+        args.rollout_repeat,
+    )
     del llm  # clean up the memory after the inference
     gc.collect()
     torch.cuda.empty_cache()  # release gpu memory
 
     # eval stage
-    llm2, tokenizer2, sampling_params2 = init_llm(grader_model_path)
-    eval_sample(llm2, tokenizer2, sampling_params2, rollout_file_path, eval_file_path)
+    llm2, tokenizer2, sampling_params2 = init_llm(args.grader_model_path)
+    eval_sample(llm2, tokenizer2, sampling_params2, args.rollout_file_path, args.eval_file_path)
@@ -5,7 +5,6 @@ checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
 algorithm:
   algorithm_type: grpo
   repeat_times: 5
-  sample_strategy: warmup
   policy_loss_fn: ppo
   advantage_fn: grpo
   kl_penalty_fn: none
@@ -30,7 +29,7 @@ buffer:
     taskset:
       name: taskset
       storage_type: file
-      path: ${oc.env:TRINITY_TASKSET_PATH}
+      path: ${oc.env:TRINITY_TASKSET_PATH,examples/learn_to_ask/data}
       split: train
       subset_name: null
       format:
@@ -46,7 +45,6 @@ buffer:
     experience_buffer:
       name: experience_buffer
       storage_type: queue
-      enable_progress_bar: false
       path: ''
       replay_buffer:
         enable: true