
Commit 253e92c

Fix bugs for pipeline running.
1 parent e8c0a55 commit 253e92c

7 files changed (+67, -62 lines changed)


examples/learn_to_ask/README.md

Lines changed: 6 additions & 4 deletions
@@ -24,12 +24,12 @@ Download the [RealMedConv](https://huggingface.co/datasets/datajuicer/RealMedCon
 You need to perform the following preprocessing steps to turn the log in to training/testing samples for our `learn_to_ask` framework, there are two simple steps:
 - Segment the original conversation log (session) into context–future pairs, then extract `info_truth` labels from the `remaining_chat` field.
 ```bash
-python examples/learn_to_ask/workflow/data_prepare/1_info_extract_pipeline.py --input_file /path/to/RealMedConv/train.jsonl --output_file examples/learn_to_ask/data_raw/train_processed.jsonl
+python examples/learn_to_ask/data_prepare/1_info_extract_pipeline.py --input_file /path/to/RealMedConv/train.jsonl --output_file examples/learn_to_ask/data_raw/train_processed.jsonl
 ```

 - Convert these samples into final training/testing datasets.
 ```bash
-python examples/learn_to_ask/workflow/data_prepare/2_build_dataset.py --input_file examples/learn_to_ask/data_raw/train_processed.jsonl --output_file examples/learn_to_ask/data/train.jsonl
+python examples/learn_to_ask/data_prepare/2_build_dataset.py --input_file examples/learn_to_ask/data_raw/train_processed.jsonl --output_file examples/learn_to_ask/data/train.jsonl
 ```

 These scripts are implementations of the following procedures.

@@ -76,7 +76,7 @@ Update `examples/learn_to_ask/train.yaml` with paths to:
 Then, launch training:
 ```bash
 trinity run --config examples/learn_to_ask/train.yaml --plugin-dir examples/learn_to_ask/workflow
-````
+```
 ---

 ## Step 3. Evaluate

@@ -86,5 +86,7 @@ Use the rollout-n-evaluate pipeline:

 You may configure the settings then run the pipeline by launching:
 ```bash
-python examples/learn_to_ask/workflow/data_prepare/3_rollout_then_evaluate.py
+python examples/learn_to_ask/data_prepare/3_rollout_then_evaluate.py --eval_model_path path/to/trained/model --grader_model_path path/to/qwen2.5-32b-instruct --test_file_path examples/learn_to_ask/data/test.jsonl --rollout_file_path path/to/rollout.jsonl --eval_file_path path/to/output.jsonl
 ```
+
+Note: `eval_model_path` is the location of the model you want to evaluate. This model must first be converted into the HuggingFace format. For instructions on converting FSDP checkpoints, see [this guide](https://modelscope.github.io/Trinity-RFT/en/main/tutorial/faq.html).
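
As a quick sanity check of the conversion mentioned in the note above, a converted checkpoint should load like any ordinary HuggingFace model. This is only an illustrative sketch; the directory path is a placeholder, not something defined by this commit.

```python
# Minimal sketch (assumption: the FSDP checkpoint has already been converted
# to HuggingFace format as described in the linked FAQ).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_dir = "path/to/trained/model"  # placeholder; use the same path as --eval_model_path
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir)
print(model.config.model_type)  # loads without error if the conversion succeeded
```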

examples/learn_to_ask/data_prepare/1_info_extract_pipeline.py

Lines changed: 13 additions & 1 deletion
@@ -116,5 +116,17 @@ def process_session(session, model_call_mode="online_api", max_retries=3, **kwar
     parser.add_argument(
         "--output_file", type=str, default="examples/learn_to_ask/data_raw/train_processed.jsonl"
     )
+    parser.add_argument(
+        "--model_call_mode", type=str, choices=["online_api", "local_vllm"], default="local_vllm"
+    )
+    parser.add_argument("--model_path", type=str, required=True)
     args = parser.parse_args()
-    process_jsonl_file(input_file=args.input_file, output_file=args.output_file)
+    print(
+        process_jsonl_file(
+            input_file=args.input_file,
+            output_file=args.output_file,
+            model_call_mode=args.model_call_mode,
+            model_path=args.model_path,
+            # Additional parameters for API calls
+        )
+    )
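
With the new arguments, a typical invocation of the extraction script might look like the following; the model path is a placeholder for whichever extraction model you serve locally (the flag itself is required by the updated parser).

```bash
# Illustrative usage of the updated CLI (paths are placeholders)
python examples/learn_to_ask/data_prepare/1_info_extract_pipeline.py \
  --input_file /path/to/RealMedConv/train.jsonl \
  --output_file examples/learn_to_ask/data_raw/train_processed.jsonl \
  --model_call_mode local_vllm \
  --model_path /path/to/extraction/model
```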

examples/learn_to_ask/data_prepare/2_build_dataset.py

Lines changed: 3 additions & 2 deletions
@@ -3,12 +3,13 @@


 def process_message(json_obj):
-    info_set_str = ", ".join(json_obj["info_set"])
+    info_set = json_obj.get("info_set")
+    info_set_str = ", ".join(info_set) if isinstance(info_set, list) else ""
     if "user: " not in json_obj["remaining_chat"]:
         decision_str = "stop"
     else:
         decision_str = "continue"
-    if info_set_str == "" and decision_str == "continue":
+    if not info_set_str and decision_str == "continue":
         if_keep = False
     else:
         if_keep = True
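
The effect of the new guard is that records whose `info_set` is missing, null, or empty are kept only when the remaining chat has already ended (the "stop" case). A minimal sketch of that filtering rule; the sample dicts are invented for illustration and contain only the two fields the rule inspects.

```python
# Sketch of the filtering rule introduced above (not the full process_message).
def keep_sample(json_obj):
    info_set = json_obj.get("info_set")
    info_set_str = ", ".join(info_set) if isinstance(info_set, list) else ""
    decision_str = "stop" if "user: " not in json_obj["remaining_chat"] else "continue"
    return not (not info_set_str and decision_str == "continue")

print(keep_sample({"info_set": ["symptom: cough"], "remaining_chat": "user: it got worse"}))  # True
print(keep_sample({"info_set": None, "remaining_chat": "user: it got worse"}))                # False: no labels to learn from
print(keep_sample({"info_set": [], "remaining_chat": "assistant: take care"}))                # True: conversation already over
```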

examples/learn_to_ask/data_prepare/3_rollout_then_evaluate.py

Lines changed: 8 additions & 15 deletions
@@ -1,6 +1,5 @@
 """
 This script is used to use VLLM to generate rollout samples from the converted checkpoints.
-The associated submit_rollout.sh script is used to submit the job to Nebula.
 """

 import argparse

@@ -47,12 +46,7 @@ def rollout(llm, tokenizer, sampling_params, input_file_path, output_file_path,
     for index, sample in enumerate(sample_list):
         record = copy.deepcopy(sample)
         print(f"index: {index}, session_id: {sample['session_id']}")
-        user_content = "# Dialog History\n" + sample["input"]
-        print(f"user_content: {user_content}")
-        messages = [
-            {"role": "system", "content": rollout_prompt},
-            {"role": "user", "content": user_content},
-        ]
+        messages = [{"role": "system", "content": rollout_prompt}] + sample["messages"]

         prompt = tokenizer.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True, enable_thinking=False

@@ -63,7 +57,6 @@ def rollout(llm, tokenizer, sampling_params, input_file_path, output_file_path,
         time_probe = time.perf_counter()
         outputs = llm.generate([prompt], sampling_params=sampling_params)
         print(f"time cost: {time.perf_counter() - time_probe}")
-        # print(json.dumps(outputs, ensure_ascii=False, indent=2))
         for output in outputs:
             response = output.outputs[0].text
             response_list.append(response)

@@ -163,19 +156,19 @@ def msg2str(msg_list):
     parser = argparse.ArgumentParser()
     parser.add_argument("--rollout_repeat", type=int, default=3)

-    # Your test sample path
-    parser.add_argument("--test_file_path", type=str, required=True)
-
-    # Rollout results given test samples
-    parser.add_argument("--rollout_file_path", type=str, required=True)
-
     # Ckpt for testing
     parser.add_argument("--eval_model_path", type=str, required=True)

     # Model to empower the grading, Qwen2.5-32b-instruct is recommended
     parser.add_argument("--grader_model_path", type=str, required=True)

-    # Final output given rollout results
+    # Your test sample path [input]
+    parser.add_argument("--test_file_path", type=str, required=True)
+
+    # Rollout results given test samples [output]
+    parser.add_argument("--rollout_file_path", type=str, required=True)
+
+    # Final output given rollout results [output]
     parser.add_argument("--eval_file_path", type=str, required=True)

     args = parser.parse_args()
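
The rollout loop now expects each test sample to carry a chat-style `messages` list rather than a flat `input` string. A minimal sketch of the new message assembly; the sample content and prompt text are invented, and only the list-concatenation pattern comes from the diff above.

```python
# Sketch: prepend the system prompt to the per-sample chat history,
# mirroring the updated rollout() loop. Sample content is illustrative only.
rollout_prompt = "You are a physician deciding whether to ask a follow-up question."  # placeholder
sample = {
    "session_id": "demo-001",
    "messages": [
        {"role": "user", "content": "I have had a headache for two days."},
        {"role": "assistant", "content": "Is the pain on one side or both?"},
        {"role": "user", "content": "Mostly on the left side."},
    ],
}

messages = [{"role": "system", "content": rollout_prompt}] + sample["messages"]
for turn in messages:
    print(f'{turn["role"]}: {turn["content"]}')
# In the script these messages are then rendered with
# tokenizer.apply_chat_template(..., add_generation_prompt=True, enable_thinking=False).
```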

examples/learn_to_ask/data_prepare/llm_info_extraction.py

Lines changed: 23 additions & 30 deletions
@@ -1,6 +1,11 @@
 import os

 import openai
+import torch
+import transformers
+
+tokenizer = None
+llm = None


 def LLM_info_extraction(remaining_chat, model_call_mode, **kwargs):

@@ -19,12 +24,12 @@ def LLM_info_extraction(remaining_chat, model_call_mode, **kwargs):
     # Create messages format with system and user roles
     system_message = """
 # Task:
-You are a medical information assistant. Given a dialogue between a physician (assistant) and a patient (user), extract the clinical attributes of interest to the physician based on their questions. The target fields include: symptom, symptom nature, symptom location, symptom severity, and symptom trigger. Then, identify the corresponding specific information from the patients responses and pair it with the respective field.
+You are a medical information assistant. Given a dialogue between a physician (assistant) and a patient (user), extract the clinical attributes of interest to the physician based on their questions. The target fields include: symptom, symptom nature, symptom location, symptom severity, and symptom trigger. Then, identify the corresponding specific information from the patient's responses and pair it with the respective field.
 # Requirements:
 - Do not fabricate information or introduce new fields not listed above. Ignore patient-reported information regarding prior medication use, allergies, or underlying comorbidities; do not include such details in the output.
 - Only include fields explicitly inquired about by the physician. Omit any fields not addressed in the dialogue. Avoid outputting vague terms (e.g., "unspecified" or "unknown").
 - Prevent duplication: if a symptom description already includes anatomical location, do not separately list the location field.
-- Format each entry as a string enclosed in single quotes ('), and separate multiple entries with commas. Enclose the entire output within square brackets to form a list. If the dialogue is unrelated to the aforementioned clinical attributes, output only "[]".
+- Format each entry as a string enclosed in single quotes ('), and separate multiple entries with commas, ensuring any necessary escape characters within the strings. Enclose the entire output within square brackets to form a list. If the dialogue is unrelated to the aforementioned clinical attributes, output only "[]".
 - Do not include reasoning steps or additional commentary outside the specified format. Condense colloquial patient expressions into concise, standardized, and clinically appropriate terminology.
 # Example output format:
 ['symptom: diarrhea', 'symptom nature: watery stool', 'symptom severity: 4-5 times per day']

@@ -33,7 +38,7 @@ def LLM_info_extraction(remaining_chat, model_call_mode, **kwargs):

     messages = [
         {"role": "system", "content": system_message},
-        {"role": "user", "content": user_message},
+        {"role": "user", "content": "```\n" + user_message + "\n```\n"},
     ]

     try:

@@ -66,22 +71,6 @@ def _call_online_api(messages, **kwargs):
     return response.choices[0].message.content


-def _convert_messages_to_prompt(messages):
-    """Convert messages format to a single prompt string"""
-    prompt = ""
-    for message in messages:
-        role = message["role"]
-        content = message["content"]
-        if role == "system":
-            prompt += f"System: {content}\n"
-        elif role == "user":
-            prompt += f"User: {content}\n"
-        elif role == "assistant":
-            prompt += f"Assistant: {content}\n"
-    prompt += "Assistant:"
-    return prompt
-
-
 def _call_local_vllm(messages, **kwargs):
     """Handle local vLLM calls"""
     try:

@@ -97,21 +86,23 @@ def _call_local_vllm(messages, **kwargs):
     repetition_penalty = kwargs.get("repetition_penalty", 1.1)

     # GPU/CUDA related parameters for vLLM
-    tensor_parallel_size = kwargs.get("tensor_parallel_size", 1)
+    tensor_parallel_size = kwargs.get("tensor_parallel_size", torch.cuda.device_count())
     gpu_memory_utilization = kwargs.get("gpu_memory_utilization", 0.9)
     enforce_eager = kwargs.get("enforce_eager", False)
     dtype = kwargs.get("dtype", "auto")
     max_model_len = kwargs.get("max_model_len", 4096)

     # Initialize the LLM with the provided model path and GPU parameters
-    llm = LLM(
-        model=model_path,
-        tensor_parallel_size=tensor_parallel_size,
-        gpu_memory_utilization=gpu_memory_utilization,
-        enforce_eager=enforce_eager,
-        dtype=dtype,
-        max_model_len=max_model_len,
-    )
+    global llm, tokenizer
+    if llm is None:
+        llm = LLM(
+            model=model_path,
+            tensor_parallel_size=tensor_parallel_size,
+            gpu_memory_utilization=gpu_memory_utilization,
+            enforce_eager=enforce_eager,
+            dtype=dtype,
+            max_model_len=max_model_len,
+        )

     sampling_params = SamplingParams(
         temperature=temperature,

@@ -121,7 +112,9 @@ def _call_local_vllm(messages, **kwargs):
     )

     # Convert messages to a single prompt string
-    prompt = _convert_messages_to_prompt(messages)
+    if tokenizer is None:
+        tokenizer = transformers.AutoTokenizer.from_pretrained(model_path)
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

     outputs = llm.generate([prompt], sampling_params)

@@ -152,4 +145,4 @@ def parse_llm_output(output_str):

         return result
     except Exception as e:
-        return f"Error parsing output: {str(e)}"
+        return f"Error parsing output: [{repr(output_str)}] error = {str(e)}"

examples/learn_to_ask/train.yaml

Lines changed: 3 additions & 0 deletions
@@ -38,6 +38,9 @@ buffer:
       rollout_args:
         temperature: 1.0
         logprobs: 0
+      workflow_args:
+        train_mode: "Ra+Rs"
+        fusion_mode: "default"
     eval_tasksets: [ ]
     default_workflow_type: learn2ask_workflow
     default_reward_fn_type: math_reward
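
For the ablation runs described in `workflow_learn2ask.py`, the same block can simply carry different values. The snippet below is only illustrative: placement mirrors the hunk above (alongside `rollout_args` under the taskset), and the values are the documented alternatives.

```yaml
# Illustrative ablation override (same position as the hunk above):
workflow_args:
  train_mode: "Ra"     # reward without the Rs term
  fusion_mode: "sum"   # additive instead of multiplicative fusion
```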

examples/learn_to_ask/workflow/workflow_learn2ask.py

Lines changed: 11 additions & 10 deletions
@@ -17,19 +17,16 @@
 logger = get_logger(__name__)

 """
-For ablation studies, you may set the train_type to:
+For ablation studies, you may set the `taskset.workflow_args.train_mode` to:
 - Ra+Rs: the default setting,
 - Ra: without Rs,
 - Rs: without Ra.

-Also, you can choose the reward fusion_mode to:
+Also, you can choose the reward `taskset.workflow_args.fusion_mode` to:
 - default: using the multiplicative fusion function,
 - sum: using the sum fusion function.
 """

-train_mode = "Ra+Rs"
-fusion_mode = "default"
-

 @WORKFLOWS.register_module("learn2ask_workflow")
 class Learn2AskWorkflow(SimpleWorkflow):

@@ -42,7 +39,11 @@ def __init__(
         model: ModelWrapper,
         auxiliary_models: Optional[List[openai.OpenAI]] = None,
     ):
-        self.reset(task)
+        self.train_mode = task.workflow_args.get("train_mode", "Ra+Rs")
+        self.fusion_mode = task.workflow_args.get("fusion_mode", "default")
+        assert (
+            auxiliary_models is not None and len(auxiliary_models) == 1
+        ), "Please provide one `auxiliary_models` in explorer config for `learn2ask_workflow`."
         super().__init__(
             task=task,
             model=model,

@@ -54,7 +55,7 @@ def resettable(self):
         return True

     def reset(self, task: Task):
-        if train_mode == "Ra":  # we have a different system prompt for this training mode.
+        if self.train_mode == "Ra":  # we have a different system prompt for this training mode.
             from trinity.plugins.prompt_learn2ask import (
                 rollout_prompt_med_Ra as system_prompt,
             )

@@ -186,13 +187,13 @@ def reward_fn(self, response):
         else:
             action_score, format_score, content_score = 0.0, 0.0, 0.0

-        if train_mode == "Ra+Rs":  # the default setting
+        if self.train_mode == "Ra+Rs":  # the default setting
             final_reward = (
                 action_score * (1 + 2 * content_score) + format_score
-                if fusion_mode != "sum"
+                if self.fusion_mode != "sum"
                 else action_score + content_score + format_score
             )
-        elif train_mode == "Ra":  # for Ra only (without Rs)
+        elif self.train_mode == "Ra":  # for Ra only (without Rs)
             final_reward = 2 * content_score + format_score
         else:  # for Rs only (without Ra)
             final_reward = action_score * 3 + format_score
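
For reference, the two fusion modes combine the three scores as shown below; a tiny worked example with made-up scores, where only the formulas come from `reward_fn` above.

```python
# Worked example of the reward fusion in Learn2AskWorkflow.reward_fn.
# Scores are invented; only the formulas come from the code above.
action_score, format_score, content_score = 1.0, 0.5, 0.8

default_fusion = action_score * (1 + 2 * content_score) + format_score  # "Ra+Rs", multiplicative
sum_fusion = action_score + content_score + format_score                # "Ra+Rs" with fusion_mode "sum"
ra_only = 2 * content_score + format_score                              # train_mode "Ra"
rs_only = action_score * 3 + format_score                               # train_mode "Rs"

print(default_fusion, sum_fusion, ra_only, rs_only)  # 3.1 2.3 2.1 3.5
```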
