Commit eebced3

Enhance SFT/DPO reader (agentscope-ai#226)
1 parent 013c2c7 commit eebced3

16 files changed: +607 additions, −233 deletions


docs/sphinx_doc/source/tutorial/example_dpo.md

Lines changed: 1 addition & 1 deletion

@@ -68,7 +68,7 @@ buffer:
   storage_type: file
   path: $DATASET_PATH/human_like_dpo_dataset
   format:
-    prompt_type: plaintext # plaintext/messages/chatpair
+    prompt_type: plaintext
     prompt_key: prompt
     chosen_key: chosen
     rejected_key: rejected
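For reference, a plaintext DPO record matching the `format` keys above might look like the following Python dict. This is a hypothetical sample mirroring the new formatter tests; the actual human_like_dpo_dataset defines its own contents.

```python
# Hypothetical plaintext DPO sample; keys follow prompt_key/chosen_key/rejected_key above.
sample = {
    "prompt": "What is 2+2?",  # read via prompt_key
    "chosen": "2+2=4",         # preferred response, read via chosen_key
    "rejected": "2+2=5",       # dispreferred response, read via rejected_key
}
```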

docs/sphinx_doc/source/tutorial/example_reasoning_basic.md

Lines changed: 1 addition & 1 deletion

@@ -182,7 +182,7 @@ buffer:
   storage_type: file
   path: <$DATASET_PATH/{sft_data}>
   format:
-    prompt_type: <prompt_type> # messages/plaintext/chatpair
+    prompt_type: <prompt_type> # messages/plaintext
     prompt_key: <prompt_key>
     response_key: <response_key>
   sft_warmup_steps: 10
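With `chatpair` no longer listed, the two remaining `prompt_type` options expect SFT records shaped roughly as below. These are hypothetical samples; the key names follow the defaults exercised in the new formatter tests.

```python
# prompt_type: messages -- one chat transcript per sample.
messages_sample = {
    "messages": [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello"},
    ]
}

# prompt_type: plaintext -- prompt/response pairs, optionally with a system prompt field.
plaintext_sample = {
    "system": "You are a helpful assistant.",
    "prompt": "What is 2+2?",
    "response": "2+2=4",
}
```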

docs/sphinx_doc/source/tutorial/trinity_configs.md

Lines changed: 38 additions & 5 deletions

@@ -37,14 +37,12 @@ synchronizer:
 monitor:
   # Monitoring configurations (e.g., WandB or TensorBoard)
   ...
-data_processor:
-  # Preprocessing data settings
-  ...
-
 service:
   # Services to use
   ...
-
+data_processor:
+  # Preprocessing data settings
+  ...
 log:
   # Ray actor logging
   ...

@@ -419,6 +417,41 @@ service:
 - `auto_start`: Whether to automatically start the data juicer service.
 - `port`: The port for Data Juicer service when `auto_start` is true.

+---
+
+## DataProcessor Configuration
+
+Configures the task / experience pipelines; please refer to the {ref}`Data Processing <Data Processing>` section for details.
+
+```yaml
+data_processor:
+  # task pipeline related
+  task_pipeline:
+    num_process: 32
+    operators:
+      - name: "llm_difficulty_score_filter"
+        args:
+          api_or_hf_model: "qwen2.5-7b-instruct"
+          min_score: 0.0
+          input_keys: ["question", "answer"]
+          field_names: ["Question", "Answer"]
+    inputs: # the output will be set to the explorer input automatically
+      - /PATH/TO/GSM8K/DATA/FILE
+    target_fields: ["question", "answer"]
+  experience_pipeline:
+    operators:
+      - name: data_juicer
+        args:
+          config_path: 'examples/grpo_gsm8k_experience_pipeline/dj_scoring_exp.yaml'
+      - name: reward_shaping_mapper
+        args:
+          reward_shaping_configs:
+            - stats_key: 'llm_quality_score'
+              op_type: ADD
+              weight: 1.0
+```
+
 ---

 ## Log Configuration
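As a rough illustration of the `reward_shaping_mapper` entry above: the configuration suggests that with `op_type: ADD` the weighted `llm_quality_score` statistic is added to each experience's reward. The snippet below is a sketch of that reading only, not the operator's actual implementation; the function and argument names are made up for illustration.

```python
# Sketch of the assumed ADD semantics for reward shaping; illustrative only.
def shape_reward(reward: float, stats: dict, stats_key: str = "llm_quality_score",
                 op_type: str = "ADD", weight: float = 1.0) -> float:
    value = stats.get(stats_key, 0.0)
    if op_type == "ADD":
        # reward' = reward + weight * stat, e.g. 1.0 + 1.0 * 0.8 = 1.8
        return reward + weight * value
    return reward  # other op types are not sketched here


print(shape_reward(1.0, {"llm_quality_score": 0.8}))  # -> 1.8 under this assumption
```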

examples/dpo_humanlike/dpo.yaml

Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@ buffer:
   enable_progress_bar: True
   path: /PATH/TO/DATASET/
   format:
-    prompt_type: plaintext # plaintext/messages/chatpair
+    prompt_type: plaintext # plaintext/messages
     prompt_key: prompt
     chosen_key: chosen
     rejected_key: rejected
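The new `tests/buffer/formatter_test.py` below exercises exactly this DPO plaintext format. As a minimal standalone sketch (the tokenizer path is a placeholder), the same `format` block corresponds to roughly the following reader setup:

```python
from transformers import AutoTokenizer

from trinity.buffer.schema.formatter import DPOPlaintextFormatter
from trinity.common.config import FormatConfig
from trinity.common.constants import PromptType

tokenizer = AutoTokenizer.from_pretrained("/PATH/TO/MODEL")  # placeholder path
config = FormatConfig(
    prompt_type=PromptType.PLAINTEXT,
    prompt_key="prompt",
    chosen_key="chosen",
    rejected_key="rejected",
)
formatter = DPOPlaintextFormatter(tokenizer=tokenizer, format_config=config)
exp = formatter.format({"prompt": "What is 2+2?", "chosen": "2+2=4", "rejected": "2+2=5"})
# exp.tokens, exp.chosen, and exp.rejected hold the tokenized prompt and responses.
```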

tests/buffer/formatter_test.py

Lines changed: 219 additions & 0 deletions

@@ -0,0 +1,219 @@
import unittest

from transformers import AutoTokenizer

from tests.tools import get_model_path
from trinity.buffer.schema.formatter import (
    DPOMessagesFormatter,
    DPOPlaintextFormatter,
    SFTMessagesFormatter,
    SFTPlaintextFormatter,
)
from trinity.common.config import FormatConfig
from trinity.common.constants import PromptType
from trinity.common.experience import Experience


class TestFormatter(unittest.TestCase):
    def setUp(self):
        self.tokenizer = AutoTokenizer.from_pretrained(get_model_path())

    def test_sft_messages_formatter(self):
        config = FormatConfig(
            prompt_type=PromptType.MESSAGES,
            messages_key="message_list",
        )
        formatter = SFTMessagesFormatter(tokenizer=self.tokenizer, format_config=config)
        sample = {
            "message_list": [
                {"role": "user", "content": "Hi"},
                {"role": "assistant", "content": "Hello"},
            ]
        }

        exp = formatter.format(sample)
        self.assertIsInstance(exp, Experience)
        self.assertIsNotNone(exp.tokens)
        self.assertIsNotNone(exp.prompt_length)
        self.assertTrue(exp.prompt_length < len(exp.tokens))
        sequence = self.tokenizer.decode(exp.tokens)

        self.assertIn("Hi", sequence)
        self.assertIn("Hello", sequence)

        # test tool
        config = FormatConfig(
            prompt_type=PromptType.MESSAGES,
            messages_key="messages",
            tools_key="tools",
        )
        formatter = SFTMessagesFormatter(tokenizer=self.tokenizer, format_config=config)
        sample = {
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant with access to various tools. Use them when needed to help users.",
                },
                {"role": "user", "content": "What's the weather like in Beijing today?"},
                {
                    "role": "assistant",
                    "content": "Let me get the weather for you.",
                    "tool_calls": [
                        {
                            "id": "call_abc123",
                            "type": "function",
                            "function": {
                                "name": "get_weather",
                                "arguments": '{"location": "Beijing", "unit": "celsius"}',
                            },
                        }
                    ],
                },
                {
                    "role": "tool",
                    "content": '{"temperature": 22, "condition": "sunny", "humidity": 45}',
                    "tool_call_id": "call_abc123",
                },
                {
                    "role": "assistant",
                    "content": "The weather in Beijing today is sunny with a temperature of 22°C and humidity at 45%. It's a pleasant day!",
                },
            ],
            "tools": [
                {
                    "type": "function",
                    "function": {
                        "name": "get_weather",
                        "description": "Get the current weather in a given location",
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "location": {
                                    "type": "string",
                                    "description": "The city and state, e.g. San Francisco, CA",
                                },
                                "unit": {
                                    "type": "string",
                                    "enum": ["celsius", "fahrenheit"],
                                    "description": "The temperature unit",
                                },
                            },
                            "required": ["location"],
                        },
                    },
                }
            ],
        }
        exp = formatter.format(sample)
        self.assertIsInstance(exp, Experience)
        self.assertIsNotNone(exp.tokens)
        self.assertIsNotNone(exp.prompt_length)
        self.assertTrue(exp.prompt_length < len(exp.tokens))
        sequence = self.tokenizer.decode(exp.tokens)
        self.assertIn("What's the weather like in Beijing today?", sequence)
        self.assertIn(
            "The weather in Beijing today is sunny with a temperature of 22°C and humidity at 45%. It's a pleasant day!",
            sequence,
        )
        self.assertIn("get_weather", sequence)

    def test_sft_plaintext_formatter(self):
        # with system prompt key
        config = FormatConfig(
            prompt_type=PromptType.PLAINTEXT,
            system_prompt_key="system",
            system_prompt="You are a programmer.",  # has lower priority than system_prompt_key
            prompt_key="prompt",
            response_key="response",
        )
        formatter = SFTPlaintextFormatter(tokenizer=self.tokenizer, format_config=config)
        sample = {
            "system": "You are a helpful assistant.",
            "prompt": "What is 2+2?",
            "response": "2+2=4",
        }
        exp = formatter.format(sample)
        self.assertIsInstance(exp, Experience)
        self.assertIsNotNone(exp.tokens)
        self.assertIsNotNone(exp.prompt_length)
        self.assertTrue(exp.prompt_length < len(exp.tokens))
        # detokenize exp.tokens into text
        sequence = self.tokenizer.decode(exp.tokens)
        self.assertIn("You are a helpful assistant.", sequence)
        self.assertIn("What is 2+2?", sequence)
        self.assertIn("2+2=4", sequence)

        # with system prompt
        config = FormatConfig(
            prompt_type=PromptType.PLAINTEXT,
            system_prompt="You are a programmer.",
            prompt_key="prompt",
            response_key="response",
        )
        formatter = SFTPlaintextFormatter(tokenizer=self.tokenizer, format_config=config)

        exp = formatter.format(sample)
        self.assertIsInstance(exp, Experience)
        self.assertIsNotNone(exp.tokens)
        self.assertIsNotNone(exp.prompt_length)
        self.assertTrue(exp.prompt_length < len(exp.tokens))
        # detokenize exp.tokens into text
        sequence = self.tokenizer.decode(exp.tokens)
        self.assertIn("You are a programmer.", sequence)
        self.assertIn("What is 2+2?", sequence)
        self.assertIn("2+2=4", sequence)

    def test_dpo_plaintext_formatter(self):
        config = FormatConfig(
            prompt_type=PromptType.PLAINTEXT,
            prompt_key="prompt",
            chosen_key="chosen",
            rejected_key="rejected",
        )
        formatter = DPOPlaintextFormatter(tokenizer=self.tokenizer, format_config=config)
        sample = {"prompt": "What is 2+2?", "chosen": "2+2=4", "rejected": "2+2=5"}
        exp = formatter.format(sample)
        self.assertIsInstance(exp, Experience)
        self.assertIsNotNone(exp.tokens)
        self.assertIsNotNone(exp.chosen)
        self.assertIsNotNone(exp.rejected)
        self.assertIsNotNone(exp.prompt_length)
        prompt = self.tokenizer.decode(exp.tokens)
        chosen = self.tokenizer.decode(exp.chosen)
        rejected = self.tokenizer.decode(exp.rejected)
        self.assertIn("What is 2+2?", prompt)
        self.assertIn("2+2=4", chosen)
        self.assertIn("2+2=5", rejected)
        self.assertNotIn("What is 2+2?", chosen)
        self.assertNotIn("What is 2+2?", rejected)
        self.assertNotIn("2+2=4", prompt)
        self.assertNotIn("2+2=5", prompt)

    def test_dpo_messages_formatter(self):
        config = FormatConfig(
            prompt_type=PromptType.MESSAGES,
            messages_key="messages",
            chosen_key="chosen",
            rejected_key="rejected",
        )
        formatter = DPOMessagesFormatter(tokenizer=self.tokenizer, format_config=config)
        sample = {
            "messages": [
                {"role": "user", "content": "What is your name?"},
            ],
            "chosen": [
                {"role": "assistant", "content": "My name is Assistant."},
            ],
            "rejected": [{"role": "assistant", "content": "I don't have a favorite color."}],
        }
        exp = formatter.format(sample)
        self.assertIsInstance(exp, Experience)
        self.assertIsNotNone(exp.tokens)
        self.assertIsNotNone(exp.prompt_length)
        # detokenize exp.tokens into text
        prompt = self.tokenizer.decode(exp.tokens)
        chosen = self.tokenizer.decode(exp.chosen)
        rejected = self.tokenizer.decode(exp.rejected)
        self.assertIn("What is your name?", prompt)
        self.assertIn("My name is Assistant.", chosen)
        self.assertIn("I don't have a favorite color.", rejected)

tests/cli/__init__.py

Whitespace-only changes.

tests/cli/launcher_test.py

Lines changed: 89 additions & 0 deletions

@@ -0,0 +1,89 @@
import sys
import unittest
from unittest import mock

from tests.tools import get_template_config
from trinity.cli import launcher
from trinity.common.constants import (
    LOG_DIR_ENV_VAR,
    LOG_LEVEL_ENV_VAR,
    LOG_NODE_IP_ENV_VAR,
    PLUGIN_DIRS_ENV_VAR,
)


class TestLauncherMain(unittest.TestCase):
    def setUp(self):
        self._orig_argv = sys.argv.copy()

    def tearDown(self):
        sys.argv = self._orig_argv

    @mock.patch("trinity.cli.launcher.explore")
    @mock.patch("trinity.cli.launcher.train")
    @mock.patch("trinity.cli.launcher.both")
    @mock.patch("trinity.cli.launcher.bench")
    @mock.patch("trinity.cli.launcher.load_config")
    def test_main_run_command(self, mock_load, mock_bench, mock_both, mock_train, mock_explore):
        config = get_template_config()
        mapping = {
            "explore": mock_explore,
            "train": mock_train,
            "both": mock_both,
            "bench": mock_bench,
        }
        for mode in ["explore", "train", "both", "bench"]:
            config.mode = mode
            mock_load.return_value = config
            with mock.patch(
                "argparse.ArgumentParser.parse_args",
                return_value=mock.Mock(
                    command="run", config="dummy.yaml", dlc=False, plugin_dir=None
                ),
            ):
                launcher.main()
                mock_load.assert_called_once_with("dummy.yaml")
                mapping[mode].assert_called_once_with(config)
                mock_load.reset_mock()
                mapping[mode].reset_mock()

    @mock.patch("trinity.cli.launcher.setup_ray_cluster")
    @mock.patch("trinity.cli.launcher.both")
    @mock.patch("trinity.cli.launcher.load_config")
    def test_main_run_in_dlc(self, mock_load, mock_both, mock_setup):
        config = get_template_config()
        config.mode = "both"
        config.log.level = "WARNING"
        config.log.group_by_node = True
        mock_load.return_value = config
        with mock.patch(
            "argparse.ArgumentParser.parse_args",
            return_value=mock.Mock(
                command="run", config="dummy.yaml", dlc=True, plugin_dir="/path/to/plugins"
            ),
        ):
            launcher.main()
            mock_load.assert_called_once_with("dummy.yaml")
            mock_both.assert_called_once_with(config)
            mock_setup.assert_called_once_with(
                namespace=config.ray_namespace,
                envs={
                    PLUGIN_DIRS_ENV_VAR: "/path/to/plugins",
                    LOG_DIR_ENV_VAR: config.log.save_dir,
                    LOG_LEVEL_ENV_VAR: "WARNING",
                    LOG_NODE_IP_ENV_VAR: "1",
                },
            )

    @mock.patch("trinity.cli.launcher.studio")
    def test_main_studio_command(self, mock_studio):
        with mock.patch(
            "argparse.ArgumentParser.parse_args",
            return_value=mock.Mock(command="studio", port=9999),
        ):
            launcher.main()
            mock_studio.assert_called_once_with(9999)


if __name__ == "__main__":
    unittest.main()
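These tests pin down the launcher's dispatch behavior: the `run` command routes to `explore` / `train` / `both` / `bench` based on `config.mode`, the `studio` command opens the UI on the given port, and in DLC mode plugin and logging settings are exported as environment variables before the Ray cluster is set up. A hypothetical invocation consistent with the mocked `parse_args` namespace might look as follows; the exact flag spelling is an assumption, since the tests only fix the parsed attribute names.

```python
# Illustrative only: drive launcher.main() the way the tests' mocked namespace implies.
import sys

from trinity.cli import launcher

sys.argv = ["trinity", "run", "--config", "dpo.yaml"]  # assumed CLI shape
launcher.main()
```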
