* Apply pre-commit fixes (formatting, import ordering, explicit None checks, no bare except)

HYLcool · HYLcool · commit d5e46f38db99 · 2025-06-27T17:30:37.000+08:00
diff --git a/examples/grpo_gsm8k_experience_pipeline/dj_scoring_exp.yaml b/examples/grpo_gsm8k_experience_pipeline/dj_scoring_exp.yaml
@@ -8,4 +8,4 @@ process:
       api_or_hf_model: "qwen2.5-32b-instruct"  # use "qwen2.5-32b-instruct" to calculate the quality scores.
       min_score: 0.0
       input_keys: ["prompt_text", "prompt_text"]  # set input_keys and field_names to the existing key names in gsm-8k. Here calculating the difficulty scores according to both questions and answers.
-      field_names: ["prompt", "response"]
+      field_names: ["prompt", "response"]
diff --git a/tests/common/experience_test.py b/tests/common/experience_test.py
@@ -5,8 +5,8 @@
 
 import torch
 
-from trinity.common.experience import Experience, Experiences
 from trinity.buffer.schema.sql_schema import ExperienceModel
+from trinity.common.experience import Experience, Experiences
 
 db_url = os.path.join(os.path.dirname(__file__), "tmp", "test.db")
 dataset_path = os.path.join(os.path.dirname(__file__), "data")
diff --git a/trinity/cli/launcher.py b/trinity/cli/launcher.py
@@ -161,16 +161,21 @@ def activate_data_processor(data_processor_url: str, config_path: str):
         logger.error(f"Failed to activate data module: {res['return_msg']}.")
         return
 
+
 def stop_data_processor(base_data_processor_url: str):
     """Stop all pipelines in the data processor"""
     from trinity.cli.client import request
+
     logger.info(f"Stopping all pipelines in {base_data_processor_url}...")
-    res = request(url=f'{base_data_processor_url}/stop_all')
+    res = request(url=f"{base_data_processor_url}/stop_all")
     if res["return_code"] != 0:
         logger.error(f"Failed to stop all data pipelines: {res['return_msg']}.")
         return
 
-def validate_data_pipeline(data_pipeline_config: DataPipelineConfig, pipeline_type: DataProcessorPipelineType):
+
+def validate_data_pipeline(
+    data_pipeline_config: DataPipelineConfig, pipeline_type: DataProcessorPipelineType
+):
     """
     Check if the data pipeline is valid. The config should:
     1. Non-empty input buffer
@@ -205,9 +210,7 @@ def validate_data_pipeline(data_pipeline_config: DataPipelineConfig, pipeline_ty
         # No special items need to be checked.
         pass
     else:
-        logger.warning(
-            f'Invalid pipeline type: {pipeline_type}..'
-        )
+        logger.warning(f"Invalid pipeline type: {pipeline_type}..")
         return False
     return True
 
@@ -220,21 +223,27 @@ def run(config_path: str, dlc: bool = False, plugin_dir: str = None):
     # try to activate task pipeline for raw data
     data_processor_config = config.data_processor
     if (
-        data_processor_config.data_processor_url
-        and data_processor_config.task_pipeline
-        and validate_data_pipeline(data_processor_config.task_pipeline, DataProcessorPipelineType.TASK)
+        data_processor_config.data_processor_url is not None
+        and data_processor_config.task_pipeline is not None
+        and validate_data_pipeline(
+            data_processor_config.task_pipeline, DataProcessorPipelineType.TASK
+        )
     ):
         activate_data_processor(
-            f"{data_processor_config.data_processor_url}/{DataProcessorPipelineType.TASK.value}", config_path
+            f"{data_processor_config.data_processor_url}/{DataProcessorPipelineType.TASK.value}",
+            config_path,
         )
     # try to activate experience pipeline for experiences
     if (
-        data_processor_config.data_processor_url
-        and data_processor_config.experience_pipeline
-        and validate_data_pipeline(data_processor_config.experience_pipeline, DataProcessorPipelineType.EXPERIENCE)
+        data_processor_config.data_processor_url is not None
+        and data_processor_config.experience_pipeline is not None
+        and validate_data_pipeline(
+            data_processor_config.experience_pipeline, DataProcessorPipelineType.EXPERIENCE
+        )
     ):
         activate_data_processor(
-            f"{data_processor_config.data_processor_url}/{DataProcessorPipelineType.EXPERIENCE.value}", config_path
+            f"{data_processor_config.data_processor_url}/{DataProcessorPipelineType.EXPERIENCE.value}",
+            config_path,
         )
     if dlc:
         from trinity.utils.dlc_utils import setup_ray_cluster
@@ -268,7 +277,8 @@ def run(config_path: str, dlc: bool = False, plugin_dir: str = None):
             stop_ray_cluster(namespace=config.ray_namespace)
 
         # stop all pipelines
-        stop_data_processor(data_processor_config.data_processor_url)
+        if data_processor_config.data_processor_url is not None:
+            stop_data_processor(data_processor_config.data_processor_url)
 
 
 def studio(port: int = 8501):
diff --git a/trinity/common/config.py b/trinity/common/config.py
@@ -9,12 +9,12 @@
 from trinity.common.constants import (
     EXPLORER_NAME,
     TRAINER_NAME,
+    OpType,
     PromptType,
     ReadStrategy,
     StorageType,
     SyncMethod,
     TaskType,
-    OpType
 )
 from trinity.utils.log import get_logger
 
@@ -103,6 +103,7 @@ class StorageConfig:
     # ! DO NOT SET,  automatically set corresponding to train/eval
     task_type: TaskType = TaskType.EXPLORE
 
+
 @dataclass
 class RewardShapingConfig:
     """Config for reward shaping."""
@@ -111,6 +112,7 @@ class RewardShapingConfig:
     op_type: OpType = OpType.ADD
     weight: float = 1.0
 
+
 @dataclass
 class DataPipelineConfig:
     """Config for data pipeline."""
@@ -513,23 +515,35 @@ def _check_buffer(self) -> None:  # noqa: C901
             output_buffers = {}
             # - taskset
             if self.buffer.explorer_input.taskset.name:
-                input_buffers[self.buffer.explorer_input.taskset.name] = self.buffer.explorer_input.taskset
+                input_buffers[
+                    self.buffer.explorer_input.taskset.name
+                ] = self.buffer.explorer_input.taskset
             # - explorer output
             if self.buffer.explorer_output and self.buffer.explorer_output.name:
                 output_buffers[self.buffer.explorer_output.name] = self.buffer.explorer_output
             # - trainer input: experience buffer
-            if self.buffer.trainer_input.experience_buffer and self.buffer.trainer_input.experience_buffer.name:
-                input_buffers[self.buffer.trainer_input.experience_buffer.name] = self.buffer.trainer_input.experience_buffer
+            if (
+                self.buffer.trainer_input.experience_buffer
+                and self.buffer.trainer_input.experience_buffer.name
+            ):
+                input_buffers[
+                    self.buffer.trainer_input.experience_buffer.name
+                ] = self.buffer.trainer_input.experience_buffer
             # - trainer input: sft warmup dataset
-            if self.buffer.trainer_input.sft_warmup_dataset and self.buffer.trainer_input.sft_warmup_dataset.name:
-                input_buffers[self.buffer.trainer_input.sft_warmup_dataset.name] = self.buffer.trainer_input.sft_warmup_dataset
+            if (
+                self.buffer.trainer_input.sft_warmup_dataset
+                and self.buffer.trainer_input.sft_warmup_dataset.name
+            ):
+                input_buffers[
+                    self.buffer.trainer_input.sft_warmup_dataset.name
+                ] = self.buffer.trainer_input.sft_warmup_dataset
 
             # when experience pipeline is on, the explorer output and the
             # experience buffer of trainer input should be different
             if self.buffer.explorer_output == self.buffer.trainer_input.experience_buffer:
                 raise ValueError(
-                    f"The explorer output buffer should be different from the experience buffer of the trainer input "
-                    f"when experience pipeline is provided."
+                    "The explorer output buffer should be different from the experience buffer of the trainer input "
+                    "when experience pipeline is provided."
                 )
 
             # NOTICE: For now, input/output buffers for data processors should come from output/input buffers of trinity
@@ -552,7 +566,9 @@ def _check_buffer(self) -> None:  # noqa: C901
                     f"input buffers of trinity."
                 )
             else:
-                self.data_processor.experience_pipeline.output_buffer = input_buffers[exp_pipeline_output_buffers.name]
+                self.data_processor.experience_pipeline.output_buffer = input_buffers[
+                    exp_pipeline_output_buffers.name
+                ]
 
         # set read_batch_size / pad_token_id / tokenizer_path
         self.buffer.read_batch_size = self.buffer.batch_size * self.algorithm.repeat_times
diff --git a/trinity/common/constants.py b/trinity/common/constants.py
@@ -104,12 +104,14 @@ class RunningStatus(Enum):
     WAITING_SYNC = "waiting_sync"
     STOPPED = "stopped"
 
+
 class DataProcessorPipelineType(Enum):
     """Data processor pipeline type."""
 
     EXPERIENCE = "experience_pipeline"
     TASK = "task_pipeline"
 
+
 class OpType(Enum):
     """Operator type for reward shaping."""
 
diff --git a/trinity/data/controllers/active_iterator.py b/trinity/data/controllers/active_iterator.py
@@ -1,12 +1,12 @@
 import os
-import traceback
 import threading
+import traceback
+from functools import partial
 from numbers import Number
 from typing import Any, Dict, List, Union
-from functools import partial
-from data_juicer.utils.constant import Fields
 
 import ray
+from data_juicer.utils.constant import Fields
 
 from trinity.common.config import BufferConfig, DataPipelineConfig, RewardShapingConfig
 from trinity.common.constants import DataProcessorPipelineType, OpType
@@ -102,7 +102,7 @@ def __init__(
     def run(self, thread_event: threading.Event = None):
         """Run the active iterator."""
         # step 1. parse the dj config
-        logger.info('Parsing the Data-Juicer config...')
+        logger.info("Parsing the Data-Juicer config...")
         try:
             (
                 dj_config,
@@ -115,15 +115,15 @@ def run(self, thread_event: threading.Event = None):
             return 1, "config parsing failed."
 
         # step 2. prepare rft-dataset from the input buffers
-        logger.info('Preparing Rft-Dataset from input buffers...')
+        logger.info("Preparing Rft-Dataset from input buffers...")
         try:
             dataset = RftDataset(self.config, self.buffer_config)
         except Exception:
             traceback.print_exc()
             return 2, "RftDataset loading failed."
 
         # step 3. load processor
-        logger.info('Loading data processors...')
+        logger.info("Loading data processors...")
         try:
             if hit_cleaner:
                 cleaner = DataCleaner(
@@ -151,7 +151,7 @@ def run(self, thread_event: threading.Event = None):
                 break
 
             # step 4. load data from the input buffers for the next batch
-            logger.info('Loading data from input buffers for the next batch...')
+            logger.info("Loading data from input buffers for the next batch...")
             try:
                 dataset.read_from_buffer()
             except StopIteration:
@@ -161,7 +161,7 @@ def run(self, thread_event: threading.Event = None):
                 return 4, "RftDataset loading from buffers failed."
 
             # step 5. apply processors to calculate scores of different dimensions
-            logger.info('Applying data processors to calculate stats...')
+            logger.info("Applying data processors to calculate stats...")
             try:
                 res_dataset = dataset
                 if hit_cleaner:
@@ -177,7 +177,7 @@ def run(self, thread_event: threading.Event = None):
             # step 6. calculate the average and final scores, including priority
             try:
                 if hit_cleaner:
-                    logger.info('Calculating the average and final scores...')
+                    logger.info("Calculating the average and final scores...")
                     scored_dataset = self._group_scores(res_dataset)
                     scored_dataset = self._compute_priority_scores(scored_dataset)
                 else:
@@ -188,7 +188,11 @@ def run(self, thread_event: threading.Event = None):
 
             # step 7. reward shaping. Only available for experience pipeline and the reward shaping config is set
             try:
-                if self.pipeline_type == DataProcessorPipelineType.EXPERIENCE and len(self.config.reward_shaping) > 0:
+                if (
+                    self.pipeline_type == DataProcessorPipelineType.EXPERIENCE
+                    and self.config.reward_shaping is not None
+                    and len(self.config.reward_shaping) > 0
+                ):
                     logger.info("Rewarding shaping...")
                     reshaped_dataset = self._reward_shaping(scored_dataset)
                 else:
@@ -215,7 +219,7 @@ def run(self, thread_event: threading.Event = None):
 
             # step 10. export the result to the output buffer
             try:
-                logger.info('Writing processed data to output buffer...')
+                logger.info("Writing processed data to output buffer...")
                 res_dataset.write_to_buffer()
             except Exception:
                 traceback.print_exc()
@@ -325,13 +329,21 @@ def _reward_shaping_single(self, sample, reward_shaping_config: RewardShapingCon
         if tgt_stats not in sample[Fields.stats]:
             return sample
         if op_type == OpType.ADD:
-            sample[self.config.format.reward_key] += reward_shaping_config.weight * sample[Fields.stats][tgt_stats]
+            sample[self.config.format.reward_key] += (
+                reward_shaping_config.weight * sample[Fields.stats][tgt_stats]
+            )
         elif op_type == OpType.MUL:
-            sample[self.config.format.reward_key] *= reward_shaping_config.weight * sample[Fields.stats][tgt_stats]
+            sample[self.config.format.reward_key] *= (
+                reward_shaping_config.weight * sample[Fields.stats][tgt_stats]
+            )
         elif op_type == OpType.SUB:
-            sample[self.config.format.reward_key] -= reward_shaping_config.weight * sample[Fields.stats][tgt_stats]
+            sample[self.config.format.reward_key] -= (
+                reward_shaping_config.weight * sample[Fields.stats][tgt_stats]
+            )
         elif op_type == OpType.DIV:
-            sample[self.config.format.reward_key] /= reward_shaping_config.weight * sample[Fields.stats][tgt_stats]
+            sample[self.config.format.reward_key] /= (
+                reward_shaping_config.weight * sample[Fields.stats][tgt_stats]
+            )
         return sample
 
     def _reward_shaping(self, rft_dataset: RftDataset) -> RftDataset:
@@ -342,7 +354,9 @@ def _reward_shaping(self, rft_dataset: RftDataset) -> RftDataset:
         # get reward shaping configs
         reward_shaping_configs = self.config.reward_shaping
         for reward_shaping_config in reward_shaping_configs:
-            dataset = dataset.map(partial(self._reward_shaping_single, reward_shaping_config=reward_shaping_config))
+            dataset = dataset.map(
+                partial(self._reward_shaping_single, reward_shaping_config=reward_shaping_config)
+            )
 
         rft_dataset.data = dataset
         return rft_dataset
diff --git a/trinity/data/core/dataset.py b/trinity/data/core/dataset.py
@@ -1,22 +1,24 @@
 from abc import ABC
-from dataclasses import dataclass, fields, asdict
+from dataclasses import asdict, dataclass, fields
 from typing import Any, Dict, List, Optional, Union
 
 import networkx as nx
 from datasets import Dataset, concatenate_datasets
 
 from trinity.buffer import get_buffer_reader, get_buffer_writer
-from trinity.common.config import BufferConfig, DataPipelineConfig, StorageConfig
+from trinity.common.config import BufferConfig, DataPipelineConfig
 from trinity.data.core.formatter import BaseDataFormatter
 from trinity.utils.log import get_logger
 
 logger = get_logger(__name__)
 
+
 def dict_to_dataclass(cls, d):
     valid_keys = {f.name for f in fields(cls)}
     filtered = {k: v for k, v in d.items() if k in valid_keys}
     return cls(**filtered)
 
+
 @dataclass
 class RewardSchema:
     """Schema for reward related fields"""
diff --git a/trinity/data/processors/cleaner.py b/trinity/data/processors/cleaner.py
@@ -35,7 +35,7 @@ def __init__(
         dj_cfg: Optional[Namespace],
         clean_strategy: str = "iterative",
         min_size_ratio: PositiveFloat = None,
-        data_dist: str = "gaussian",
+        data_dist: Optional[str] = "gaussian",
         op_weights: dict = None,
         **kwargs,
     ):
diff --git a/trinity/data/server.py b/trinity/data/server.py
@@ -1,16 +1,18 @@
-import fire
 import threading
+from typing import List
+
+import fire
 import ray
 from flask import Flask, jsonify, request
 from markupsafe import escape
-from typing import List
 
 app = Flask(__name__)
 
 APP_NAME = "data_processor"
 
 EVNET_POOL: List[threading.Event] = []
 
+
 @app.route(f"/{APP_NAME}/<pipeline_type>", methods=["GET"])
 def data_processor(pipeline_type):
     from trinity.common.config import load_config
@@ -57,13 +59,15 @@ def data_processor(pipeline_type):
         EVNET_POOL.append(event)
         return jsonify({"return_code": 0, "message": "Experience pipeline starts successfully."})
 
+
 @app.route(f"/{APP_NAME}/stop_all", methods=["GET"])
 def stop_all():
     try:
         for event in EVNET_POOL:
             event.set()
-    except:
+    except Exception:
         import traceback
+
         traceback.print_exc()
         return jsonify({"return_code": 1, "message": traceback.format_exc()})
     return jsonify({"return_code": 0, "message": "All data pipelines are stopped."})