* update configs of exp_pipeline

HYLcool · HYLcool · commit d9d47736c6cf · 2025-06-27T10:32:28.000+08:00
diff --git a/examples/grpo_gsm8k_experience_pipeline/dj_scoring_exp.yaml b/examples/grpo_gsm8k_experience_pipeline/dj_scoring_exp.yaml
@@ -4,5 +4,6 @@ project_name: 'gsm-8k-experience-quality'
 process:
   - llm_quality_score_filter:
       api_or_hf_model: "qwen2.5-32b-instruct"  # use "qwen2.5-32b-instruct" to calculate the quality scores.
-      input_keys: ["prompt", "response"]  # set input_keys and field_names to the existing key names in gsm-8k. Here calculating the difficulty scores according to both questions and answers.
+      min_score: 0.0  # presumably keeps every sample so the op only attaches scores - TODO confirm
+      input_keys: ["prompt_text", "response_text"]  # set input_keys to the existing key names in the experience; field_names are the labels shown to the scoring model. Here calculating the quality scores according to both questions and answers.
       field_names: ["prompt", "response"]
diff --git a/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml b/examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml
@@ -11,20 +11,19 @@ data_processor:
     # I/O buffers
     input_buffers:
       - name: gsm8k_exp_output
-        storage_type: queue
-        path: 'sqlite:///gsm8k_exp_output.db'
     output_buffer:
       name: reshaped_gsm8k_exp_input
-      storage_type: queue
-      path: 'sqlite:///reshaped_gsm8k_exp_input.db'
     # format mapping
     format:
-      prompt_key: 'prompt'
-      response_key: 'response'
-      reward_key: 'reward'
+      reward_key: 'reward'  # the key name of the reward in the experience
     # data active iterator related
     dj_config_path: 'examples/grpo_gsm8k_experience_pipeline/dj_scoring_exp.yaml'
     clean_strategy: 'iterative'
+    # reward shaping
+    reward_shaping:
+      - stats_key: 'llm_quality_score'  # presumably the stat written by llm_quality_score_filter in the dj_config_path recipe - TODO confirm
+        op_type: ADD  # NOTE(review): looks like the weighted stat is added to the reward - confirm
+        weight: 1.0  # multiplier applied to the stat before combining (assumed) - verify against the reward-shaping implementation
 
 model:
   model_path: /PATH/TO/MODEL/