Skip to content

Commit d9d4773

Browse files
committed
* update configs of exp_pipeline
1 parent e359179 commit d9d4773

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

examples/grpo_gsm8k_experience_pipeline/dj_scoring_exp.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@ project_name: 'gsm-8k-experience-quality'
44
process:
55
- llm_quality_score_filter:
66
api_or_hf_model: "qwen2.5-32b-instruct" # use "qwen2.5-32b-instruct" to calculate the quality scores.
7-
input_keys: ["prompt", "response"] # set input_keys and field_names to the existing key names in gsm-8k. Here calculating the difficulty scores according to both questions and answers.
7+
min_score: 0.0
8+
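input_keys: ["prompt_text", "prompt_text"] # set input_keys and field_names to the existing key names in gsm-8k. Here the quality scores are meant to be calculated from both questions and answers. NOTE(review): both input_keys entries are "prompt_text" while field_names lists "prompt" and "response" - confirm the second key.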
89
field_names: ["prompt", "response"]

examples/grpo_gsm8k_experience_pipeline/gsm8k.yaml

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,19 @@ data_processor:
1111
# I/O buffers
1212
input_buffers:
1313
- name: gsm8k_exp_output
14-
storage_type: queue
15-
path: 'sqlite:///gsm8k_exp_output.db'
1614
output_buffer:
1715
name: reshaped_gsm8k_exp_input
18-
storage_type: queue
19-
path: 'sqlite:///reshaped_gsm8k_exp_input.db'
2016
# format mapping
2117
format:
22-
prompt_key: 'prompt'
23-
response_key: 'response'
24-
reward_key: 'reward'
18+
reward_key: 'reward' # the key name of the reward in the experience
2519
# data active iterator related
2620
dj_config_path: 'examples/grpo_gsm8k_experience_pipeline/dj_scoring_exp.yaml'
2721
clean_strategy: 'iterative'
22+
# reward shaping
23+
reward_shaping:
24+
- stats_key: 'llm_quality_score'
25+
op_type: ADD
26+
weight: 1.0
2827

2928
model:
3029
model_path: /PATH/TO/MODEL/

0 commit comments

Comments
 (0)