-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprepare_rl_data.py
More file actions
37 lines (32 loc) · 1.01 KB
/
prepare_rl_data.py
File metadata and controls
37 lines (32 loc) · 1.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import json
from datasets import Dataset
def load_rl_dataset(path: str = "train.jsonl"):
    """
    Build a prompt-only dataset for GRPO training.

    Reads a JSON Lines file in which every record carries a ``messages``
    list, a ``faq_id`` and a ``topic``, and keeps only the fields needed to
    generate completions and score them.

    Each entry has:
    - prompt: the first two messages of the record, in chat format
      (expected to be the system message followed by the user question;
      the order is taken on trust from the training file)
    - faq_id: which FAQ this question maps to (for factual rewards),
      always converted to ``str``
    - topic: the topic category

    Args:
        path: Location of the JSONL training file. Defaults to
            ``"train.jsonl"`` relative to the current working directory.

    Returns:
        The object produced by ``Dataset.from_list`` with one row per
        non-blank input line.

    Raises:
        OSError: If ``path`` cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``messages``, ``faq_id`` or ``topic``.
        IndexError: If a record has fewer than two messages.
    """
    examples = []
    # JSON text is UTF-8; being explicit keeps decoding independent of the
    # platform's locale encoding.
    with open(path, encoding="utf-8") as f:
        for line in f:
            # Tolerate blank / whitespace-only lines instead of letting
            # json.loads fail on them.
            if not line.strip():
                continue
            ex = json.loads(line)
            messages = ex["messages"]
            # Keep only the leading context as the prompt; any later
            # messages in the record are dropped.
            prompt = [
                messages[0],  # system message
                messages[1],  # user message
            ]
            examples.append({
                "prompt": prompt,
                "faq_id": str(ex["faq_id"]),
                "topic": ex["topic"],
            })
    dataset = Dataset.from_list(examples)
    print(f"RL dataset: {len(dataset)} prompts")
    return dataset
if __name__ == "__main__":
    # Script entry point: build the dataset and print its first row as a
    # quick visual check of the record layout.
    rl_dataset = load_rl_dataset()
    first_row = rl_dataset[0]
    print(first_row)