Skip to content

Commit 890d49d

Browse files
committed
changed raft_dataset.py
1 parent af53ee0 commit 890d49d

File tree

4 files changed

+116
-10
lines changed

4 files changed

+116
-10
lines changed

recipes/finetuning/datasets/raft_dataset.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -64,21 +64,24 @@ def tokenize_dialog(dialog, tokenizer):
6464

6565
return dict(combined_tokens, attention_mask=[1]*len(combined_tokens["input_ids"]))
6666
def raft_tokenize(q_a_pair, tokenizer):
67-
# last line is the question
68-
question = q_a_pair["instruction"].split('\n')[-1]
69-
# all the lines before the last line are the context
70-
documents = q_a_pair["instruction"].split('\n')[:-1]
67+
end_tag = "<\/DOCUMENT>\n"
68+
# find the last end_tag in the instruction, the rest is the question
69+
index =q_a_pair["instruction"].rindex("<\/DOCUMENT>\n")+len(end_tag)
70+
question = q_a_pair["instruction"][index:]
71+
# all the lines before end_tag are the context
72+
documents = q_a_pair["instruction"][:index]
7173
# output is the label
7274
answer = q_a_pair["output"]
7375
system_prompt = "You are a helpful chatbot who can provide an answer to every questions from the user given a relevant context."
7476
user_prompt = """
7577
Question: {question}\nContext: {context}\n
76-
Answer this question using the information given multiple documents in the context above. Here is things to pay attention to:
78+
Answer this question using the information given by multiple documents in the context above. Here are things to pay attention to:
79+
- The context contains many documents, each document starts with <DOCUMENT> and ends </DOCUMENT>.
7780
- First provide step-by-step reasoning on how to answer the question.
7881
- In the reasoning, if you need to copy paste some sentences from the context, include them in ##begin_quote## and ##end_quote##. This would mean that things outside of ##begin_quote## and ##end_quote## are not directly copy paste from the context.
79-
- End your response with final answer in the form <ANSWER>: $answer, the answer should be succinct.
80-
You MUST begin your final answer with the tag "<ANSWER>:".
81-
""".format(question=question, context=str(documents))
82+
- End your response with final answer in the form <ANSWER>: $answer, the answer should less than 60 words.
83+
You MUST begin your final answer with the tag "<ANSWER>
84+
""".format(question=question, context=documents)
8285

8386
chat = [
8487
{"role": "system", "content": system_prompt},

0 commit comments

Comments
 (0)