Skip to content

Commit 86188e2

Browse files
authored
support ms-agent-roleplay dataset (#936)
1 parent 2455bc2 commit 86188e2

File tree

1 file changed

+19
-12
lines changed

1 file changed

+19
-12
lines changed

swift/llm/utils/dataset.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -986,20 +986,27 @@ def _preprocess_msagent_multirole_dataset(dataset: HfDataset) -> HfDataset:
986986
只根据对话历史进行回复\n3. 长话短说,不要说太多话,不要超过50字 """
987987
history_prompt = '\n\n【chat history】'
988988
conv_prompt = '\n {name}:{content}'
989-
query = []
990-
response = []
989+
system, query, response = [], [], []
990+
991+
def process_conversation(conv):
    """Turn one multi-role conversation into a ``(system, query, response)`` triple.

    ``conv`` is a sequence of turns, each a mapping with a ``'from'`` (speaker)
    and a ``'value'`` (text) entry. The last turn's text is always the response.

    * If the first turn comes from ``'user'``, its text becomes the query and
      the system prompt is empty.
    * Otherwise the first turn's text is the system prompt and the query is
      empty. Unless that prompt already contains ``'next_speakers:'``, the
      intermediate turns (everything between the first and the last) are
      appended to it as chat history.

    ``res_prompt``, ``history_prompt`` and ``conv_prompt`` are read from the
    enclosing scope.
    """
    reply = conv[-1]['value']
    opening = conv[0]

    # User-initiated conversation: no system prompt, the first turn is the query.
    if opening['from'] == 'user':
        return '', opening['value'], reply

    prompt = opening['value']

    # Prompts that already carry a 'next_speakers:' section are passed through
    # untouched.
    if 'next_speakers:' in prompt:
        return prompt, '', reply

    # Append the reply guidelines only to a non-empty prompt that does not
    # already include the '【注意事项】' marker.
    # NOTE(review): presumably res_prompt itself starts with that marker — confirm.
    if prompt and '【注意事项】' not in prompt:
        prompt += res_prompt

    # Fold every intermediate turn into the prompt as chat history.
    prompt += history_prompt
    for turn in conv[1:-1]:
        prompt += conv_prompt.format(name=turn['from'], content=turn['value'])

    return prompt, '', reply
9911003

9921004
for d in dataset:
993-
conv = d['conversations']
994-
system = conv[0]['value']
995-
if '【注意事项】' not in system:
996-
system += res_prompt
997-
system += history_prompt
998-
response.append(conv[-1]['value'])
999-
for i in range(1, len(conv) - 1):
1000-
system += conv_prompt.format(name=conv[i]['from'], content=conv[i]['value'])
1001-
query.append(system)
1002-
return HfDataset.from_dict({'query': query, 'response': response})
1005+
sys, qry, resp = process_conversation(d['conversations'])
1006+
system.append(sys)
1007+
query.append(qry)
1008+
response.append(resp)
1009+
return HfDataset.from_dict({'system': system, 'query': query, 'response': response})
10031010

10041011

10051012
register_dataset(

0 commit comments

Comments
 (0)