@@ -986,20 +986,27 @@ def _preprocess_msagent_multirole_dataset(dataset: HfDataset) -> HfDataset:
986986 只根据对话历史进行回复\n 3. 长话短说,不要说太多话,不要超过50字 """
987987 history_prompt = '\n \n 【chat history】'
988988 conv_prompt = '\n {name}:{content}'
989- query = []
990- response = []
989+ system , query , response = [], [], []
990+
def process_conversation(conv):
    """Split one multi-role conversation into ``(system, query, response)``.

    Args:
        conv: Non-empty sequence of turns. Each turn is a mapping with a
            ``'from'`` key (the speaker name) and a ``'value'`` key (the
            text of the turn).

    Returns:
        A ``(system, query, response)`` tuple of strings. ``response`` is
        always the text of the last turn.

        * If the first turn is from ``'user'``, it becomes ``query`` and
          ``system`` is the empty string.
        * Otherwise the first turn is the system prompt and ``query`` is
          the empty string. Unless that prompt contains
          ``'next_speakers:'``, the turns strictly between the first and
          the last are appended to it as chat history.

    Raises:
        IndexError: If ``conv`` is empty.

    ``res_prompt``, ``history_prompt`` and ``conv_prompt`` are free names
    resolved from the surrounding scope.
    """
    opening, closing = conv[0], conv[-1]
    reply = closing['value']

    # A conversation opened by the user has no system prompt at all.
    if opening['from'] == 'user':
        return '', opening['value'], reply

    prompt = opening['value']
    # Prompts that already carry a 'next_speakers:' directive are passed
    # through untouched; every other prompt gets the history appended.
    if 'next_speakers:' not in prompt:
        # Only add the response rules to a non-empty prompt that does not
        # already include its own '【注意事项】' section.
        if prompt and '【注意事项】' not in prompt:
            prompt += res_prompt
        prompt += history_prompt
        prompt += ''.join(
            conv_prompt.format(name=turn['from'], content=turn['value'])
            for turn in conv[1:-1])
    return prompt, '', reply
9911003
9921004 for d in dataset :
993- conv = d ['conversations' ]
994- system = conv [0 ]['value' ]
995- if '【注意事项】' not in system :
996- system += res_prompt
997- system += history_prompt
998- response .append (conv [- 1 ]['value' ])
999- for i in range (1 , len (conv ) - 1 ):
1000- system += conv_prompt .format (name = conv [i ]['from' ], content = conv [i ]['value' ])
1001- query .append (system )
1002- return HfDataset .from_dict ({'query' : query , 'response' : response })
1005+ sys , qry , resp = process_conversation (d ['conversations' ])
1006+ system .append (sys )
1007+ query .append (qry )
1008+ response .append (resp )
1009+ return HfDataset .from_dict ({'system' : system , 'query' : query , 'response' : response })
10031010
10041011
10051012register_dataset (
0 commit comments