@@ -409,7 +409,7 @@ def _repair_agent_conversations(conversations: str,
409409
410410advertise_gen_prompt = """Task: Generating advertisements based on keywords.
411411Keywords: {query}
412- Advertisements: """
412+ Advertisements:"""
413413register_dataset (
414414 DatasetName .advertise_gen_zh ,
415415 'lvjianjin/AdvertiseGen' , ['train' ], ['validation' ],
@@ -513,7 +513,7 @@ def _preprocess_dureader_robust(dataset: HfDataset) -> HfDataset:
513513 prompt = """Task: Question Generation
514514Context: {context}
515515Answer: {answer}
516- Question: """
516+ Question:"""
517517 query = []
518518 response = []
519519 for d in dataset :
@@ -850,7 +850,7 @@ def _preprocess_hc3(dataset: HfDataset) -> HfDataset:
850850Question: {question}
851851Answer: {answer}
852852Category: Human, ChatGPT
853- Output: """
853+ Output:"""
854854 query = []
855855 response = []
856856 for d in dataset :
@@ -978,6 +978,9 @@ def add_self_cognition_dataset(
978978 return concatenate_datasets ([train_dataset , dataset ])
979979
980980
981+ NoneType = type (None )
982+
983+
981984def _check_dataset (
982985 dataset : Optional [None ],
983986 check_dataset_strategy : Literal ['none' , 'discard' , 'error' , 'warning' ]
@@ -1003,7 +1006,7 @@ def _check_dataset(
10031006 continue
10041007 else :
10051008 raise ValueError (f"d['response']: { d ['response' ]} , i: { i } " )
1006- if has_query and not isinstance (d ['response' ], str ):
1009+ if has_query and not isinstance (d ['query' ], ( str , NoneType ) ):
10071010 is_modified = True
10081011 if check_dataset_strategy == 'discard' :
10091012 continue
@@ -1012,7 +1015,7 @@ def _check_dataset(
10121015 continue
10131016 else :
10141017 raise ValueError (f"d['query']: { d ['query' ]} , i: { i } " )
1015- if has_history and not isinstance (d ['history' ], (list , type ( None ) )):
1018+ if has_history and not isinstance (d ['history' ], (list , NoneType )):
10161019 is_modified = True
10171020 if check_dataset_strategy == 'discard' :
10181021 continue
@@ -1021,7 +1024,7 @@ def _check_dataset(
10211024 continue
10221025 else :
10231026 raise ValueError (f"d['history']: { d ['history' ]} , i: { i } " )
1024- if has_system and not isinstance (d ['system' ], str ):
1027+ if has_system and not isinstance (d ['system' ], ( str , NoneType ) ):
10251028 is_modified = True
10261029 if check_dataset_strategy == 'discard' :
10271030 continue
0 commit comments