Skip to content

Commit 7e751d5

Browse files
authored
fix dataset copy (#1569)
1 parent ebfb636 commit 7e751d5

File tree

1 file changed

+4
-11
lines changed

1 file changed

+4
-11
lines changed

swift/llm/utils/dataset.py

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2582,18 +2582,11 @@ def load_dataset_from_local(dataset_path_list: Optional[Union[str, List[str]]],
2582 2582
assert isinstance(dataset_path, str)
2583 2583
df: DataFrame
2584 2584
if dataset_path.endswith('.csv'):
2585-
df = pd.read_csv(dataset_path, na_filter=False, dtype=str)
2586-
elif dataset_path.endswith('.jsonl'):
2587-
df = transform_jsonl_to_df(read_from_jsonl(dataset_path))
2588-
elif dataset_path.endswith('.json'):
2589-
with open(dataset_path, 'r', encoding='utf-8') as f:
2590-
obj_list = json.load(f)
2591-
df = transform_jsonl_to_df(obj_list)
2585+
dataset = HfDataset.from_csv(dataset_path, na_filter=False)
2586+
elif dataset_path.endswith('.jsonl') or dataset_path.endswith('.json'):
2587+
dataset = HfDataset.from_json(dataset_path)
2592 2588
else:
2593-
raise ValueError('The custom dataset only supports CSV, JSONL or JSON format. You can refer to the link '
2594-
'`https://github.com/modelscope/swift/blob/main/docs/source/LLM/自定义与拓展.md#注册数据集的方式` '
2595-
'for more information.')
2596-
dataset = HfDataset.from_dict(df.to_dict(orient='list'))
2589+
raise ValueError('The custom dataset only supports CSV, JSONL or JSON format.')
2597 2590
dataset_list.append(preprocess_func(dataset))
2598 2591
return concatenate_datasets(dataset_list)
2599 2592

0 commit comments

Comments
 (0)