Skip to content

Commit 97a83cc

Browse files
shijinpjlab authored and tenwanft committed
feat: support excel
1 parent 989415d commit 97a83cc

File tree

6 files changed

+786
-84
lines changed

6 files changed

+786
-84
lines changed

dingo/config/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
from dingo.config.input_args import (DatasetArgs, DatasetFieldArgs, DatasetHFConfigArgs, DatasetS3ConfigArgs, DatasetSqlArgs, EvalPipline, EvalPiplineConfig, EvaluatorLLMArgs, # noqa E402.
1+
from dingo.config.input_args import (DatasetArgs, DatasetExcelArgs, DatasetFieldArgs, DatasetHFConfigArgs, DatasetS3ConfigArgs, DatasetSqlArgs, EvalPipline, EvalPiplineConfig, EvaluatorLLMArgs, # noqa E402.
22
EvaluatorRuleArgs, ExecutorArgs, ExecutorResultSaveArgs, InputArgs)

dingo/config/input_args.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ class DatasetSqlArgs(BaseModel):
2727
connect_args: str = '' # 连接参数,如 ?charset=utf8mb4
2828

2929

30+
class DatasetExcelArgs(BaseModel):
31+
sheet_name: str | int = 0 # Defaults to reading the first worksheet
32+
has_header: bool = True # Whether the first row holds the column names; if False, column indices are used as the column names
33+
34+
3035
class DatasetFieldArgs(BaseModel):
3136
id: str = ''
3237
prompt: str = ''
@@ -43,6 +48,7 @@ class DatasetArgs(BaseModel):
4348
hf_config: DatasetHFConfigArgs = DatasetHFConfigArgs()
4449
s3_config: DatasetS3ConfigArgs = DatasetS3ConfigArgs()
4550
sql_config: DatasetSqlArgs = DatasetSqlArgs()
51+
excel_config: DatasetExcelArgs = DatasetExcelArgs()
4652

4753

4854
class ExecutorResultSaveArgs(BaseModel):

dingo/data/converter/base.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,27 @@ def _convert(raw: Union[str, Dict]):
261261
return _convert
262262

263263

264+
@BaseConverter.register("excel")
265+
class ExcelConverter(BaseConverter):
266+
"""Excel file converter."""
267+
268+
def __init__(self):
269+
super().__init__()
270+
271+
@classmethod
272+
def convertor(cls, input_args: InputArgs) -> Callable:
273+
def _convert(raw: Union[str, Dict]):
274+
j = raw
275+
if isinstance(raw, str):
276+
j = json.loads(raw)
277+
# Store the Excel row data as a JSON string in the content attribute,
278+
# which keeps the data structure consistent with the other data formats
279+
data_dict = {"content": json.dumps(j, ensure_ascii=False)}
280+
return Data(**data_dict)
281+
282+
return _convert
283+
284+
264285
@BaseConverter.register("listjson")
265286
class ListJsonConverter(BaseConverter):
266287
"""List json file converter."""

0 commit comments

Comments
 (0)