Skip to content

Commit 4497838

Browse files
authored
Merge pull request #305 from shijinpjlab/dev_1218
feat: support excel
2 parents 2fa6fde + a691469 commit 4497838

File tree

8 files changed

+833
-85
lines changed

8 files changed

+833
-85
lines changed

dingo/config/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
from dingo.config.input_args import (DatasetArgs, DatasetFieldArgs, DatasetHFConfigArgs, DatasetS3ConfigArgs, DatasetSqlArgs, EvalPipline, EvalPiplineConfig, EvaluatorLLMArgs, # noqa E402.
2-
EvaluatorRuleArgs, ExecutorArgs, ExecutorResultSaveArgs, InputArgs)
1+
from dingo.config.input_args import (DatasetArgs, DatasetExcelArgs, DatasetFieldArgs, DatasetHFConfigArgs, DatasetS3ConfigArgs, DatasetSqlArgs, EvalPipline, EvalPiplineConfig, # noqa E402.
2+
EvaluatorLLMArgs, EvaluatorRuleArgs, ExecutorArgs, ExecutorResultSaveArgs, InputArgs)

dingo/config/input_args.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,11 @@ class DatasetSqlArgs(BaseModel):
2727
connect_args: str = '' # 连接参数,如 ?charset=utf8mb4
2828

2929

30+
class DatasetExcelArgs(BaseModel):
    """Options for reading an Excel dataset."""

    # Worksheet to read, given by name or by index; defaults to the first one.
    sheet_name: str | int = 0
    # Whether the first row holds the column names; when False, column
    # indices are used as the column names instead.
    has_header: bool = True
33+
34+
3035
class DatasetFieldArgs(BaseModel):
3136
id: str = ''
3237
prompt: str = ''
@@ -43,6 +48,7 @@ class DatasetArgs(BaseModel):
4348
hf_config: DatasetHFConfigArgs = DatasetHFConfigArgs()
4449
s3_config: DatasetS3ConfigArgs = DatasetS3ConfigArgs()
4550
sql_config: DatasetSqlArgs = DatasetSqlArgs()
51+
excel_config: DatasetExcelArgs = DatasetExcelArgs()
4652

4753

4854
class ExecutorResultSaveArgs(BaseModel):

dingo/data/converter/base.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,25 @@ def _convert(raw: Union[str, Dict]):
261261
return _convert
262262

263263

264+
@BaseConverter.register("excel")
class ExcelConverter(BaseConverter):
    """Converter for records that come from an Excel file.

    A record is accepted either as a dict or as a JSON string; in both cases
    its keys are handed to ``Data`` as keyword arguments.
    """

    def __init__(self):
        super().__init__()

    @classmethod
    def convertor(cls, input_args: InputArgs) -> Callable:
        """Build the function that turns one raw record into a ``Data``.

        Args:
            input_args: Run configuration; not consulted by this converter.

        Returns:
            A callable taking a single raw record and returning ``Data``.
        """

        def _convert(raw: Union[str, Dict]):
            # String records are decoded as JSON; anything else is used as-is.
            fields = json.loads(raw) if isinstance(raw, str) else raw
            return Data(**fields)

        return _convert
281+
282+
264283
@BaseConverter.register("listjson")
265284
class ListJsonConverter(BaseConverter):
266285
"""List json file converter."""

0 commit comments

Comments
 (0)