Skip to content

Commit 35d220b

Browse files
shijinpjlabtenwanft
authored and committed
feat: fix lint
1 parent 97a83cc commit 35d220b

File tree

2 files changed

+25
-26
lines changed

2 files changed

+25
-26
lines changed

dingo/config/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
from dingo.config.input_args import (DatasetArgs, DatasetExcelArgs, DatasetFieldArgs, DatasetHFConfigArgs, DatasetS3ConfigArgs, DatasetSqlArgs, EvalPipline, EvalPiplineConfig, EvaluatorLLMArgs, # noqa E402.
2-
EvaluatorRuleArgs, ExecutorArgs, ExecutorResultSaveArgs, InputArgs)
1+
from dingo.config.input_args import (DatasetArgs, DatasetExcelArgs, DatasetFieldArgs, DatasetHFConfigArgs, DatasetS3ConfigArgs, DatasetSqlArgs, EvalPipline, EvalPiplineConfig, # noqa E402.
2+
EvaluatorLLMArgs, EvaluatorRuleArgs, ExecutorArgs, ExecutorResultSaveArgs, InputArgs)

dingo/data/datasource/local.py

Lines changed: 23 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
import os
21
import json
2+
import os
33
from typing import Any, Dict, Generator, List, Optional
44

55
from dingo.config import InputArgs
@@ -72,10 +72,10 @@ def _load_excel_file_xlsx(self, path: str) -> Generator[str, None, None]:
7272
try:
7373
# 使用只读模式加载工作簿,节省内存
7474
wb = load_workbook(filename=path, read_only=True, data_only=True)
75-
75+
7676
sheet_name = self.input_args.dataset.excel_config.sheet_name
7777
has_header = self.input_args.dataset.excel_config.has_header
78-
78+
7979
# 选择工作表
8080
if isinstance(sheet_name, str):
8181
if sheet_name not in wb.sheetnames:
@@ -90,15 +90,15 @@ def _load_excel_file_xlsx(self, path: str) -> Generator[str, None, None]:
9090

9191
# 获取所有行的迭代器
9292
rows = ws.iter_rows(values_only=True)
93-
93+
9494
# 处理标题行
9595
if has_header:
9696
# 读取第一行作为标题
9797
headers = next(rows, None)
9898
if headers is None:
9999
wb.close()
100100
raise RuntimeError(f'Excel file "{path}" is empty')
101-
101+
102102
# 将标题转换为列表,处理 None 值
103103
headers = [str(h) if h is not None else f'Column_{i}' for i, h in enumerate(headers)]
104104
else:
@@ -107,34 +107,34 @@ def _load_excel_file_xlsx(self, path: str) -> Generator[str, None, None]:
107107
if first_row is None:
108108
wb.close()
109109
raise RuntimeError(f'Excel file "{path}" is empty')
110-
110+
111111
# 使用列序号作为列名
112112
headers = [str(i) for i in range(len(first_row))]
113-
113+
114114
# 处理第一行数据
115115
if not all(cell is None for cell in first_row):
116116
row_dict = {}
117117
for i, (header, value) in enumerate(zip(headers, first_row)):
118118
row_dict[header] = value if value is not None else ""
119119
yield json.dumps(row_dict, ensure_ascii=False) + '\n'
120-
120+
121121
# 逐行读取数据并转换为 JSON
122122
for row in rows:
123123
# 跳过空行
124124
if all(cell is None for cell in row):
125125
continue
126-
126+
127127
# 将行数据与标题组合成字典
128128
row_dict = {}
129129
for i, (header, value) in enumerate(zip(headers, row)):
130130
# 处理值为 None 的情况
131131
row_dict[header] = value if value is not None else ""
132-
132+
133133
# 转换为 JSON 字符串并 yield
134134
yield json.dumps(row_dict, ensure_ascii=False) + '\n'
135-
135+
136136
wb.close()
137-
137+
138138
except Exception as e:
139139
raise RuntimeError(
140140
f'Failed to read .xlsx file "{path}": {str(e)}. '
@@ -161,10 +161,10 @@ def _load_excel_file_xls(self, path: str) -> Generator[str, None, None]:
161161
try:
162162
# 打开工作簿
163163
wb = xlrd.open_workbook(path, on_demand=True)
164-
164+
165165
sheet_name = self.input_args.dataset.excel_config.sheet_name
166166
has_header = self.input_args.dataset.excel_config.has_header
167-
167+
168168
# 选择工作表
169169
if isinstance(sheet_name, str):
170170
try:
@@ -180,38 +180,38 @@ def _load_excel_file_xls(self, path: str) -> Generator[str, None, None]:
180180

181181
if ws.nrows == 0:
182182
raise RuntimeError(f'Excel file "{path}" is empty')
183-
183+
184184
# 处理标题行
185185
start_row = 0
186186
if has_header:
187187
# 读取第一行作为标题
188-
headers = [str(cell.value) if cell.value is not None else f'Column_{i}'
188+
headers = [str(cell.value) if cell.value is not None else f'Column_{i}'
189189
for i, cell in enumerate(ws.row(0))]
190190
start_row = 1
191191
else:
192192
# 使用列序号作为列名
193193
headers = [str(i) for i in range(ws.ncols)]
194194
start_row = 0
195-
195+
196196
# 逐行读取数据并转换为 JSON
197197
for row_idx in range(start_row, ws.nrows):
198198
row = ws.row(row_idx)
199-
199+
200200
# 跳过空行
201201
if all(cell.value is None or cell.value == '' for cell in row):
202202
continue
203-
203+
204204
# 将行数据与标题组合成字典
205205
row_dict = {}
206206
for i, (header, cell) in enumerate(zip(headers, row)):
207207
# 处理值为 None 或空的情况
208208
row_dict[header] = cell.value if cell.value is not None else ""
209-
209+
210210
# 转换为 JSON 字符串并 yield
211211
yield json.dumps(row_dict, ensure_ascii=False) + '\n'
212-
212+
213213
wb.release_resources()
214-
214+
215215
except Exception as e:
216216
raise RuntimeError(
217217
f'Failed to read .xls file "{path}": {str(e)}. '
@@ -229,7 +229,7 @@ def _load_local_file(self) -> Generator[str, None, None]:
229229

230230
if not os.path.exists(self.path):
231231
raise RuntimeError(f'"{self.path}" is not a valid path')
232-
232+
233233
f_list = []
234234
if os.path.exists(self.path) and os.path.isfile(self.path):
235235
f_list = [self.path]
@@ -284,4 +284,3 @@ def _load_local_file(self) -> Generator[str, None, None]:
284284
f'Unexpected error reading file "{f}": {str(e)}. '
285285
f'Please check if the file exists and is readable.'
286286
)
287-

0 commit comments

Comments
 (0)