Skip to content

Commit 3bf0567

Browse files
committed
feat: update by gemini assist
1 parent d060e63 commit 3bf0567

File tree

1 file changed

+14
-22
lines changed

1 file changed

+14
-22
lines changed

dingo/data/datasource/local.py

Lines changed: 14 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -188,36 +188,28 @@ def _load_csv_file(self, path: str) -> Generator[str, None, None]:
188188
raise RuntimeError(f'CSV file "{path}" is empty')
189189

190190
if has_header:
191-
# 第一行作为列名
191+
# The first row is the header
192192
headers = [str(h).strip() if h else f'column_{i}' for i, h in enumerate(first_row)]
193+
data_rows = csv_reader
193194
else:
194-
# 不使用标题行,使用 column_x 格式
195+
# Generate headers and treat the first row as data
196+
from itertools import chain
195197
headers = [f'column_{i}' for i in range(len(first_row))]
196-
first_row_data = first_row # 保存第一行数据,稍后处理
198+
data_rows = chain([first_row], csv_reader)
197199

198-
# 如果第一行是数据(has_header=False),先处理它
199-
if first_row_data is not None:
200-
row_dict = {}
201-
for i, (header, value) in enumerate(zip(headers, first_row_data)):
202-
row_dict[header] = value.strip() if value else ""
203-
yield json.dumps(row_dict, ensure_ascii=False) + '\n'
204-
205-
# 逐行读取并转换为 JSON
206-
for row in csv_reader:
207-
# 跳过空行
200+
# Process all data rows in a single loop
201+
for row in data_rows:
202+
# Skip empty rows
208203
if not row or all(not cell.strip() for cell in row):
209204
continue
210205

211-
# 将行数据与标题组合成字典
212-
row_dict = {}
213-
for i, header in enumerate(headers):
214-
# 如果当前行的列数少于标题数,用空字符串填充
215-
if i < len(row):
216-
row_dict[header] = row[i].strip() if row[i] else ""
217-
else:
218-
row_dict[header] = ""
206+
# Combine row data with headers into a dictionary, handling rows with fewer columns
207+
row_dict = {
208+
header: (row[i].strip() if row[i] else "") if i < len(row) else ""
209+
for i, header in enumerate(headers)
210+
}
219211

220-
# 转换为 JSON 字符串并 yield
212+
# Yield the JSON string
221213
yield json.dumps(row_dict, ensure_ascii=False) + '\n'
222214

223215
except UnicodeDecodeError as e:

0 commit comments

Comments
 (0)