@@ -188,36 +188,28 @@ def _load_csv_file(self, path: str) -> Generator[str, None, None]:
188188 raise RuntimeError (f'CSV file "{ path } " is empty' )
189189
190190 if has_header :
191- # 第一行作为列名
191+ # The first row is the header
192192 headers = [str (h ).strip () if h else f'column_{ i } ' for i , h in enumerate (first_row )]
193+ data_rows = csv_reader
193194 else :
194- # 不使用标题行,使用 column_x 格式
195+ # Generate headers and treat the first row as data
196+ from itertools import chain
195197 headers = [f'column_{ i } ' for i in range (len (first_row ))]
196- first_row_data = first_row # 保存第一行数据,稍后处理
198+ data_rows = chain ([ first_row ], csv_reader )
197199
198- # 如果第一行是数据(has_header=False),先处理它
199- if first_row_data is not None :
200- row_dict = {}
201- for i , (header , value ) in enumerate (zip (headers , first_row_data )):
202- row_dict [header ] = value .strip () if value else ""
203- yield json .dumps (row_dict , ensure_ascii = False ) + '\n '
204-
205- # 逐行读取并转换为 JSON
206- for row in csv_reader :
207- # 跳过空行
200+ # Process all data rows in a single loop
201+ for row in data_rows :
202+ # Skip empty rows
208203 if not row or all (not cell .strip () for cell in row ):
209204 continue
210205
211- # 将行数据与标题组合成字典
212- row_dict = {}
213- for i , header in enumerate (headers ):
214- # 如果当前行的列数少于标题数,用空字符串填充
215- if i < len (row ):
216- row_dict [header ] = row [i ].strip () if row [i ] else ""
217- else :
218- row_dict [header ] = ""
206+ # Combine row data with headers into a dictionary, handling rows with fewer columns
207+ row_dict = {
208+ header : (row [i ].strip () if row [i ] else "" ) if i < len (row ) else ""
209+ for i , header in enumerate (headers )
210+ }
219211
220- # 转换为 JSON 字符串并 yield
212+ # Yield the JSON string
221213 yield json .dumps (row_dict , ensure_ascii = False ) + '\n '
222214
223215 except UnicodeDecodeError as e :
0 commit comments