@@ -140,20 +140,29 @@ def handle_warning(invalid_row) -> str:
140
140
"encoding" : self .encoding ,
141
141
}
142
142
143
- def _finalize_pandas_output (self , frame : DataFrame ) -> DataFrame :
144
- """
145
- Processes data read in based on kwargs.
143
def _get_convert_options(self):
    """
    Build the pyarrow ``ConvertOptions`` object from ``self.convert_options``.

    Returns
    -------
    pyarrow.csv.ConvertOptions
        Options constructed by unpacking ``self.convert_options`` as
        keyword arguments.

    Raises
    ------
    TypeError
        If pyarrow rejects the options. When ``null_values`` is not a
        list-like made up solely of strings, a more descriptive TypeError is
        raised (chained to the pyarrow one); otherwise the original error
        is re-raised unchanged.
    """
    pyarrow_csv = import_optional_dependency("pyarrow.csv")
    options = self.convert_options

    try:
        return pyarrow_csv.ConvertOptions(**options)
    except TypeError as err:
        # pyarrow's TypeError does not say which option was at fault, so
        # re-check the options we can diagnose ourselves.
        selected = options.get("include_columns", None)
        if selected is not None:
            # May raise its own, more specific, error.
            self._validate_usecols(selected)

        na_markers = options.get("null_values", set())
        only_strings = lib.is_list_like(na_markers) and all(
            isinstance(marker, str) for marker in na_markers
        )
        if not only_strings:
            raise TypeError(
                "The 'pyarrow' engine requires all na_values to be strings"
            ) from err

        # Nothing we could diagnose: surface pyarrow's original error.
        raise
164
+
165
+ def _adjust_column_names (self , frame : DataFrame ) -> tuple [DataFrame , bool ]:
157
166
num_cols = len (frame .columns )
158
167
multi_index_named = True
159
168
if self .header is None :
@@ -169,8 +178,9 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
169
178
self .names = columns_prefix + self .names
170
179
multi_index_named = False
171
180
frame .columns = self .names
181
+ return frame , multi_index_named
172
182
173
- frame = self . _do_date_conversions ( frame . columns , frame )
183
+ def _finalize_index ( self , frame : DataFrame , multi_index_named : bool ) -> DataFrame :
174
184
if self .index_col is not None :
175
185
index_to_set = self .index_col .copy ()
176
186
for i , item in enumerate (self .index_col ):
@@ -196,6 +206,9 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
196
206
if self .header is None and not multi_index_named :
197
207
frame .index .names = [None ] * len (frame .index .names )
198
208
209
+ return frame
210
+
211
+ def _finalize_dtype (self , frame : DataFrame ) -> DataFrame :
199
212
if self .dtype is not None :
200
213
# Ignore non-existent columns from dtype mapping
201
214
# like other parsers do
@@ -214,6 +227,26 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
214
227
raise ValueError (str (err )) from err
215
228
return frame
216
229
230
def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
    """
    Run the post-read processing steps on a freshly parsed frame.

    The steps run in a fixed order: column names are adjusted, date
    conversions are applied, the index is finalized and, last, dtypes
    are finalized.

    Parameters
    ----------
    frame : DataFrame
        The DataFrame to process.

    Returns
    -------
    DataFrame
        The processed DataFrame.
    """
    renamed, multi_index_named = self._adjust_column_names(frame)
    # Date conversion is handed the adjusted column labels, so it has to
    # come after the renaming step.
    dated = self._do_date_conversions(renamed.columns, renamed)
    indexed = self._finalize_index(dated, multi_index_named)
    return self._finalize_dtype(indexed)
249
+
217
250
def _validate_usecols (self , usecols ) -> None :
218
251
if lib .is_list_like (usecols ) and not all (isinstance (x , str ) for x in usecols ):
219
252
raise ValueError (
@@ -239,23 +272,7 @@ def read(self) -> DataFrame:
239
272
pa = import_optional_dependency ("pyarrow" )
240
273
pyarrow_csv = import_optional_dependency ("pyarrow.csv" )
241
274
self ._get_pyarrow_options ()
242
-
243
- try :
244
- convert_options = pyarrow_csv .ConvertOptions (** self .convert_options )
245
- except TypeError as err :
246
- include = self .convert_options .get ("include_columns" , None )
247
- if include is not None :
248
- self ._validate_usecols (include )
249
-
250
- nulls = self .convert_options .get ("null_values" , set ())
251
- if not lib .is_list_like (nulls ) or not all (
252
- isinstance (x , str ) for x in nulls
253
- ):
254
- raise TypeError (
255
- "The 'pyarrow' engine requires all na_values to be strings"
256
- ) from err
257
-
258
- raise
275
+ convert_options = self ._get_convert_options ()
259
276
260
277
try :
261
278
table = pyarrow_csv .read_csv (
0 commit comments