15
15
from decimal import Decimal
16
16
17
17
import numpy as np
18
- import pandas as pd
18
+
19
+ try :
20
+ import pandas as pd
21
+ except ImportError :
22
+ pd = None
19
23
20
24
try :
21
25
import polars as pl
@@ -170,6 +174,9 @@ def _arrow_to_pandas(arrow_table):
170
174
See https://arrow.apache.org/docs/python/pandas.html#reducing-memory-use-in-table-to-pandas
171
175
for details.
172
176
"""
177
+ if pd is None :
178
+ msg = "pandas is not installed. Try pip install pandas."
179
+ raise ValueError (msg )
173
180
return arrow_table .to_pandas (split_blocks = True , self_destruct = True )
174
181
175
182
@@ -238,10 +245,10 @@ def _arrow_to_numpy(arrow_table, schema=None):
238
245
239
246
for fname in schema :
240
247
dtype = get_numpy_type (schema [fname ])
248
+ container [fname ] = arrow_table [fname ].to_numpy ()
241
249
if dtype == np .str_ :
242
- container [fname ] = arrow_table [fname ].to_pandas ().to_numpy (dtype = dtype )
243
- else :
244
- container [fname ] = arrow_table [fname ].to_numpy ()
250
+ container [fname ] = container [fname ].astype (np .str_ )
251
+
245
252
return container
246
253
247
254
@@ -427,7 +434,7 @@ def _tabular_generator(tabular, *, exclude_none=False):
427
434
yield {k : v for k , v in row .items () if v is not None }
428
435
else :
429
436
yield row
430
- elif isinstance (tabular , pd .DataFrame ):
437
+ elif pd is not None and isinstance (tabular , pd .DataFrame ):
431
438
for row in tabular .to_dict ("records" ):
432
439
if exclude_none :
433
440
yield {k : v for k , v in row .items () if not np .isnan (v )}
@@ -498,7 +505,7 @@ def write(collection, tabular, *, exclude_none: bool = False):
498
505
cols = [tabular .column (i ).cast (new_types [i ]) for i in range (tabular .num_columns )]
499
506
tabular = Table .from_arrays (cols , names = tabular .column_names )
500
507
_validate_schema (tabular .schema .types )
501
- elif isinstance (tabular , pd .DataFrame ):
508
+ elif pd is not None and isinstance (tabular , pd .DataFrame ):
502
509
_validate_schema (ArrowSchema .from_pandas (tabular ).types )
503
510
elif pl is not None and isinstance (tabular , pl .DataFrame ):
504
511
tabular = tabular .to_arrow () # zero-copy in most cases and done in tabular_gen anyway
@@ -523,7 +530,10 @@ def write(collection, tabular, *, exclude_none: bool = False):
523
530
524
531
# Add handling for special case types.
525
532
codec_options = collection .codec_options
526
- type_registry = TypeRegistry ([_PandasNACodec (), _DecimalCodec ()])
533
+ if pd is not None :
534
+ type_registry = TypeRegistry ([_PandasNACodec (), _DecimalCodec ()])
535
+ else :
536
+ type_registry = TypeRegistry ([_DecimalCodec ()])
527
537
codec_options = codec_options .with_options (type_registry = type_registry )
528
538
529
539
while cur_offset < tab_size :
0 commit comments