1
+ import importlib .util
1
2
import json
2
3
import time
3
4
import weakref
8
9
from dataclasses import dataclass
9
10
from numbers import Real
10
11
from typing import (
12
+ TYPE_CHECKING ,
11
13
Any ,
12
14
Dict ,
13
15
Iterator ,
30
32
from .query_condition import QueryCondition
31
33
from .subarray import Subarray
32
34
33
- current_timer : ContextVar [str ] = ContextVar ("timer_scope" )
34
-
35
- try :
35
+ if TYPE_CHECKING :
36
+ # We don't want to import these eagerly since importing Pandas in particular
37
+ # can add around half a second of import time even if we never use it.
38
+ import pandas
36
39
import pyarrow
37
- from pyarrow import Table
38
- except ImportError :
39
- pyarrow = Table = None
40
40
41
- try :
42
- import pandas as pd
43
- from pandas import DataFrame
44
- except ImportError :
45
- DataFrame = None
41
+
42
+ current_timer : ContextVar [str ] = ContextVar ("timer_scope" )
46
43
47
44
48
45
# sentinel value to denote selecting an empty range
@@ -373,8 +370,12 @@ def __init__(
373
370
# we need to use a Query in order to get coords for a dense array
374
371
if not query :
375
372
query = QueryProxy (array , coords = True )
376
- if use_arrow is None :
377
- use_arrow = pyarrow is not None
373
+ use_arrow = (
374
+ bool (importlib .util .find_spec ("pyarrow" ))
375
+ if use_arrow is None
376
+ else use_arrow
377
+ )
378
+
378
379
# TODO: there is currently a lack of support for Arrow list types. This prevents
379
380
# multi-value attributes, aside from strings, from being queried properly.
380
381
# Until list attributes are supported in core, error with a clear message.
@@ -390,12 +391,15 @@ def __init__(
390
391
)
391
392
super ().__init__ (array , query , use_arrow , preload_metadata = True )
392
393
393
- def _run_query (self ) -> Union [DataFrame , Table ]:
394
+ def _run_query (self ) -> Union ["pandas.DataFrame" , "pyarrow.Table" ]:
395
+ import pandas
396
+ import pyarrow
397
+
394
398
if self .pyquery is not None :
395
399
self .pyquery .submit ()
396
400
397
401
if self .pyquery is None :
398
- df = DataFrame (self ._empty_results )
402
+ df = pandas . DataFrame (self ._empty_results )
399
403
elif self .use_arrow :
400
404
with timing ("buffer_conversion_time" ):
401
405
table = self .pyquery ._buffers_to_pa_table ()
@@ -417,14 +421,14 @@ def _run_query(self) -> Union[DataFrame, Table]:
417
421
# converting all integers with NULLs to float64:
418
422
# https://arrow.apache.org/docs/python/pandas.html#arrow-pandas-conversion
419
423
extended_dtype_mapping = {
420
- pyarrow .int8 (): pd .Int8Dtype (),
421
- pyarrow .int16 (): pd .Int16Dtype (),
422
- pyarrow .int32 (): pd .Int32Dtype (),
423
- pyarrow .int64 (): pd .Int64Dtype (),
424
- pyarrow .uint8 (): pd .UInt8Dtype (),
425
- pyarrow .uint16 (): pd .UInt16Dtype (),
426
- pyarrow .uint32 (): pd .UInt32Dtype (),
427
- pyarrow .uint64 (): pd .UInt64Dtype (),
424
+ pyarrow .int8 (): pandas .Int8Dtype (),
425
+ pyarrow .int16 (): pandas .Int16Dtype (),
426
+ pyarrow .int32 (): pandas .Int32Dtype (),
427
+ pyarrow .int64 (): pandas .Int64Dtype (),
428
+ pyarrow .uint8 (): pandas .UInt8Dtype (),
429
+ pyarrow .uint16 (): pandas .UInt16Dtype (),
430
+ pyarrow .uint32 (): pandas .UInt32Dtype (),
431
+ pyarrow .uint64 (): pandas .UInt64Dtype (),
428
432
}
429
433
dtype = extended_dtype_mapping [pa_attr .type ]
430
434
else :
@@ -463,7 +467,7 @@ def _run_query(self) -> Union[DataFrame, Table]:
463
467
464
468
df = table .to_pandas ()
465
469
else :
466
- df = DataFrame (_get_pyquery_results (self .pyquery , self .array .schema ))
470
+ df = pandas . DataFrame (_get_pyquery_results (self .pyquery , self .array .schema ))
467
471
468
472
with timing ("pandas_index_update_time" ):
469
473
return _update_df_from_meta (df , self .array .meta , self .query .index_col )
@@ -663,8 +667,10 @@ def _get_empty_results(
663
667
664
668
665
669
def _update_df_from_meta (
666
- df : DataFrame , array_meta : Metadata , index_col : Union [List [str ], bool , None ] = True
667
- ) -> DataFrame :
670
+ df : "pandas.DataFrame" ,
671
+ array_meta : Metadata ,
672
+ index_col : Union [List [str ], bool , None ] = True ,
673
+ ) -> "pandas.DataFrame" :
668
674
col_dtypes = {}
669
675
if "__pandas_attribute_repr" in array_meta :
670
676
attr_dtypes = json .loads (array_meta ["__pandas_attribute_repr" ])
0 commit comments