File tree Expand file tree Collapse file tree 5 files changed +38
-18
lines changed Expand file tree Collapse file tree 5 files changed +38
-18
lines changed Original file line number Diff line number Diff line change @@ -140,12 +140,16 @@ def __init__(
140
140
# infer defaults
141
141
if storage is None :
142
142
if na_value is not libmissing .NA :
143
- if HAS_PYARROW :
144
- storage = "pyarrow"
145
- else :
146
- storage = "python"
143
+ storage = get_option ("mode.string_storage" )
144
+ if storage == "auto" :
145
+ if HAS_PYARROW :
146
+ storage = "pyarrow"
147
+ else :
148
+ storage = "python"
147
149
else :
148
150
storage = get_option ("mode.string_storage" )
151
+ if storage == "auto" :
152
+ storage = "python"
149
153
150
154
if storage == "pyarrow_numpy" :
151
155
# TODO raise a deprecation warning
Original file line number Diff line number Diff line change @@ -452,13 +452,12 @@ def is_terminal() -> bool:
452
452
453
453
string_storage_doc = """
454
454
: string
455
- The default storage for StringDtype. This option is ignored if
456
- ``future.infer_string`` is set to True.
455
+ The default storage for StringDtype.
457
456
"""
458
457
459
458
460
459
def is_valid_string_storage (value : Any ) -> None :
461
- legal_values = ["python" , "pyarrow" ]
460
+ legal_values = ["auto" , "python" , "pyarrow" ]
462
461
if value not in legal_values :
463
462
msg = "Value must be one of python|pyarrow"
464
463
if value == "pyarrow_numpy" :
@@ -473,7 +472,7 @@ def is_valid_string_storage(value: Any) -> None:
473
472
with cf .config_prefix ("mode" ):
474
473
cf .register_option (
475
474
"string_storage" ,
476
- "python" ,
475
+ "auto" ,
477
476
string_storage_doc ,
478
477
# validator=is_one_of_factory(["python", "pyarrow"]),
479
478
validator = is_valid_string_storage ,
Original file line number Diff line number Diff line change 4
4
import numpy as np
5
5
import pytest
6
6
7
- from pandas .compat import HAS_PYARROW
8
7
import pandas .util ._test_decorators as td
9
8
10
9
import pandas as pd
@@ -27,11 +26,10 @@ def test_eq_all_na():
27
26
tm .assert_extension_array_equal (result , expected )
28
27
29
28
30
- def test_config (string_storage , request , using_infer_string ):
31
- if using_infer_string and string_storage == "python" and HAS_PYARROW :
32
- # string storage with na_value=NaN always uses pyarrow if available
33
- # -> does not yet honor the option
34
- request .applymarker (pytest .mark .xfail (reason = "TODO(infer_string)" ))
29
+ def test_config (string_storage , using_infer_string ):
30
+ # with the default string_storage setting
31
+ # always "python" at the moment
32
+ assert StringDtype ().storage == "python"
35
33
36
34
with pd .option_context ("string_storage" , string_storage ):
37
35
assert StringDtype ().storage == string_storage
Original file line number Diff line number Diff line change 3
3
import numpy as np
4
4
import pytest
5
5
6
+ from pandas .compat import HAS_PYARROW
6
7
import pandas .util ._test_decorators as td
7
8
8
9
from pandas .core .dtypes .astype import astype_array
@@ -802,13 +803,17 @@ def test_pandas_dtype_ea_not_instance():
802
803
803
804
804
805
def test_pandas_dtype_string_dtypes (string_storage ):
805
- # TODO(infer_string) remove skip if "python" is supported
806
- pytest .importorskip ("pyarrow" )
806
+ with pd .option_context ("future.infer_string" , True ):
807
+ # with the default string_storage setting
808
+ result = pandas_dtype ("str" )
809
+ assert result == pd .StringDtype (
810
+ "pyarrow" if HAS_PYARROW else "python" , na_value = np .nan
811
+ )
812
+
807
813
with pd .option_context ("future.infer_string" , True ):
808
814
with pd .option_context ("string_storage" , string_storage ):
809
815
result = pandas_dtype ("str" )
810
- # TODO(infer_string) hardcoded to pyarrow until python is supported
811
- assert result == pd .StringDtype ("pyarrow" , na_value = np .nan )
816
+ assert result == pd .StringDtype (string_storage , na_value = np .nan )
812
817
813
818
with pd .option_context ("future.infer_string" , False ):
814
819
with pd .option_context ("string_storage" , string_storage ):
Original file line number Diff line number Diff line change @@ -360,6 +360,13 @@ Deltalake python package lets you access tables stored in
360
360
JVM. It provides the `` delta_table.to_pyarrow_table().to_pandas() `` method to convert
361
361
any Delta table into a pandas DataFrame.
362
362
363
+ ### [ pandas-gbq] ( https://github.com/googleapis/python-bigquery-pandas )
364
+
365
+ pandas-gbq provides high performance reads and writes to and from
366
+ [ Google BigQuery] ( https://cloud.google.com/bigquery/ ) . Previously (before version 2.2.0),
367
+ these methods were exposed as ` pandas.read_gbq ` and ` DataFrame.to_gbq ` .
368
+ Use ` pandas_gbq.read_gbq ` and ` pandas_gbq.to_gbq ` , instead.
369
+
363
370
## Out-of-core
364
371
365
372
### [ Bodo] ( https://bodo.ai/ )
@@ -513,6 +520,13 @@ Arrays](https://awkward-array.org/) inside pandas' Series and
513
520
DataFrame. It also provides an accessor for using awkward functions
514
521
on Series that are of awkward type.
515
522
523
+ ### [ db-dtypes] ( https://github.com/googleapis/python-db-dtypes-pandas )
524
+
525
+ db-dtypes provides extension types for working with types like
526
+ DATE, TIME, and JSON from database systems. This package is used
527
+ by pandas-gbq to provide natural dtypes for BigQuery data types without
528
+ a natural numpy type.
529
+
516
530
### [ Pandas-Genomics] ( https://pandas-genomics.readthedocs.io/en/latest/ )
517
531
518
532
Pandas-Genomics provides an extension type and extension array for working
You can’t perform that action at this time.
0 commit comments