File tree Expand file tree Collapse file tree 5 files changed +38
-18
lines changed Expand file tree Collapse file tree 5 files changed +38
-18
lines changed Original file line number Diff line number Diff line change @@ -140,12 +140,16 @@ def __init__(
140140 # infer defaults
141141 if storage is None :
142142 if na_value is not libmissing .NA :
143- if HAS_PYARROW :
144- storage = "pyarrow"
145- else :
146- storage = "python"
143+ storage = get_option ("mode.string_storage" )
144+ if storage == "auto" :
145+ if HAS_PYARROW :
146+ storage = "pyarrow"
147+ else :
148+ storage = "python"
147149 else :
148150 storage = get_option ("mode.string_storage" )
151+ if storage == "auto" :
152+ storage = "python"
149153
150154 if storage == "pyarrow_numpy" :
151155 # TODO raise a deprecation warning
Original file line number Diff line number Diff line change @@ -452,13 +452,12 @@ def is_terminal() -> bool:
452452
453453string_storage_doc = """
454454: string
455- The default storage for StringDtype. This option is ignored if
456- ``future.infer_string`` is set to True.
455+ The default storage for StringDtype.
457456"""
458457
459458
460459def is_valid_string_storage (value : Any ) -> None :
461- legal_values = ["python" , "pyarrow" ]
460+ legal_values = ["auto" , " python" , "pyarrow" ]
462461 if value not in legal_values :
463462 msg = "Value must be one of python|pyarrow"
464463 if value == "pyarrow_numpy" :
@@ -473,7 +472,7 @@ def is_valid_string_storage(value: Any) -> None:
473472with cf .config_prefix ("mode" ):
474473 cf .register_option (
475474 "string_storage" ,
476- "python " ,
475+ "auto " ,
477476 string_storage_doc ,
478477 # validator=is_one_of_factory(["python", "pyarrow"]),
479478 validator = is_valid_string_storage ,
Original file line number Diff line number Diff line change 44import numpy as np
55import pytest
66
7- from pandas .compat import HAS_PYARROW
87import pandas .util ._test_decorators as td
98
109import pandas as pd
@@ -27,11 +26,10 @@ def test_eq_all_na():
2726 tm .assert_extension_array_equal (result , expected )
2827
2928
30- def test_config (string_storage , request , using_infer_string ):
31- if using_infer_string and string_storage == "python" and HAS_PYARROW :
32- # string storage with na_value=NaN always uses pyarrow if available
33- # -> does not yet honor the option
34- request .applymarker (pytest .mark .xfail (reason = "TODO(infer_string)" ))
29+ def test_config (string_storage , using_infer_string ):
30+ # with the default string_storage setting
31+ # always "python" at the moment
32+ assert StringDtype ().storage == "python"
3533
3634 with pd .option_context ("string_storage" , string_storage ):
3735 assert StringDtype ().storage == string_storage
Original file line number Diff line number Diff line change 33import numpy as np
44import pytest
55
6+ from pandas .compat import HAS_PYARROW
67import pandas .util ._test_decorators as td
78
89from pandas .core .dtypes .astype import astype_array
@@ -802,13 +803,17 @@ def test_pandas_dtype_ea_not_instance():
802803
803804
804805def test_pandas_dtype_string_dtypes (string_storage ):
805- # TODO(infer_string) remove skip if "python" is supported
806- pytest .importorskip ("pyarrow" )
806+ with pd .option_context ("future.infer_string" , True ):
807+ # with the default string_storage setting
808+ result = pandas_dtype ("str" )
809+ assert result == pd .StringDtype (
810+ "pyarrow" if HAS_PYARROW else "python" , na_value = np .nan
811+ )
812+
807813 with pd .option_context ("future.infer_string" , True ):
808814 with pd .option_context ("string_storage" , string_storage ):
809815 result = pandas_dtype ("str" )
810- # TODO(infer_string) hardcoded to pyarrow until python is supported
811- assert result == pd .StringDtype ("pyarrow" , na_value = np .nan )
816+ assert result == pd .StringDtype (string_storage , na_value = np .nan )
812817
813818 with pd .option_context ("future.infer_string" , False ):
814819 with pd .option_context ("string_storage" , string_storage ):
Original file line number Diff line number Diff line change @@ -360,6 +360,13 @@ Deltalake python package lets you access tables stored in
360360JVM. It provides the `` delta_table.to_pyarrow_table().to_pandas() `` method to convert
361361any Delta table into Pandas dataframe.
362362
363+ ### [ pandas-gbq] ( https://github.com/googleapis/python-bigquery-pandas )
364+
365+ pandas-gbq provides high performance reads and writes to and from
366+ [ Google BigQuery] ( https://cloud.google.com/bigquery/ ) . Previously (before version 2.2.0),
367+ these methods were exposed as ` pandas.read_gbq ` and ` DataFrame.to_gbq ` .
368+ Use ` pandas_gbq.read_gbq ` and ` pandas_gbq.to_gbq ` instead.
369+
363370## Out-of-core
364371
365372### [ Bodo] ( https://bodo.ai/ )
@@ -513,6 +520,13 @@ Arrays](https://awkward-array.org/) inside pandas' Series and
513520DataFrame. It also provides an accessor for using awkward functions
514521on Series that are of awkward type.
515522
523+ ### [ db-dtypes] ( https://github.com/googleapis/python-db-dtypes-pandas )
524+
525+ db-dtypes provides extension types for working with types like
526+ DATE, TIME, and JSON from database systems. This package is used
527+ by pandas-gbq to provide natural dtypes for BigQuery data types without
528+ a natural numpy type.
529+
516530### [ Pandas-Genomics] ( https://pandas-genomics.readthedocs.io/en/latest/ )
517531
518532Pandas-Genomics provides an extension type and extension array for working
You can’t perform that action at this time.
0 commit comments