Skip to content

Commit 4435762

Browse files
Merge remote-tracking branch 'upstream/main' into ci-enable-doctests
2 parents 0c93864 + a067fff commit 4435762

File tree

7 files changed

+55
-17
lines changed

7 files changed

+55
-17
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ details, see the commit logs at https://github.com/pandas-dev/pandas.
115115
## Dependencies
116116
- [NumPy - Adds support for large, multi-dimensional arrays, matrices and high-level mathematical functions to operate on these arrays](https://www.numpy.org)
117117
- [python-dateutil - Provides powerful extensions to the standard datetime module](https://dateutil.readthedocs.io/en/stable/index.html)
118-
- [pytz - Brings the Olson tz database into Python which allows accurate and cross platform timezone calculations](https://github.com/stub42/pytz)
118+
- [tzdata - Provides an IANA time zone database](https://tzdata.readthedocs.io/en/latest/)
119119

120120
See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for minimum supported versions of required, recommended and optional dependencies.
121121

doc/source/user_guide/migration-3-strings.rst

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,17 @@ through the ``str`` accessor will work the same:
118118
Overview of behavior differences and how to address them
119119
---------------------------------------------------------
120120

121-
The dtype is no longer object dtype
122-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
121+
The dtype is no longer a numpy "object" dtype
122+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
123123

124124
When inferring or reading string data, the data type of the resulting DataFrame
125125
column or Series will now silently use the new ``"str"`` dtype instead of
126-
``"object"`` dtype, and this can have some impact on your code.
126+
the numpy ``"object"`` dtype, and this can have some impact on your code.
127+
128+
The new string dtype is a pandas data type ("extension dtype"), and no longer a
129+
numpy ``np.dtype`` instance. Therefore, passing the dtype of a string column to
130+
numpy functions will no longer work (e.g. passing it to a ``dtype=`` argument
131+
of a numpy function, or using ``np.issubdtype`` to check the dtype).
127132

128133
Checking the dtype
129134
^^^^^^^^^^^^^^^^^^

doc/source/whatsnew/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 2.3
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v2.3.2
2728
v2.3.1
2829
v2.3.0
2930

doc/source/whatsnew/v2.3.2.rst

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
.. _whatsnew_232:
2+
3+
What's new in 2.3.2 (August XX, 2025)
4+
-------------------------------------
5+
6+
These are the changes in pandas 2.3.2. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
.. _whatsnew_232.string_fixes:
13+
14+
Improvements and fixes for the StringDtype
15+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16+
17+
Most changes in this release are related to :class:`StringDtype`, which will
18+
become the default string dtype in pandas 3.0. See
19+
:ref:`whatsnew_230.upcoming_changes` for more details.
20+
21+
.. _whatsnew_232.string_fixes.bugs:
22+
23+
Bug fixes
24+
^^^^^^^^^
25+
- Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the
26+
"string" type in the JSON Table Schema for :class:`StringDtype` columns
27+
(:issue:`61889`)
28+
29+
30+
.. ---------------------------------------------------------------------------
31+
.. _whatsnew_232.contributors:
32+
33+
Contributors
34+
~~~~~~~~~~~~

pandas/io/json/_table_schema.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,6 @@ def as_json_table_type(x: DtypeObj) -> str:
9090
return "datetime"
9191
elif lib.is_np_dtype(x, "m"):
9292
return "duration"
93-
elif isinstance(x, ExtensionDtype):
94-
return "any"
9593
elif is_string_dtype(x):
9694
return "string"
9795
else:
@@ -197,7 +195,7 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype:
197195
"""
198196
typ = field["type"]
199197
if typ == "string":
200-
return "object"
198+
return field.get("extDtype", None)
201199
elif typ == "integer":
202200
return field.get("extDtype", "int64")
203201
elif typ == "number":

pandas/tests/io/json/test_json_table_schema.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def test_build_table_schema(self, df_schema, using_infer_string):
7070
"primaryKey": ["idx"],
7171
}
7272
if using_infer_string:
73-
expected["fields"][2] = {"name": "B", "type": "any", "extDtype": "str"}
73+
expected["fields"][2] = {"name": "B", "type": "string", "extDtype": "str"}
7474
assert result == expected
7575
result = build_table_schema(df_schema)
7676
assert "pandas_version" in result
@@ -120,10 +120,10 @@ def test_multiindex(self, df_schema, using_infer_string):
120120
if using_infer_string:
121121
expected["fields"][0] = {
122122
"name": "level_0",
123-
"type": "any",
123+
"type": "string",
124124
"extDtype": "str",
125125
}
126-
expected["fields"][3] = {"name": "B", "type": "any", "extDtype": "str"}
126+
expected["fields"][3] = {"name": "B", "type": "string", "extDtype": "str"}
127127
assert result == expected
128128

129129
df.index.names = ["idx0", None]
@@ -303,7 +303,7 @@ def test_to_json(self, df_table, using_infer_string):
303303
]
304304

305305
if using_infer_string:
306-
fields[2] = {"name": "B", "type": "any", "extDtype": "str"}
306+
fields[2] = {"name": "B", "type": "string", "extDtype": "str"}
307307

308308
schema = {"fields": fields, "primaryKey": ["idx"]}
309309
data = [
@@ -547,7 +547,7 @@ def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered):
547547
},
548548
CategoricalDtype(categories=["a", "b", "c"], ordered=True),
549549
),
550-
({"type": "string"}, "object"),
550+
({"type": "string"}, None),
551551
],
552552
)
553553
def test_convert_json_field_to_pandas_type(self, inp, exp):

pandas/tests/io/json/test_json_table_schema_ext_dtype.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def test_build_table_schema(self):
5050
{"name": "index", "type": "integer"},
5151
{"name": "A", "type": "any", "extDtype": "DateDtype"},
5252
{"name": "B", "type": "number", "extDtype": "decimal"},
53-
{"name": "C", "type": "any", "extDtype": "string"},
53+
{"name": "C", "type": "string", "extDtype": "string"},
5454
{"name": "D", "type": "integer", "extDtype": "Int64"},
5555
],
5656
"primaryKey": ["index"],
@@ -80,10 +80,10 @@ def test_as_json_table_type_ext_decimal_dtype(self):
8080
@pytest.mark.parametrize("box", [lambda x: x, Series])
8181
def test_as_json_table_type_ext_string_array_dtype(self, box):
8282
string_data = box(array(["pandas"], dtype="string"))
83-
assert as_json_table_type(string_data.dtype) == "any"
83+
assert as_json_table_type(string_data.dtype) == "string"
8484

8585
def test_as_json_table_type_ext_string_dtype(self):
86-
assert as_json_table_type(StringDtype()) == "any"
86+
assert as_json_table_type(StringDtype()) == "string"
8787

8888
@pytest.mark.parametrize("box", [lambda x: x, Series])
8989
def test_as_json_table_type_ext_integer_array_dtype(self, box):
@@ -176,7 +176,7 @@ def test_build_string_series(self, sa):
176176

177177
fields = [
178178
{"name": "id", "type": "integer"},
179-
{"name": "a", "type": "any", "extDtype": "string"},
179+
{"name": "a", "type": "string", "extDtype": "string"},
180180
]
181181

182182
schema = {"fields": fields, "primaryKey": ["id"]}
@@ -235,7 +235,7 @@ def test_to_json(self, da, dc, sa, ia):
235235
OrderedDict({"name": "idx", "type": "integer"}),
236236
OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}),
237237
OrderedDict({"name": "B", "type": "number", "extDtype": "decimal"}),
238-
OrderedDict({"name": "C", "type": "any", "extDtype": "string"}),
238+
OrderedDict({"name": "C", "type": "string", "extDtype": "string"}),
239239
OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}),
240240
]
241241

0 commit comments

Comments
 (0)