update tests

jorisvandenbossche · jorisvandenbossche · commit d01326f58c2f · 2025-08-15T14:03:26.000+02:00
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
@@ -736,9 +736,7 @@ def test_interval(self):
 
     def test_categorical_extension_array_nullable(self, nulls_fixture):
         # GH:
-        arr = pd.arrays.StringArray._from_sequence(
-            [nulls_fixture] * 2, dtype=pd.StringDtype()
-        )
+        arr = pd.array([nulls_fixture] * 2, dtype=pd.StringDtype())
         result = Categorical(arr)
         assert arr.dtype == result.categories.dtype
         expected = Categorical(Series([pd.NA, pd.NA], dtype=arr.dtype))
diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py
@@ -128,7 +128,7 @@ def test_dataframe_array_ea_dtypes():
 
 
 def test_dataframe_array_string_dtype():
-    df = DataFrame({"a": ["a", "b"]}, dtype="string")
+    df = DataFrame({"a": ["a", "b"]}, dtype="string[python]")
     arr = np.asarray(df)
     assert np.shares_memory(arr, get_array(df, "a"))
     assert arr.flags.writeable is False
diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py
@@ -83,7 +83,7 @@ def test_astype_numpy_to_ea():
 
 
 @pytest.mark.parametrize(
-    "dtype, new_dtype", [("object", "string"), ("string", "object")]
+    "dtype, new_dtype", [("object", "string[python]"), ("string[python]", "object")]
 )
 def test_astype_string_and_object(dtype, new_dtype):
     df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
@@ -96,7 +96,7 @@ def test_astype_string_and_object(dtype, new_dtype):
 
 
 @pytest.mark.parametrize(
-    "dtype, new_dtype", [("object", "string"), ("string", "object")]
+    "dtype, new_dtype", [("object", "string[python]"), ("string[python]", "object")]
 )
 def test_astype_string_and_object_update_original(dtype, new_dtype):
     df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
@@ -117,7 +117,7 @@ def test_period_dtype(self, dtype):
     "float": np.dtype(np.float64),
     "object": np.dtype(object),
     "category": com.pandas_dtype("category"),
-    "string": pd.StringDtype(),
+    "string": pd.StringDtype("python"),
 }
 
 
diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -199,7 +199,7 @@ def test_convert_dtypes_avoid_block_splitting(self):
             {
                 "a": [1, 2, 3],
                 "b": [4, 5, 6],
-                "c": pd.Series(["a"] * 3, dtype="string[python]"),
+                "c": pd.Series(["a"] * 3, dtype="string"),
             }
         )
         tm.assert_frame_equal(result, expected)
@@ -209,7 +209,7 @@ def test_convert_dtypes_from_arrow(self):
         # GH#56581
         df = pd.DataFrame([["a", datetime.time(18, 12)]], columns=["a", "b"])
         result = df.convert_dtypes()
-        expected = df.astype({"a": "string[python]"})
+        expected = df.astype({"a": "string"})
         tm.assert_frame_equal(result, expected)
 
     def test_convert_dtype_pyarrow_timezone_preserve(self):
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -657,6 +657,10 @@ def test_dtype_backend(self, read_ext, dtype_backend, engine, tmp_excel):
                     for col in df.columns
                 }
             )
+
+            # pandas uses large_string by default, but pyarrow infers string
+            expected["d"] = expected["d"].astype(pd.ArrowDtype(pa.string()))
+            expected["h"] = expected["h"].astype(pd.ArrowDtype(pa.string()))
             # pyarrow by default infers timestamp resolution as us, not ns
             expected["i"] = ArrowExtensionArray(
                 expected["i"].array._pa_array.cast(pa.timestamp(unit="us"))
diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py
@@ -12,7 +12,6 @@
 import pandas as pd
 from pandas import read_orc
 import pandas._testing as tm
-from pandas.core.arrays import StringArray
 
 pytest.importorskip("pyarrow.orc")
 
@@ -368,13 +367,9 @@ def test_orc_dtype_backend_numpy_nullable():
 
     expected = pd.DataFrame(
         {
-            "string": StringArray(np.array(["a", "b", "c"], dtype=np.object_)),
-            "string_with_nan": StringArray(
-                np.array(["a", pd.NA, "c"], dtype=np.object_)
-            ),
-            "string_with_none": StringArray(
-                np.array(["a", pd.NA, "c"], dtype=np.object_)
-            ),
+            "string": pd.array(np.array(["a", "b", "c"], dtype=np.object_)),
+            "string_with_nan": pd.array(np.array(["a", pd.NA, "c"], dtype=np.object_)),
+            "string_with_none": pd.array(np.array(["a", pd.NA, "c"], dtype=np.object_)),
             "int": pd.Series([1, 2, 3], dtype="Int64"),
             "int_with_nan": pd.Series([1, pd.NA, 3], dtype="Int64"),
             "na_only": pd.Series([pd.NA, pd.NA, pd.NA], dtype="Int64"),
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -2137,7 +2137,9 @@ def test_series_string_inference_storage_definition(self):
         # but after PDEP-14 (string dtype), it was decided to keep dtype="string"
         # returning the NA string dtype, so expected is now "string[pyarrow]"
         # (or "string[python]" without pyarrow) instead of "string[pyarrow_numpy]"
-        expected = Series(["a", "b"], dtype="string[python]")
+        expected = Series(
+            ["a", "b"], dtype="string[pyarrow]" if HAS_PYARROW else "string[python]"
+        )
         with pd.option_context("future.infer_string", True):
             result = Series(["a", "b"], dtype="string")
         tm.assert_series_equal(result, expected)

Original file line number	Diff line number	Diff line change
`@@ -83,7 +83,7 @@ def test_astype_numpy_to_ea():`
`83`	`83`
`84`	`84`
`85`	`85`	`@pytest.mark.parametrize(`
`86`		`- "dtype, new_dtype", [("object", "string"), ("string", "object")]`
	`86`	`+ "dtype, new_dtype", [("object", "string[python]"), ("string[python]", "object")]`
`87`	`87`	`)`
`88`	`88`	`def test_astype_string_and_object(dtype, new_dtype):`
`89`	`89`	`df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)`
`@@ -96,7 +96,7 @@ def test_astype_string_and_object(dtype, new_dtype):`
`96`	`96`
`97`	`97`
`98`	`98`	`@pytest.mark.parametrize(`
`99`		`- "dtype, new_dtype", [("object", "string"), ("string", "object")]`
	`99`	`+ "dtype, new_dtype", [("object", "string[python]"), ("string[python]", "object")]`
`100`	`100`	`)`
`101`	`101`	`def test_astype_string_and_object_update_original(dtype, new_dtype):`
`102`	`102`	`df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)`
Original file line number	Diff line number	Diff line change
`@@ -117,7 +117,7 @@ def test_period_dtype(self, dtype):`
`117`	`117`	`"float": np.dtype(np.float64),`
`118`	`118`	`"object": np.dtype(object),`
`119`	`119`	`"category": com.pandas_dtype("category"),`
`120`		`- "string": pd.StringDtype(),`
	`120`	`+ "string": pd.StringDtype("python"),`
`121`	`121`	`}`
`122`	`122`
`123`	`123`
Original file line number	Diff line number	Diff line change
`@@ -199,7 +199,7 @@ def test_convert_dtypes_avoid_block_splitting(self):`
`199`	`199`	`{`
`200`	`200`	`"a": [1, 2, 3],`
`201`	`201`	`"b": [4, 5, 6],`
`202`		`- "c": pd.Series(["a"] * 3, dtype="string[python]"),`
	`202`	`+ "c": pd.Series(["a"] * 3, dtype="string"),`
`203`	`203`	`}`
`204`	`204`	`)`
`205`	`205`	`tm.assert_frame_equal(result, expected)`
`@@ -209,7 +209,7 @@ def test_convert_dtypes_from_arrow(self):`
`209`	`209`	`# GH#56581`
`210`	`210`	`df = pd.DataFrame([["a", datetime.time(18, 12)]], columns=["a", "b"])`
`211`	`211`	`result = df.convert_dtypes()`
`212`		`- expected = df.astype({"a": "string[python]"})`
	`212`	`+ expected = df.astype({"a": "string"})`
`213`	`213`	`tm.assert_frame_equal(result, expected)`
`214`	`214`
`215`	`215`	`def test_convert_dtype_pyarrow_timezone_preserve(self):`
Original file line number	Diff line number	Diff line change
`@@ -657,6 +657,10 @@ def test_dtype_backend(self, read_ext, dtype_backend, engine, tmp_excel):`
`657`	`657`	`for col in df.columns`
`658`	`658`	`}`
`659`	`659`	`)`
	`660`	`+`
	`661`	`+ # pandas uses large_string by default, but pyarrow infers string`
	`662`	`+ expected["d"] = expected["d"].astype(pd.ArrowDtype(pa.string()))`
	`663`	`+ expected["h"] = expected["h"].astype(pd.ArrowDtype(pa.string()))`
`660`	`664`	`# pyarrow by default infers timestamp resolution as us, not ns`
`661`	`665`	`expected["i"] = ArrowExtensionArray(`
`662`	`666`	`expected["i"].array._pa_array.cast(pa.timestamp(unit="us"))`