|
17 | 17 | pa_version_under13p0,
|
18 | 18 | pa_version_under15p0,
|
19 | 19 | pa_version_under17p0,
|
| 20 | + pa_version_under18p0, |
20 | 21 | )
|
21 | 22 |
|
22 | 23 | import pandas as pd
|
@@ -1144,6 +1145,26 @@ def test_infer_string_large_string_type(self, tmp_path, pa):
|
1144 | 1145 | )
|
1145 | 1146 | tm.assert_frame_equal(result, expected)
|
1146 | 1147 |
|
@pytest.mark.skipif(pa_version_under18p0, reason="not supported before 18.0")
def test_infer_string_string_view_type(self, tmp_path, pa):
    """Round-trip an Arrow ``string_view`` column through ``read_parquet``.

    A one-column pyarrow table typed as ``string_view`` is written to a
    parquet file under ``tmp_path`` and read back with the
    ``future.infer_string`` option enabled. The result must equal a frame
    whose data and column index both use ``StringDtype(na_value=np.nan)``.
    """
    # GH#54798
    # NOTE: the local import rebinds the ``pa`` argument to the pyarrow module.
    import pyarrow as pa
    import pyarrow.parquet as pq

    values = [None, "b", "c"]
    path = tmp_path / "string_view.parquet"

    # Build the string_view-typed table and persist it in one step.
    string_view_column = pa.array(values, pa.string_view())
    pq.write_table(pa.table({"a": string_view_column}), path)

    with pd.option_context("future.infer_string", True):
        result = read_parquet(path)

    # The same NaN-backed string dtype is expected for values and labels.
    nan_string_dtype = pd.StringDtype(na_value=np.nan)
    expected = pd.DataFrame(
        data={"a": values},
        dtype=nan_string_dtype,
        columns=pd.Index(["a"], dtype=nan_string_dtype),
    )
    tm.assert_frame_equal(result, expected)
| 1167 | + |
1147 | 1168 | # NOTE: this test is not run by default, because it requires a lot of memory (>5GB)
|
1148 | 1169 | # @pytest.mark.slow
|
1149 | 1170 | # def test_string_column_above_2GB(self, tmp_path, pa):
|
|
0 commit comments