@@ -56,6 +56,17 @@ def test_read_arrow(naturalearth_lowres_all_ext):
5656 assert_geodataframe_equal (result , expected , check_less_precise = check_less_precise )
5757
5858
@pytest.mark.parametrize("columns", [None, [], ["continent"], ["iso_a3", "pop_est"]])
def test_read_arrow_columns(naturalearth_lowres, columns):
    """Check that read_arrow metadata lists exactly the requested fields.

    With ``columns=None`` the full list of attribute fields is expected;
    with an explicit list (including an empty one) only those fields are
    expected. Field order is deliberately not compared.
    """
    all_fields = ["pop_est", "continent", "name", "iso_a3", "gdp_md_est"]
    expected_fields = all_fields if columns is None else columns

    # Only the metadata matters here; the Arrow table itself is discarded.
    meta, _ = read_arrow(naturalearth_lowres, columns=columns)

    assert meta["fields"] is not None
    assert sorted(meta["fields"]) == sorted(expected_fields)
68+
69+
5970def test_read_arrow_unspecified_layer_warning (data_dir ):
6071 """Reading a multi-layer file without specifying a layer gives a warning."""
6172 with pytest .warns (UserWarning , match = "More than one layer found " ):
@@ -107,7 +118,7 @@ def test_read_arrow_skip_features_max_features(
107118 assert len (table ) == expected
108119
109120
110- def test_read_arrow_fid (naturalearth_lowres_all_ext ):
121+ def test_read_df_arrow_fid (naturalearth_lowres_all_ext ):
111122 kwargs = {"use_arrow" : True , "where" : "fid >= 2 AND fid <= 3" }
112123
113124 df = read_dataframe (naturalearth_lowres_all_ext , fid_as_index = False , ** kwargs )
@@ -117,12 +128,12 @@ def test_read_arrow_fid(naturalearth_lowres_all_ext):
117128 assert_index_equal (df .index , pd .Index ([2 , 3 ], name = "fid" ))
118129
119130
def test_read_df_arrow_columns(naturalearth_lowres):
    """Check that read_dataframe with use_arrow=True honours ``columns``.

    Requesting only "continent" should give a frame whose columns are that
    field followed by the geometry column.
    """
    requested = ["continent"]
    df = read_dataframe(naturalearth_lowres, columns=requested, use_arrow=True)
    assert list(df.columns) == ["continent", "geometry"]
123134
124135
125- def test_read_arrow_ignore_geometry (naturalearth_lowres ):
136+ def test_read_df_arrow_ignore_geometry (naturalearth_lowres ):
126137 result = read_dataframe (naturalearth_lowres , use_arrow = True , read_geometry = False )
127138 assert type (result ) is pd .DataFrame
128139
@@ -132,7 +143,7 @@ def test_read_arrow_ignore_geometry(naturalearth_lowres):
132143 assert_frame_equal (result , expected )
133144
134145
135- def test_read_arrow_to_pandas_kwargs (no_geometry_file ):
146+ def test_read_df_arrow_to_pandas_kwargs (no_geometry_file ):
136147 # with arrow, list types are supported
137148 arrow_to_pandas_kwargs = {"strings_to_categorical" : True }
138149 df = read_dataframe (
@@ -216,6 +227,30 @@ def test_open_arrow_batch_size(naturalearth_lowres):
216227 assert len (tables [0 ]) == batch_size , "First table should match the batch size"
217228
218229
@pytest.mark.parametrize(
    "descr, columns, exp_columns",
    [
        ("all", None, ["pop_est", "continent", "name", "iso_a3", "gdp_md_est"]),
        ("case_sensitive", ["NAME"], []),
        ("repeats_dropped", ["continent", "continent", "name"], ["continent", "name"]),
        ("keep_original_order", ["continent", "pop_est"], ["pop_est", "continent"]),
    ],
)
def test_open_arrow_columns(naturalearth_lowres, descr, columns, exp_columns):
    """Check the ``columns`` handling of open_arrow for several request shapes.

    Each parametrized case pairs a ``columns`` request with the fields that
    are expected both in the returned metadata and in the streamed table.
    ``descr`` is only used to label assertion failures.
    """
    failure_msg = f"Failed for {descr}"

    with open_arrow(naturalearth_lowres, columns=columns) as (meta, reader):
        assert isinstance(meta, dict)
        assert isinstance(reader, pyogrio._io._ArrowStream)

        # Materialize the stream into a table while the reader is still open.
        table = pyarrow.table(reader)

    # Metadata must list the expected attribute fields, in order.
    assert np.array_equal(meta["fields"], exp_columns), failure_msg

    # The table carries the same fields followed by the geometry column.
    assert table.column_names == [*exp_columns, "wkb_geometry"], failure_msg
252+
253+
219254@pytest .mark .skipif (
220255 __gdal_version__ >= (3 , 8 , 0 ),
221256 reason = "skip_features supported by Arrow stream API for GDAL>=3.8.0" ,
0 commit comments