@@ -6,7 +6,7 @@
from pyogrio.util import get_vsi_path

with GDALEnv():
-    from pyogrio._io import ogr_read, ogr_read_arrow, ogr_write
+    from pyogrio._io import ogr_open_arrow, ogr_read, ogr_write
    from pyogrio._ogr import (
        get_gdal_version,
        get_gdal_version_string,
@@ -179,7 +179,100 @@ def read_arrow(
    """
    Read OGR data source into a pyarrow Table.

-    See docstring of `read` for details.
+    See docstring of `read` for parameters.
+
+    Returns
+    -------
+    (dict, pyarrow.Table)
+
+        Returns a tuple of meta information about the data source in a dict,
+        and a pyarrow Table with data.
+
+        Meta is: {
+            "crs": "<crs>",
+            "fields": <ndarray of field names>,
+            "encoding": "<encoding>",
+            "geometry_type": "<geometry_type>",
+            "geometry_name": "<name of geometry column in arrow table>",
+        }
+    """
+    with open_arrow(
+        path_or_buffer,
+        layer=layer,
+        encoding=encoding,
+        columns=columns,
+        read_geometry=read_geometry,
+        force_2d=force_2d,
+        skip_features=skip_features,
+        max_features=max_features,
+        where=where,
+        bbox=bbox,
+        fids=fids,
+        sql=sql,
+        sql_dialect=sql_dialect,
+        return_fids=return_fids,
+        **kwargs,
+    ) as source:
+        meta, reader = source
+        table = reader.read_all()
+
+    return meta, table
+
+
+def open_arrow(
+    path_or_buffer,
+    /,
+    layer=None,
+    encoding=None,
+    columns=None,
+    read_geometry=True,
+    force_2d=False,
+    skip_features=0,
+    max_features=None,
+    where=None,
+    bbox=None,
+    fids=None,
+    sql=None,
+    sql_dialect=None,
+    return_fids=False,
+    batch_size=65_536,
+    **kwargs,
+):
+    """
+    Open OGR data source as a stream of pyarrow record batches.
+
+    See docstring of `read` for parameters.
+
+    The returned RecordBatchStreamReader reads from a stream provided by OGR and
+    must not be accessed after the OGR dataset has been closed, i.e. after the
+    context manager has been exited.
+
+    Examples
+    --------
+
+    >>> from pyogrio.raw import open_arrow
+    >>> import pyarrow as pa
+    >>> import shapely
+    >>>
+    >>> with open_arrow(path) as source:
+    >>>     meta, reader = source
+    >>>     for table in reader:
+    >>>         geometries = shapely.from_wkb(table[meta["geometry_name"]])
+
+    Returns
+    -------
+    (dict, pyarrow.RecordBatchStreamReader)
+
+        Returns a tuple of meta information about the data source in a dict,
+        and a pyarrow RecordBatchStreamReader with data.
+
+        Meta is: {
+            "crs": "<crs>",
+            "fields": <ndarray of field names>,
+            "encoding": "<encoding>",
+            "geometry_type": "<geometry_type>",
+            "geometry_name": "<name of geometry column in arrow table>",
+        }
    """
    try:
        import pyarrow  # noqa
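A minimal usage sketch of the full-table entry point added above, based only on the docstring in this hunk; the file name "example.gpkg" and the printed fields are illustrative assumptions, not part of the commit.

# Sketch: read a whole layer into a pyarrow Table and decode the geometry column.
# "example.gpkg" is a placeholder path, not a file shipped with pyogrio.
from pyogrio.raw import read_arrow

import shapely

meta, table = read_arrow("example.gpkg")
# meta["geometry_name"] names the WKB geometry column in the Arrow table
geometries = shapely.from_wkb(table[meta["geometry_name"]])
print(meta["crs"], table.num_rows)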
@@ -191,7 +284,7 @@ def read_arrow(
    dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}

    try:
-        result = ogr_read_arrow(
+        return ogr_open_arrow(
            path,
            layer=layer,
            encoding=encoding,
@@ -207,13 +300,12 @@ def read_arrow(
            sql_dialect=sql_dialect,
            return_fids=return_fids,
            dataset_kwargs=dataset_kwargs,
+            batch_size=batch_size,
        )
    finally:
        if buffer is not None:
            remove_virtual_file(path)

-    return result
-

def detect_driver(path):
    # try to infer driver from path
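For context, a hedged streaming sketch of how the new open_arrow and its batch_size keyword might be combined to process a source batch by batch, staying inside the context manager as the docstring requires; the path and batch size are assumptions for illustration.

# Sketch: count features without materializing the whole dataset.
from pyogrio.raw import open_arrow

total = 0
with open_arrow("example.gpkg", batch_size=10_000) as source:
    meta, reader = source
    for batch in reader:
        # each item is a pyarrow RecordBatch produced by the OGR stream
        total += batch.num_rows
print("features read:", total)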