diff --git a/CHANGES.md b/CHANGES.md index ee4a01f5..4087e3cb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,6 +11,7 @@ - Add listing of GDAL data types and subtypes to `read_info` (#556). - Add support to read list fields without arrow (#558). +- Unlock the gil during GDAL functions that can take significant time (#572). ### Bug fixes diff --git a/pyogrio/_geometry.pyx b/pyogrio/_geometry.pyx index 87b7ea5a..1fd9a508 100644 --- a/pyogrio/_geometry.pyx +++ b/pyogrio/_geometry.pyx @@ -84,7 +84,9 @@ cdef str get_geometry_type(void *ogr_layer): cdef OGRwkbGeometryType ogr_type try: - ogr_featuredef = check_pointer(OGR_L_GetLayerDefn(ogr_layer)) + with nogil: + ogr_featuredef = OGR_L_GetLayerDefn(ogr_layer) + ogr_featuredef = check_pointer(ogr_featuredef) except NullPointerError: raise DataLayerError("Could not get layer definition") diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index a4aed843..46a2ac67 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -15,7 +15,6 @@ from pathlib import Path from libc.stdint cimport uint8_t, uintptr_t from libc.stdlib cimport malloc, free -from libc.string cimport strlen from libc.math cimport isnan from cpython.pycapsule cimport PyCapsule_GetPointer @@ -226,6 +225,7 @@ cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL: """ cdef void *ogr_dataset = NULL cdef ErrorHandler errors + cdef int flags # Force linear approximations in all cases OGRSetNonLinearGeometriesEnabledFlag(0) @@ -240,9 +240,10 @@ cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL: # WARNING: GDAL logs warnings about invalid open options to stderr # instead of raising an error with capture_errors() as errors: - ogr_dataset = GDALOpenEx( - path_c, flags, NULL, options, NULL - ) + with nogil: + ogr_dataset = GDALOpenEx( + path_c, flags, NULL, options, NULL + ) return errors.check_pointer(ogr_dataset, True) except NullPointerError: @@ -295,15 +296,23 @@ cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL: pointer to OGR layer """ cdef OGRLayerH ogr_layer = NULL + cdef char *layer_c + cdef int layer_int + cdef char *sql_c try: if isinstance(layer, str): - name_b = layer.encode("utf-8") - name_c = name_b - ogr_layer = check_pointer(GDALDatasetGetLayerByName(ogr_dataset, name_c)) + layer_b = layer.encode("utf-8") + layer_c = layer_b + with nogil: + ogr_layer = GDALDatasetGetLayerByName(ogr_dataset, layer_c) + ogr_layer = check_pointer(ogr_layer) elif isinstance(layer, int): - ogr_layer = check_pointer(GDALDatasetGetLayer(ogr_dataset, layer)) + layer_int = layer + with nogil: + ogr_layer = GDALDatasetGetLayer(ogr_dataset, layer_int) + ogr_layer = check_pointer(ogr_layer) else: raise ValueError( f"'layer' parameter must be a str or int, got {type(layer)}" @@ -325,7 +334,8 @@ cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL: sql_b = f"SET interest_layers = {layer_name}".encode("utf-8") sql_c = sql_b - GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL) + with nogil: + GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL) return ogr_layer @@ -347,18 +357,23 @@ cdef OGRLayerH execute_sql( ------- pointer to OGR layer """ + cdef char * sql_c + cdef char * sql_dialect_c + cdef OGRLayerH retval = NULL try: sql_b = sql.encode("utf-8") sql_c = sql_b if sql_dialect is None: - return check_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL)) + with nogil: + retval = GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL) + return check_pointer(retval) sql_dialect_b = sql_dialect.encode("utf-8") sql_dialect_c = sql_dialect_b - return check_pointer(GDALDatasetExecuteSQL( - ogr_dataset, sql_c, NULL, sql_dialect_c) - ) + with nogil: + retval = GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, sql_dialect_c) + return check_pointer(retval) # GDAL does not always raise exception messages in this case except NullPointerError: @@ -386,7 +401,9 @@ cdef str get_crs(OGRLayerH ogr_layer): cdef char *ogr_wkt = NULL try: - ogr_crs = check_pointer(OGR_L_GetSpatialRef(ogr_layer)) + with nogil: + ogr_crs = OGR_L_GetSpatialRef(ogr_layer) + ogr_crs = check_pointer(ogr_crs) except NullPointerError: # No coordinate system defined. @@ -433,7 +450,9 @@ cdef get_driver(OGRDataSourceH ogr_dataset): cdef void *ogr_driver try: - ogr_driver = check_pointer(GDALGetDatasetDriver(ogr_dataset)) + with nogil: + ogr_driver = GDALGetDatasetDriver(ogr_dataset) + ogr_driver = check_pointer(ogr_driver) except NullPointerError: raise DataLayerError(f"Could not detect driver of dataset") from None @@ -464,19 +483,25 @@ cdef get_feature_count(OGRLayerH ogr_layer, int force): """ cdef OGRFeatureH ogr_feature = NULL - cdef int feature_count = OGR_L_GetFeatureCount(ogr_layer, force) + cdef int feature_count + + with nogil: + feature_count = OGR_L_GetFeatureCount(ogr_layer, force) # if GDAL refuses to give us the feature count, we have to loop over all # features ourselves and get the count. This can happen for some drivers # (e.g., OSM) or if a where clause is invalid but not rejected as error if force and feature_count == -1: # make sure layer is read from beginning - OGR_L_ResetReading(ogr_layer) + with nogil: + OGR_L_ResetReading(ogr_layer) feature_count = 0 while True: try: - ogr_feature = check_pointer(OGR_L_GetNextFeature(ogr_layer)) + with nogil: + ogr_feature = OGR_L_GetNextFeature(ogr_layer) + ogr_feature = check_pointer(ogr_feature) feature_count +=1 except NullPointerError: @@ -519,10 +544,11 @@ cdef get_total_bounds(OGRLayerH ogr_layer, int force): tuple of (xmin, ymin, xmax, ymax) or None The total bounds of the layer, or None if they could not be determined. """ - cdef OGREnvelope ogr_envelope - if OGR_L_GetExtent(ogr_layer, &ogr_envelope, force) == OGRERR_NONE: + with nogil: + err = OGR_L_GetExtent(ogr_layer, &ogr_envelope, force) + if err == OGRERR_NONE: bounds = ( ogr_envelope.MinX, ogr_envelope.MinY, ogr_envelope.MaxX, ogr_envelope.MaxY ) @@ -677,7 +703,9 @@ cdef get_fields(OGRLayerH ogr_layer, str encoding, use_arrow=False): cdef const char *key_c try: - ogr_featuredef = check_pointer(OGR_L_GetLayerDefn(ogr_layer)) + with nogil: + ogr_featuredef = OGR_L_GetLayerDefn(ogr_layer) + ogr_featuredef = check_pointer(ogr_featuredef) except NullPointerError: raise DataLayerError("Could not get layer definition") from None @@ -751,10 +779,14 @@ cdef apply_where_filter(OGRLayerH ogr_layer, str where): ------ ValueError: if SQL query is not valid """ + cdef const char* where_c + cdef int err where_b = where.encode("utf-8") where_c = where_b - err = OGR_L_SetAttributeFilter(ogr_layer, where_c) + with nogil: + err = OGR_L_SetAttributeFilter(ogr_layer, where_c) + # WARNING: GDAL does not raise this error for GPKG but instead only # logs to stderr if err != OGRERR_NONE: @@ -781,12 +813,14 @@ cdef apply_bbox_filter(OGRLayerH ogr_layer, bbox): ValueError: if bbox is not a list or tuple or does not have proper number of items """ + cdef double xmin, ymin, xmax, ymax if not (isinstance(bbox, (tuple, list)) and len(bbox) == 4): raise ValueError(f"Invalid bbox: {bbox}") xmin, ymin, xmax, ymax = bbox - OGR_L_SetSpatialFilterRect(ogr_layer, xmin, ymin, xmax, ymax) + with nogil: + OGR_L_SetSpatialFilterRect(ogr_layer, xmin, ymin, xmax, ymax) cdef apply_geometry_filter(OGRLayerH ogr_layer, wkb): @@ -807,7 +841,8 @@ cdef apply_geometry_filter(OGRLayerH ogr_layer, wkb): OGR_G_DestroyGeometry(ogr_geometry) raise GeometryError("Could not create mask geometry") from None - OGR_L_SetSpatialFilter(ogr_layer, ogr_geometry) + with nogil: + OGR_L_SetSpatialFilter(ogr_layer, ogr_geometry) OGR_G_DestroyGeometry(ogr_geometry) @@ -819,7 +854,9 @@ cdef apply_skip_features(OGRLayerH ogr_layer, int skip_features): ogr_layer : pointer to open OGR layer wskip_features : int """ - err = OGR_L_SetNextByIndex(ogr_layer, skip_features) + with nogil: + err = OGR_L_SetNextByIndex(ogr_layer, skip_features) + # GDAL can raise an error (depending on the format) for out-of-bound index, # but `validate_feature_range()` should ensure we only pass a valid number if err != OGRERR_NONE: @@ -1076,14 +1113,14 @@ cdef get_features( uint8_t return_fids, bint datetime_as_string ): - cdef OGRFeatureH ogr_feature = NULL cdef int n_fields cdef int i cdef int field_index # make sure layer is read from beginning - OGR_L_ResetReading(ogr_layer) + with nogil: + OGR_L_ResetReading(ogr_layer) if skip_features > 0: apply_skip_features(ogr_layer, skip_features) @@ -1128,7 +1165,9 @@ cdef get_features( break try: - ogr_feature = check_pointer(OGR_L_GetNextFeature(ogr_layer)) + with nogil: + ogr_feature = OGR_L_GetNextFeature(ogr_layer) + ogr_feature = check_pointer(ogr_feature) except NullPointerError: # No more rows available, so stop reading @@ -1196,7 +1235,8 @@ cdef get_features_by_fid( cdef int count = len(fids) # make sure layer is read from beginning - OGR_L_ResetReading(ogr_layer) + with nogil: + OGR_L_ResetReading(ogr_layer) if read_geometry: geometries = np.empty(shape=(count, ), dtype="object") @@ -1226,7 +1266,9 @@ cdef get_features_by_fid( fid = fids[i] try: - ogr_feature = check_pointer(OGR_L_GetFeature(ogr_layer, fid)) + with nogil: + ogr_feature = OGR_L_GetFeature(ogr_layer, fid) + ogr_feature = check_pointer(ogr_feature) except NullPointerError: raise FeatureError(f"Could not read feature with fid {fid}") from None @@ -1258,7 +1300,8 @@ cdef get_bounds(OGRLayerH ogr_layer, int skip_features, int num_features): cdef int i # make sure layer is read from beginning - OGR_L_ResetReading(ogr_layer) + with nogil: + OGR_L_ResetReading(ogr_layer) if skip_features > 0: apply_skip_features(ogr_layer, skip_features) @@ -1276,7 +1319,9 @@ cdef get_bounds(OGRLayerH ogr_layer, int skip_features, int num_features): break try: - ogr_feature = check_pointer(OGR_L_GetNextFeature(ogr_layer)) + with nogil: + ogr_feature = OGR_L_GetNextFeature(ogr_layer) + ogr_feature = check_pointer(ogr_feature) except NullPointerError: # No more rows available, so stop reading @@ -1457,7 +1502,8 @@ def ogr_read( field_c = field_b fields_c = CSLAddString(fields_c, field_c) - OGR_L_SetIgnoredFields(ogr_layer, fields_c) + with nogil: + OGR_L_SetIgnoredFields(ogr_layer, fields_c) geometry_type = get_geometry_type(ogr_layer) @@ -1802,7 +1848,8 @@ def ogr_open_arrow( field_c = field_b fields_c = CSLAddString(fields_c, field_c) - OGR_L_SetIgnoredFields(ogr_layer, fields_c) + with nogil: + OGR_L_SetIgnoredFields(ogr_layer, fields_c) if not return_fids: options = CSLSetNameValue(options, "INCLUDE_FID", "NO") @@ -1823,12 +1870,15 @@ def ogr_open_arrow( ) # make sure layer is read from beginning - OGR_L_ResetReading(ogr_layer) + with nogil: + OGR_L_ResetReading(ogr_layer) # allocate the stream struct and wrap in capsule to ensure clean-up on error capsule = alloc_c_stream(&stream) - if not OGR_L_GetArrowStream(ogr_layer, stream, options): + with nogil: + reval = OGR_L_GetArrowStream(ogr_layer, stream, options) + if not reval: raise RuntimeError("Failed to open ArrowArrayStream from Layer") if skip_features > 0: @@ -1836,7 +1886,8 @@ def ogr_open_arrow( # the Arrow stream # use `OGR_L_SetNextByIndex` directly and not `apply_skip_features` # to ignore errors in case skip_features == feature_count - OGR_L_SetNextByIndex(ogr_layer, skip_features) + with nogil: + OGR_L_SetNextByIndex(ogr_layer, skip_features) if use_pyarrow: import pyarrow as pa @@ -2145,12 +2196,14 @@ cdef get_layer_names(OGRDataSourceH ogr_dataset): """ cdef OGRLayerH ogr_layer = NULL - layer_count = GDALDatasetGetLayerCount(ogr_dataset) + with nogil: + layer_count = GDALDatasetGetLayerCount(ogr_dataset) data = np.empty(shape=(layer_count, 2), dtype=object) data_view = data[:] for i in range(layer_count): - ogr_layer = GDALDatasetGetLayer(ogr_dataset, i) + with nogil: + ogr_layer = GDALDatasetGetLayer(ogr_dataset, i) data_view[i, 0] = get_string(OGR_L_GetName(ogr_layer)) data_view[i, 1] = get_geometry_type(ogr_layer) diff --git a/pyogrio/_ogr.pxd b/pyogrio/_ogr.pxd index 681190ca..fb4737cc 100644 --- a/pyogrio/_ogr.pxd +++ b/pyogrio/_ogr.pxd @@ -387,25 +387,27 @@ cdef extern from "ogr_api.h": const char* OGR_L_GetGeometryColumn(OGRLayerH layer) OGRErr OGR_L_GetExtent(OGRLayerH layer, OGREnvelope *psExtent, - int bForce) + int bForce) nogil - OGRSpatialReferenceH OGR_L_GetSpatialRef(OGRLayerH layer) + OGRSpatialReferenceH OGR_L_GetSpatialRef(OGRLayerH layer) nogil int OGR_L_TestCapability(OGRLayerH layer, const char *name) - OGRFeatureDefnH OGR_L_GetLayerDefn(OGRLayerH layer) - OGRFeatureH OGR_L_GetNextFeature(OGRLayerH layer) - OGRFeatureH OGR_L_GetFeature(OGRLayerH layer, int nFeatureId) - void OGR_L_ResetReading(OGRLayerH layer) + OGRFeatureDefnH OGR_L_GetLayerDefn(OGRLayerH layer) nogil + OGRFeatureH OGR_L_GetNextFeature(OGRLayerH layer) nogil + OGRFeatureH OGR_L_GetFeature(OGRLayerH layer, int nFeatureId) nogil + void OGR_L_ResetReading(OGRLayerH layer) nogil OGRErr OGR_L_SetAttributeFilter(OGRLayerH hLayer, - const char* pszQuery) - OGRErr OGR_L_SetNextByIndex(OGRLayerH layer, int nIndex) - int OGR_L_GetFeatureCount(OGRLayerH layer, int m) + const char* pszQuery) nogil + OGRErr OGR_L_SetNextByIndex(OGRLayerH layer, int nIndex) nogil + int OGR_L_GetFeatureCount(OGRLayerH layer, int m) nogil void OGR_L_SetSpatialFilterRect(OGRLayerH layer, double xmin, double ymin, double xmax, - double ymax) - void OGR_L_SetSpatialFilter(OGRLayerH layer, OGRGeometryH geometry) - OGRErr OGR_L_SetIgnoredFields(OGRLayerH layer, const char** fields) + double ymax) nogil + void OGR_L_SetSpatialFilter(OGRLayerH layer, + OGRGeometryH geometry) nogil + OGRErr OGR_L_SetIgnoredFields(OGRLayerH layer, + const char** fields) nogil void OGRSetNonLinearGeometriesEnabledFlag(int bFlag) int OGRGetNonLinearGeometriesEnabledFlag() @@ -424,7 +426,7 @@ IF CTE_GDAL_VERSION >= (3, 6, 0): cdef extern from "ogr_api.h": bint OGR_L_GetArrowStream( OGRLayerH hLayer, ArrowArrayStream *out_stream, char** papszOptions - ) + ) nogil IF CTE_GDAL_VERSION >= (3, 8, 0): @@ -483,21 +485,21 @@ cdef extern from "gdal.h": int GDALDatasetDeleteLayer(GDALDatasetH hDS, int iLayer) - GDALDriverH GDALGetDatasetDriver(GDALDatasetH ds) + GDALDriverH GDALGetDatasetDriver(GDALDatasetH ds) nogil GDALDriverH GDALGetDriverByName(const char * pszName) GDALDatasetH GDALOpenEx(const char * pszFilename, unsigned int nOpenFlags, const char *const *papszAllowedDrivers, const char *const *papszOpenOptions, - const char *const *papszSiblingFiles) + const char *const *papszSiblingFiles) nogil - int GDALDatasetGetLayerCount(GDALDatasetH ds) - OGRLayerH GDALDatasetGetLayer(GDALDatasetH ds, int iLayer) - OGRLayerH GDALDatasetGetLayerByName(GDALDatasetH ds, char * pszName) + int GDALDatasetGetLayerCount(GDALDatasetH ds) nogil + OGRLayerH GDALDatasetGetLayer(GDALDatasetH ds, int iLayer) nogil + OGRLayerH GDALDatasetGetLayerByName(GDALDatasetH ds, char * pszName) nogil OGRLayerH GDALDatasetExecuteSQL(GDALDatasetH ds, const char* pszStatement, OGRGeometryH hSpatialFilter, - const char* pszDialect) + const char* pszDialect) nogil void GDALDatasetReleaseResultSet(GDALDatasetH, OGRLayerH) OGRErr GDALDatasetStartTransaction(GDALDatasetH ds, int bForce) OGRErr GDALDatasetCommitTransaction(GDALDatasetH ds)