Skip to content

Commit dc211d2

Browse files
authored
ENH: Add basic support for data layer metadata IO (#237)
1 parent c63f7cb commit dc211d2

File tree

7 files changed

+153
-2
lines changed

7 files changed

+153
-2
lines changed

CHANGES.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
- Add access to low-level pyarrow `RecordBatchReader` via
2121
`pyogrio.raw.open_arrow`, which allows iterating over batches of Arrow
2222
tables (#205).
23+
- Add support for writing dataset and layer metadata (where supported by
24+
driver) to `write` and `write_dataframe`, and add support for reading
25+
dataset and layer metadata in `read_info` (#237).
2326

2427
### Packaging
2528

pyogrio/_io.pyx

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,17 @@ cdef char** dict_to_options(object values):
116116
Parameters
117117
----------
118118
values: dict
119+
all keys and values must be strings
119120
120121
Returns
121122
-------
122123
char**
123124
"""
124125
cdef char **options = NULL
125126

127+
if values is None:
128+
return NULL
129+
126130
for k, v in values.items():
127131
k = k.encode('UTF-8')
128132
v = v.encode('UTF-8')
@@ -305,6 +309,54 @@ cdef get_driver(OGRDataSourceH ogr_dataset):
305309
return driver
306310

307311

312+
cdef set_metadata(GDALMajorObjectH obj, object metadata):
    """Write metadata items to a dataset or layer

    Parameters
    ----------
    obj : pointer to dataset or layer
    metadata : dict, optional (default None)
        keys and values must be strings

    Raises
    ------
    RuntimeError
        if GDALSetMetadata returns a non-zero error code
    """
    cdef char **items = dict_to_options(metadata)
    cdef int status = 0

    if items == NULL:
        # no option list was built (e.g. metadata is None): nothing to set
        return

    # only the default namespace is currently supported
    status = GDALSetMetadata(obj, items, NULL)

    # the string list is owned by this function; release it before any raise
    CSLDestroy(items)
    items = NULL

    if status:
        raise RuntimeError("Could not set metadata") from None
335+
336+
cdef get_metadata(GDALMajorObjectH obj):
    """Read the metadata of a dataset or layer

    Parameters
    ----------
    obj : pointer to dataset or layer

    Returns
    -------
    dict or None
        metadata as key, value pairs; None if GDALGetMetadata returns NULL
    """
    cdef int idx
    cdef int count

    # only default namespace is currently supported
    cdef char **items = GDALGetMetadata(obj, NULL)

    if items == NULL:
        return None

    count = CSLCount(items)
    pairs = []
    for idx in range(count):
        # split on the first '=' only, so values may themselves contain '='
        pairs.append(items[idx].decode('UTF-8').split('=', 1))

    return dict(pairs)
358+
359+
308360
cdef detect_encoding(OGRDataSourceH ogr_dataset, OGRLayerH ogr_layer):
309361
"""Attempt to detect the encoding of the layer.
310362
If it supports UTF-8, use that.
@@ -1274,7 +1326,9 @@ def ogr_read_info(
12741326
"random_read": OGR_L_TestCapability(ogr_layer, OLCRandomRead),
12751327
"fast_set_next_by_index": OGR_L_TestCapability(ogr_layer, OLCFastSetNextByIndex),
12761328
"fast_spatial_filter": OGR_L_TestCapability(ogr_layer, OLCFastSpatialFilter),
1277-
}
1329+
},
1330+
'layer_metadata': get_metadata(ogr_layer),
1331+
'dataset_metadata': get_metadata(ogr_dataset),
12781332
}
12791333

12801334
finally:
@@ -1419,7 +1473,7 @@ def ogr_write(
14191473
str path, str layer, str driver, geometry, fields, field_data, field_mask,
14201474
str crs, str geometry_type, str encoding, object dataset_kwargs,
14211475
object layer_kwargs, bint promote_to_multi=False, bint nan_as_null=True,
1422-
bint append=False
1476+
bint append=False, dataset_metadata=None, layer_metadata=None
14231477
):
14241478
cdef const char *path_c = NULL
14251479
cdef const char *layer_c = NULL
@@ -1564,6 +1618,10 @@ def ogr_write(
15641618
else:
15651619
ogr_layer = exc_wrap_pointer(get_ogr_layer(ogr_dataset, layer))
15661620

1621+
# Set dataset and layer metadata
1622+
set_metadata(ogr_dataset, dataset_metadata)
1623+
set_metadata(ogr_layer, layer_metadata)
1624+
15671625
except Exception as exc:
15681626
OGRReleaseDataSource(ogr_dataset)
15691627
ogr_dataset = NULL

pyogrio/_ogr.pxd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ cdef extern from "cpl_string.h":
3838
char** CSLSetNameValue(char **list, const char *name, const char *value)
3939
void CSLDestroy(char **list)
4040
char** CSLAddString(char **list, const char *string)
41+
int CSLCount(char **list)
4142

4243

4344
cdef extern from "cpl_vsi.h" nogil:
@@ -372,7 +373,9 @@ cdef extern from "gdal.h":
372373
OGRErr GDALDatasetStartTransaction(GDALDatasetH ds, int bForce)
373374
OGRErr GDALDatasetCommitTransaction(GDALDatasetH ds)
374375
OGRErr GDALDatasetRollbackTransaction(GDALDatasetH ds)
376+
char** GDALGetMetadata(GDALMajorObjectH obj, const char *pszDomain)
375377
const char* GDALGetMetadataItem(GDALMajorObjectH obj, const char *pszName, const char *pszDomain)
378+
OGRErr GDALSetMetadata(GDALMajorObjectH obj, char **metadata, const char *pszDomain)
376379
const char* GDALVersionInfo(const char *pszRequest)
377380

378381

pyogrio/core.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,8 @@ def read_info(path_or_buffer, /, layer=None, encoding=None, **kwargs):
180180
"geometry": "<geometry type>",
181181
"features": <feature count>,
182182
"driver": "<driver>",
183+
"dataset_metadata": "<dict of dataset metadata or None>",
184+
"layer_metadata": "<dict of layer metadata or None>",
183185
}
184186
"""
185187
path, buffer = get_vsi_path(path_or_buffer)

pyogrio/geopandas.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,9 @@ def write_dataframe(
204204
promote_to_multi=None,
205205
nan_as_null=True,
206206
append=False,
207+
dataset_metadata=None,
208+
layer_metadata=None,
209+
metadata=None,
207210
dataset_options=None,
208211
layer_options=None,
209212
**kwargs,
@@ -261,6 +264,16 @@ def write_dataframe(
261264
driver supports appending to an existing data source, will cause the
262265
data to be appended to the existing records in the data source.
263266
NOTE: append support is limited to specific drivers and GDAL versions.
267+
dataset_metadata : dict, optional (default: None)
268+
Metadata to be stored at the dataset level in the output file; limited
269+
to drivers that support writing metadata, such as GPKG, and silently
270+
ignored otherwise. Keys and values must be strings.
271+
layer_metadata : dict, optional (default: None)
272+
Metadata to be stored at the layer level in the output file; limited to
273+
drivers that support writing metadata, such as GPKG, and silently
274+
ignored otherwise. Keys and values must be strings.
275+
metadata : dict, optional (default: None)
276+
alias of layer_metadata
264277
dataset_options : dict, optional
265278
Dataset creation option (format specific) passed to OGR. Specify as
266279
a key-value dictionary.
@@ -409,6 +422,9 @@ def write_dataframe(
409422
promote_to_multi=promote_to_multi,
410423
nan_as_null=nan_as_null,
411424
append=append,
425+
dataset_metadata=dataset_metadata,
426+
layer_metadata=layer_metadata,
427+
metadata=metadata,
412428
dataset_options=dataset_options,
413429
layer_options=layer_options,
414430
**kwargs,

pyogrio/raw.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,9 @@ def write(
380380
promote_to_multi=None,
381381
nan_as_null=True,
382382
append=False,
383+
dataset_metadata=None,
384+
layer_metadata=None,
385+
metadata=None,
383386
dataset_options=None,
384387
layer_options=None,
385388
**kwargs,
@@ -403,6 +406,21 @@ def write(
403406
"append to FlatGeobuf is not supported for GDAL <= 3.5.0 due to segfault"
404407
)
405408

409+
if metadata is not None:
410+
if layer_metadata is not None:
411+
raise ValueError("Cannot pass both metadata and layer_metadata")
412+
layer_metadata = metadata
413+
414+
# validate metadata types
415+
for metadata in [dataset_metadata, layer_metadata]:
416+
if metadata is not None:
417+
for k, v in metadata.items():
418+
if not isinstance(k, str):
419+
raise ValueError(f"metadata key {k} must be a string")
420+
421+
if not isinstance(v, str):
422+
raise ValueError(f"metadata value {v} must be a string")
423+
406424
if promote_to_multi is None:
407425
promote_to_multi = (
408426
geometry_type.startswith("Multi")
@@ -449,6 +467,8 @@ def write(
449467
promote_to_multi=promote_to_multi,
450468
nan_as_null=nan_as_null,
451469
append=append,
470+
dataset_metadata=dataset_metadata,
471+
layer_metadata=layer_metadata,
452472
dataset_kwargs=dataset_kwargs,
453473
layer_kwargs=layer_kwargs,
454474
)

pyogrio/tests/test_geopandas_io.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,3 +1059,52 @@ def test_write_nullable_dtypes(tmp_path):
10591059
expected["col4"] = expected["col4"].astype("float64")
10601060
expected["col5"] = expected["col5"].astype(object)
10611061
assert_geodataframe_equal(output_gdf, expected)
1062+
1063+
1064+
@pytest.mark.parametrize(
    "metadata_type", ["dataset_metadata", "layer_metadata", "metadata"]
)
def test_metadata_io(tmpdir, naturalearth_lowres, metadata_type):
    """Metadata written by write_dataframe is returned by read_info."""
    expected = {"level": metadata_type}
    filename = os.path.join(str(tmpdir), "test.gpkg")

    write_dataframe(
        read_dataframe(naturalearth_lowres), filename, **{metadata_type: expected}
    )

    # values passed as "metadata" are read back under the "layer_metadata" key
    if metadata_type == "metadata":
        info_key = "layer_metadata"
    else:
        info_key = metadata_type

    assert read_info(filename)[info_key] == expected
1078+
1079+
1080+
@pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
@pytest.mark.parametrize(
    "metadata",
    [
        {1: 2},
        {"key": None},
        {"key": 1},
    ],
)
def test_invalid_metadata(tmpdir, naturalearth_lowres, metadata_type, metadata):
    """Non-string metadata keys or values make write_dataframe raise ValueError."""
    # setup stays outside the raises block so that only the write call can
    # satisfy the expected exception
    filename = os.path.join(str(tmpdir), "test.gpkg")
    df = read_dataframe(naturalearth_lowres)

    with pytest.raises(ValueError, match="must be a string"):
        write_dataframe(df, filename, **{metadata_type: metadata})
1095+
1096+
1097+
@pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
def test_metadata_unsupported(tmpdir, naturalearth_lowres, metadata_type):
    """metadata is silently ignored"""

    filename = os.path.join(str(tmpdir), "test.geojson")
    write_dataframe(
        read_dataframe(naturalearth_lowres),
        filename,
        **{metadata_type: {"key": "value"}},
    )

    # the parametrized names are exactly the keys reported by read_info, so no
    # alias translation is needed here
    assert read_info(filename)[metadata_type] is None

0 commit comments

Comments
 (0)