Skip to content

Commit 510c5d4

Browse files
authored
Merge pull request #122 from OpenEnergyPlatform/feature-121-implement-oemv202-conversion
Implement v2.0.3 conversion
2 parents e05937f + e969eea commit 510c5d4

File tree

14 files changed

+542
-203
lines changed

14 files changed

+542
-203
lines changed

README.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ of the oemetadata-specification to help users stick with the latest enhancements
8686
To ease the conversion of oemetadata from any outdated version to the latest version, we provide a
8787
conversion functionality. The following example shows how to convert the oemetadata from v1.6 to v2.0.
8888

89+
Starting from v2, we do not support conversions for patch versions. This means you can convert from v1.6 to v2.0, but not from v2.0.0 to v2.0.1.
90+
The oemetadata release procedure requires that breaking changes are only added in major or minor versions. Only these changes will require a conversion.
91+
8992
CLI - oemetadata conversion::
9093

9194
# Not implemented yet
@@ -112,7 +115,7 @@ Module usage - In python scripts you can use the conversion::
112115
meta = read_json_file(file_path)
113116

114117
# use omi to convert it to the latest release
115-
converted = convert_metadata(meta, "OEMetadata-2.0.1")
118+
converted = convert_metadata(meta, "OEMetadata-2.0")
116119

117120
# now you can store the result as json file
118121
with open("result.json", "w", encoding="utf-8") as json_file:
@@ -129,7 +132,7 @@ two arguments the first one is the metadata and the second optional one is the s
129132
the validation will try to get the matching schema for the current metadata.
130133

131134

132-
CLI - oemetadata conversion::
135+
CLI - oemetadata validation::
133136

134137
# Not implemented yet
135138

poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/omi/base.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,17 @@
44

55
import json
66
import pathlib
7+
import re
78
from dataclasses import dataclass
89

910
import requests
10-
from metadata import v20, v152, v160
11+
from oemetadata.v1 import v152, v160
12+
from oemetadata.v2 import v20
1113

1214
from .settings import OEP_URL
1315

1416
# Order matters! First entry equals latest version of metadata format
15-
METADATA_FORMATS = {"OEP": ["OEMetadata-2.0.1", "OEP-1.6.0", "OEP-1.5.2"], "INSPIRE": []}
17+
METADATA_FORMATS = {"OEP": ["OEMetadata-2.0", "OEP-1.6.0", "OEP-1.5.2"], "INSPIRE": []}
1618
METADATA_VERSIONS = {version: md_format for md_format, versions in METADATA_FORMATS.items() for version in versions}
1719

1820

@@ -70,13 +72,28 @@ def get_metadata_version(metadata: dict) -> str:
7072
"""
7173
# For OEP metadata
7274
try:
73-
return metadata["metaMetadata"]["metadataVersion"]
75+
return __normalize_metadata_version(metadata["metaMetadata"]["metadataVersion"])
7476
except KeyError:
7577
pass
7678
msg = "Could not extract metadata version from metadata."
7779
raise MetadataError(msg)
7880

7981

82+
def __normalize_metadata_version(version: str) -> str:
    """
    Drop the patch component from an OEMetadata v2 version string.

    Strings shaped like "OEMetadata-2.<minor>" or "OEMetadata-2.<minor>.<patch>"
    are reduced to "OEMetadata-2.<minor>", e.g. "OEMetadata-2.0.4" becomes
    "OEMetadata-2.0". Every other string is handed back untouched.

    Parameters
    ----------
    version: str
        Metadata version string

    Returns
    -------
    str
        Version string without the patch number

    Raises
    ------
    MetadataError
        If the given version is not a string
    """
    if isinstance(version, str):
        # Only the "OEMetadata-2.<minor>" prefix is captured; an optional ".<patch>" suffix is discarded
        patch_stripped = re.match(r"^(OEMetadata-2\.\d+)(?:\.\d+)?$", version)
        return version if patch_stripped is None else patch_stripped.group(1)
    raise MetadataError(f"Metadata version must be a string, not {type(version)}.")
95+
96+
8097
def get_latest_metadata_version(metadata_format: str) -> str:
8198
"""
8299
Return the latest metadata version of a given metadata format.
@@ -148,7 +165,7 @@ def __get_metadata_specs_for_oep(metadata_version: str) -> MetadataSpecification
148165
MetadataSpecification
149166
Metadata schema for given metadata version including template and example.
150167
"""
151-
metadata_modules = {"OEP-1.5.2": v152, "OEP-1.6.0": v160, "OEMetadata-2.0.1": v20}
168+
metadata_modules = {"OEP-1.5.2": v152, "OEP-1.6.0": v160, "OEMetadata-2.0": v20}
152169
metadata_module = metadata_modules[metadata_version]
153170
module_path = pathlib.Path(metadata_module.__file__).parent
154171
specs = {}

src/omi/conversion.py

Lines changed: 5 additions & 177 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44

55
from copy import deepcopy
66

7-
from omi.base import get_metadata_specification, get_metadata_version
7+
from omi.base import get_metadata_version
8+
from omi.conversions.v152_to_v160 import convert_oep_152_to_160
9+
from omi.conversions.v160_to_v20 import convert_oep_160_to_20
810

911

1012
class ConversionError(Exception):
@@ -77,181 +79,7 @@ def get_chain(current_version: str) -> list[str] | None:
7779
raise ConversionError(f"No conversion chain found from {source_version} to {target_version}.")
7880

7981

80-
def __convert_oep_152_to_160(metadata: dict) -> dict:
81-
"""
82-
Convert metadata with version "OEP-1.5.2" to "OEP-1.6.0".
83-
84-
Parameters
85-
----------
86-
metadata: dict
87-
Metadata
88-
89-
Returns
90-
-------
91-
dict
92-
Updated metadata
93-
"""
94-
# No changes in metadata fields
95-
metadata["metaMetadata"]["metadataVersion"] = "OEP-1.6.0"
96-
return metadata
97-
98-
99-
def __convert_oep_160_to_200(metadata: dict) -> dict:
100-
"""
101-
Convert metadata with version "OEP-1.6.0" to "OEMetadata-2.0.1" using the v2.0 template.
102-
103-
Parameters
104-
----------
105-
metadata: dict
106-
Metadata dictionary in v1.6 format
107-
108-
Returns
109-
-------
110-
dict
111-
Updated metadata dictionary in v2.0 format
112-
"""
113-
metadata_v2 = deepcopy(get_metadata_specification("OEMetadata-2.0.1").template)
114-
metadata_v2["name"] = metadata_v2["title"] = metadata_v2["id"] = metadata_v2["description"] = None
115-
116-
# Populate metadata v2 resources
117-
for i, resource in enumerate(metadata.get("resources", [])):
118-
resource_v2 = ___v2_ensure_resource_entry(metadata_v2, i)
119-
___v2_populate_resource_v2(resource_v2, metadata, resource)
120-
121-
# Update metaMetadata section
122-
metadata_v2["metaMetadata"]["metadataVersion"] = "OEMetadata-2.0.1"
123-
metadata_v2["metaMetadata"]["metadataLicense"] = metadata.get("metaMetadata", {}).get("metadataLicense")
124-
125-
return metadata_v2
126-
127-
128-
def ___v2_ensure_resource_entry(metadata_v2: dict, index: int) -> dict:
129-
"""Ensure a resource entry exists in metadata_v2 resources for the given index."""
130-
if index >= len(metadata_v2["resources"]):
131-
metadata_v2["resources"].append(deepcopy(metadata_v2["resources"][0]))
132-
return metadata_v2["resources"][index]
133-
134-
135-
def ___v2_populate_resource_v2(resource_v2: dict, metadata: dict, resource: dict) -> None:
136-
"""Populate resource_v2 fields based on metadata and resource from v1.6."""
137-
# Bulk update keys without
138-
resource_v2.update(
139-
{
140-
"@id": metadata.get("@id"),
141-
"@context": metadata.get("@context"),
142-
"name": resource.get("name").split(".")[1],
143-
"topics": [resource.get("name", "").split(".")[0]],
144-
"title": metadata.get("title"),
145-
"path": metadata.get("id"),
146-
"description": metadata.get("description"),
147-
"languages": metadata.get("language", []),
148-
"subject": metadata.get("subject", []),
149-
"keywords": metadata.get("keywords", []),
150-
"publicationDate": metadata.get("publicationDate"),
151-
"context": metadata.get("context", {}),
152-
"temporal": metadata.get("temporal", {}),
153-
"type": None,
154-
"format": resource.get("format"),
155-
"encoding": resource.get("encoding"),
156-
"schema": {
157-
"fields": resource.get("schema", {}).get("fields", []),
158-
"primaryKey": resource.get("schema", {}).get("primaryKey", []),
159-
"foreignKeys": resource.get("schema", {}).get("foreignKeys", []),
160-
},
161-
"dialect": resource.get("dialect", {}),
162-
"review": metadata.get("review", {}),
163-
},
164-
)
165-
166-
resource_v2["context"]["publisher"] = None
167-
168-
resource_v2["embargoPeriod"]["start"] = None
169-
resource_v2["embargoPeriod"]["end"] = None
170-
171-
# Set to null to avoid validation errors: URI
172-
resource_v2["spatial"]["location"]["@id"] = None
173-
resource_v2["spatial"]["location"]["address"] = metadata.get("spatial", {}).get("location")
174-
resource_v2["spatial"]["location"]["latitude"] = None
175-
resource_v2["spatial"]["location"]["longitude"] = None
176-
# Set to null to avoid validation errors: URI
177-
resource_v2["spatial"]["extent"]["name"] = metadata.get("spatial", {}).get("extent")
178-
resource_v2["spatial"]["extent"]["@id"] = None
179-
resource_v2["spatial"]["extent"]["resolutionValue"], resource_v2["spatial"]["extent"]["resolutionUnit"] = (
180-
metadata.get("spatial", {}).get("resolution", "").split(" ", 1)
181-
)
182-
resource_v2["spatial"]["extent"]["crs"] = None
183-
184-
___v2_populate_sources(resource_v2, metadata.get("sources", []))
185-
___v2_populate_contributors(resource_v2, metadata.get("contributors", []))
186-
___v2_populate_licenses(resource_v2, metadata.get("licenses", []))
187-
___v2_populate_schema_fields(resource_v2, resource)
188-
189-
190-
def ___v2_populate_sources(resource_v2: dict, sources: list) -> None:
191-
"""Populate sources in resource_v2 from sources in v1.6."""
192-
for i_source, source in enumerate(sources):
193-
if i_source >= len(resource_v2["sources"]):
194-
resource_v2["sources"].append(deepcopy(resource_v2["sources"][0]))
195-
source_v2 = resource_v2["sources"][i_source]
196-
source_v2.update(
197-
{
198-
"title": source.get("title"),
199-
"description": source.get("description"),
200-
"path": source.get("path"),
201-
"publicationYear": None,
202-
"authors": [],
203-
},
204-
)
205-
___v2_populate_source_licenses(source_v2, source.get("licenses", []))
206-
207-
208-
def ___v2_populate_source_licenses(source_v2: dict, licenses: list) -> None:
209-
"""Populate licenses in source_v2 from licenses in v1.6."""
210-
for i_license, license_entry in enumerate(licenses):
211-
if i_license >= len(source_v2["licenses"]):
212-
source_v2["licenses"].append(deepcopy(source_v2["licenses"][0]))
213-
source_v2["licenses"][i_license].update(license_entry)
214-
source_v2["licenses"][i_license]["copyrightStatement"] = None
215-
216-
217-
def ___v2_populate_contributors(resource_v2: dict, contributors: list) -> None:
218-
"""Populate contributors in resource_v2 from contributors in v1.6."""
219-
for i_contribution, contributor in enumerate(contributors):
220-
if i_contribution >= len(resource_v2["contributors"]):
221-
resource_v2["contributors"].append(deepcopy(resource_v2["contributors"][0]))
222-
contributor_v2 = resource_v2["contributors"][i_contribution]
223-
contributor_v2.update(
224-
{
225-
"title": contributor.get("title"),
226-
"path": contributor.get("path"),
227-
"organization": contributor.get("organization"),
228-
"date": contributor.get("date"),
229-
"object": contributor.get("object"),
230-
"comment": contributor.get("comment"),
231-
},
232-
)
233-
234-
235-
def ___v2_populate_licenses(resource_v2: dict, licenses: list) -> None:
236-
"""Populate licenses in resource_v2 from licenses in v1.6."""
237-
for i_license, license_entry in enumerate(licenses):
238-
if i_license >= len(resource_v2["licenses"]):
239-
resource_v2["licenses"].append(deepcopy(resource_v2["licenses"][0]))
240-
resource_v2["licenses"][i_license].update(license_entry)
241-
resource_v2["licenses"][i_license]["copyrightStatement"] = None
242-
243-
244-
def ___v2_populate_schema_fields(resource_v2: dict, resource: dict) -> None:
245-
"""Populate schema fields in resource_v2 from resource in v1.6."""
246-
for i_field, field in enumerate(resource.get("schema", {}).get("fields", [])):
247-
if i_field >= len(resource_v2["schema"]["fields"]):
248-
resource_v2["schema"]["fields"].append(deepcopy(resource_v2["schema"]["fields"][0]))
249-
schema_field_v2 = resource_v2["schema"]["fields"][i_field]
250-
schema_field_v2.update(field)
251-
schema_field_v2["nullable"] = None
252-
253-
25482
METADATA_CONVERSIONS = {
255-
("OEP-1.5.2", "OEP-1.6.0"): __convert_oep_152_to_160,
256-
("OEP-1.6.0", "OEMetadata-2.0.1"): __convert_oep_160_to_200,
83+
("OEP-1.5.2", "OEP-1.6.0"): convert_oep_152_to_160,
84+
("OEP-1.6.0", "OEMetadata-2.0"): convert_oep_160_to_20,
25785
}

src/omi/conversions/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Conversions
2+
3+
This module is used to collect all existing OEMetaData version conversions. Each step in the conversion chain is stored in its own submodule. OMI supports OEMetaData starting from v1.5.2; previous versions are only supported by omi version > v1.0.0.
4+
5+
Since OEMetaData version 2, we decided to use patch versions only to update content or documentation parts of the metadata specification. Therefore, OMI will only implement conversion steps for minor versions, since they will include all minor structural changes like changing JSON key names or adding new key:value pairs. More substantial changes to the JSON structure will be reflected in a major version change; this would include changing the nested structure of the metadata.

src/omi/conversions/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__version__ = "1.0.0"

src/omi/conversions/utils.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
"""Utility functions for data conversion."""
2+
3+
import re
4+
5+
6+
def find_temporal_resolution_value_and_unit(resolution: str) -> tuple[str, str]:
    """
    Split a temporal resolution string into a value and a unit.

    When the whole string consists of a number (optionally with decimals) followed by an
    optional alphabetic unit, the number is returned as the value and the letters as the unit.
    In every other case the trimmed string is returned as the value together with an empty unit.

    Examples:
    - "yearly"  -> ("yearly", "")
    - "hourly"  -> ("hourly", "")
    - "1 h"     -> ("1", "h")
    - "5 years" -> ("5", "years")
    - "1h"      -> ("1", "h")

    Parameters
    ----------
    resolution: str
        Temporal resolution string.

    Returns
    -------
    tuple[str, str]
        Temporal resolution value and unit.
    """
    # Number, optional whitespace, optional alphabetic unit; surrounding whitespace is tolerated.
    numeric = re.match(r"^\s*(\d+(?:\.\d+)?)(?:\s*([a-zA-Z]+))?\s*$", resolution)
    if numeric is None:
        # Descriptive resolution: keep the text as value, no unit available.
        return resolution.strip(), ""

    # A missing unit group is reported as an empty string.
    number, unit = numeric.groups(default="")
    return number, unit
40+
41+
42+
def find_spatial_resolution_value_and_unit(resolution: str) -> tuple[str, str]:
    """
    Split a spatial resolution string into a value and a unit.

    The string is searched for a number directly followed (optionally separated by whitespace)
    by a standalone "m", matched case-insensitively. This also works when the pattern is embedded
    in a longer text such as "vector, 10 m". On a hit, the number is returned as the value and the
    unit is always reported as "m". Without a hit, the trimmed string is returned as a descriptive
    value together with an empty unit.

    Examples:
    - "vector, 10 m" -> ("10", "m")
    - "100 m"        -> ("100", "m")
    - "Germany"      -> ("Germany", "")
    - "NUTS-0"       -> ("NUTS-0", "")
    - "MVGD"         -> ("MVGD", "")
    - "Regionale Planungsgemeinschaften und Berlin"
    - "national"
    - "country"

    Parameters
    ----------
    resolution: str
        Spatial resolution string.

    Returns
    -------
    tuple[str, str]
        Spatial resolution value and unit (the unit is "m" for a numeric resolution, otherwise empty).
    """
    # The word boundary after "m" keeps longer words starting with "m" from matching.
    metres = re.search(r"(\d+(?:\.\d+)?)\s*m\b", resolution, re.IGNORECASE)
    return (metres.group(1), "m") if metres else (resolution.strip(), "")
80+
81+
82+
# License entry for CC-BY-4.0; "attribution" and "copyrightStatement" are left empty.
license_cc_by_4 = dict(
    name="CC-BY-4.0",
    title="Creative Commons Attribution 4.0 International",
    path="https://creativecommons.org/licenses/by/4.0/legalcode",
    instruction=(
        "You are free to share and adapt, but you must attribute and cant add additional restrictions. "
        "See https://creativecommons.org/licenses/by/4.0/deed.en for further information."
    ),
    attribution="",
    copyrightStatement="",
)

0 commit comments

Comments
 (0)