|
4 | 4 |
|
5 | 5 | from copy import deepcopy |
6 | 6 |
|
7 | | -from omi.base import get_metadata_specification, get_metadata_version |
| 7 | +from omi.base import get_metadata_version |
| 8 | +from omi.conversions.v152_to_v160 import convert_oep_152_to_160 |
| 9 | +from omi.conversions.v160_to_v20 import convert_oep_160_to_20 |
8 | 10 |
|
9 | 11 |
|
10 | 12 | class ConversionError(Exception): |
@@ -77,181 +79,7 @@ def get_chain(current_version: str) -> list[str] | None: |
77 | 79 | raise ConversionError(f"No conversion chain found from {source_version} to {target_version}.") |
78 | 80 |
|
79 | 81 |
|
80 | | -def __convert_oep_152_to_160(metadata: dict) -> dict: |
81 | | - """ |
82 | | - Convert metadata with version "OEP-1.5.2" to "OEP-1.6.0". |
83 | | -
|
84 | | - Parameters |
85 | | - ---------- |
86 | | - metadata: dict |
87 | | - Metadata |
88 | | -
|
89 | | - Returns |
90 | | - ------- |
91 | | - dict |
92 | | - Updated metadata |
93 | | - """ |
94 | | - # No changes in metadata fields |
95 | | - metadata["metaMetadata"]["metadataVersion"] = "OEP-1.6.0" |
96 | | - return metadata |
97 | | - |
98 | | - |
99 | | -def __convert_oep_160_to_200(metadata: dict) -> dict: |
100 | | - """ |
101 | | - Convert metadata with version "OEP-1.6.0" to "OEMetadata-2.0.1" using the v2.0 template. |
102 | | -
|
103 | | - Parameters |
104 | | - ---------- |
105 | | - metadata: dict |
106 | | - Metadata dictionary in v1.6 format |
107 | | -
|
108 | | - Returns |
109 | | - ------- |
110 | | - dict |
111 | | - Updated metadata dictionary in v2.0 format |
112 | | - """ |
113 | | - metadata_v2 = deepcopy(get_metadata_specification("OEMetadata-2.0.1").template) |
114 | | - metadata_v2["name"] = metadata_v2["title"] = metadata_v2["id"] = metadata_v2["description"] = None |
115 | | - |
116 | | - # Populate metadata v2 resources |
117 | | - for i, resource in enumerate(metadata.get("resources", [])): |
118 | | - resource_v2 = ___v2_ensure_resource_entry(metadata_v2, i) |
119 | | - ___v2_populate_resource_v2(resource_v2, metadata, resource) |
120 | | - |
121 | | - # Update metaMetadata section |
122 | | - metadata_v2["metaMetadata"]["metadataVersion"] = "OEMetadata-2.0.1" |
123 | | - metadata_v2["metaMetadata"]["metadataLicense"] = metadata.get("metaMetadata", {}).get("metadataLicense") |
124 | | - |
125 | | - return metadata_v2 |
126 | | - |
127 | | - |
128 | | -def ___v2_ensure_resource_entry(metadata_v2: dict, index: int) -> dict: |
129 | | - """Ensure a resource entry exists in metadata_v2 resources for the given index.""" |
130 | | - if index >= len(metadata_v2["resources"]): |
131 | | - metadata_v2["resources"].append(deepcopy(metadata_v2["resources"][0])) |
132 | | - return metadata_v2["resources"][index] |
133 | | - |
134 | | - |
135 | | -def ___v2_populate_resource_v2(resource_v2: dict, metadata: dict, resource: dict) -> None: |
136 | | - """Populate resource_v2 fields based on metadata and resource from v1.6.""" |
137 | | - # Bulk update keys without |
138 | | - resource_v2.update( |
139 | | - { |
140 | | - "@id": metadata.get("@id"), |
141 | | - "@context": metadata.get("@context"), |
142 | | - "name": resource.get("name").split(".")[1], |
143 | | - "topics": [resource.get("name", "").split(".")[0]], |
144 | | - "title": metadata.get("title"), |
145 | | - "path": metadata.get("id"), |
146 | | - "description": metadata.get("description"), |
147 | | - "languages": metadata.get("language", []), |
148 | | - "subject": metadata.get("subject", []), |
149 | | - "keywords": metadata.get("keywords", []), |
150 | | - "publicationDate": metadata.get("publicationDate"), |
151 | | - "context": metadata.get("context", {}), |
152 | | - "temporal": metadata.get("temporal", {}), |
153 | | - "type": None, |
154 | | - "format": resource.get("format"), |
155 | | - "encoding": resource.get("encoding"), |
156 | | - "schema": { |
157 | | - "fields": resource.get("schema", {}).get("fields", []), |
158 | | - "primaryKey": resource.get("schema", {}).get("primaryKey", []), |
159 | | - "foreignKeys": resource.get("schema", {}).get("foreignKeys", []), |
160 | | - }, |
161 | | - "dialect": resource.get("dialect", {}), |
162 | | - "review": metadata.get("review", {}), |
163 | | - }, |
164 | | - ) |
165 | | - |
166 | | - resource_v2["context"]["publisher"] = None |
167 | | - |
168 | | - resource_v2["embargoPeriod"]["start"] = None |
169 | | - resource_v2["embargoPeriod"]["end"] = None |
170 | | - |
171 | | - # Set to null to avoid validation errors: URI |
172 | | - resource_v2["spatial"]["location"]["@id"] = None |
173 | | - resource_v2["spatial"]["location"]["address"] = metadata.get("spatial", {}).get("location") |
174 | | - resource_v2["spatial"]["location"]["latitude"] = None |
175 | | - resource_v2["spatial"]["location"]["longitude"] = None |
176 | | - # Set to null to avoid validation errors: URI |
177 | | - resource_v2["spatial"]["extent"]["name"] = metadata.get("spatial", {}).get("extent") |
178 | | - resource_v2["spatial"]["extent"]["@id"] = None |
179 | | - resource_v2["spatial"]["extent"]["resolutionValue"], resource_v2["spatial"]["extent"]["resolutionUnit"] = ( |
180 | | - metadata.get("spatial", {}).get("resolution", "").split(" ", 1) |
181 | | - ) |
182 | | - resource_v2["spatial"]["extent"]["crs"] = None |
183 | | - |
184 | | - ___v2_populate_sources(resource_v2, metadata.get("sources", [])) |
185 | | - ___v2_populate_contributors(resource_v2, metadata.get("contributors", [])) |
186 | | - ___v2_populate_licenses(resource_v2, metadata.get("licenses", [])) |
187 | | - ___v2_populate_schema_fields(resource_v2, resource) |
188 | | - |
189 | | - |
190 | | -def ___v2_populate_sources(resource_v2: dict, sources: list) -> None: |
191 | | - """Populate sources in resource_v2 from sources in v1.6.""" |
192 | | - for i_source, source in enumerate(sources): |
193 | | - if i_source >= len(resource_v2["sources"]): |
194 | | - resource_v2["sources"].append(deepcopy(resource_v2["sources"][0])) |
195 | | - source_v2 = resource_v2["sources"][i_source] |
196 | | - source_v2.update( |
197 | | - { |
198 | | - "title": source.get("title"), |
199 | | - "description": source.get("description"), |
200 | | - "path": source.get("path"), |
201 | | - "publicationYear": None, |
202 | | - "authors": [], |
203 | | - }, |
204 | | - ) |
205 | | - ___v2_populate_source_licenses(source_v2, source.get("licenses", [])) |
206 | | - |
207 | | - |
208 | | -def ___v2_populate_source_licenses(source_v2: dict, licenses: list) -> None: |
209 | | - """Populate licenses in source_v2 from licenses in v1.6.""" |
210 | | - for i_license, license_entry in enumerate(licenses): |
211 | | - if i_license >= len(source_v2["licenses"]): |
212 | | - source_v2["licenses"].append(deepcopy(source_v2["licenses"][0])) |
213 | | - source_v2["licenses"][i_license].update(license_entry) |
214 | | - source_v2["licenses"][i_license]["copyrightStatement"] = None |
215 | | - |
216 | | - |
217 | | -def ___v2_populate_contributors(resource_v2: dict, contributors: list) -> None: |
218 | | - """Populate contributors in resource_v2 from contributors in v1.6.""" |
219 | | - for i_contribution, contributor in enumerate(contributors): |
220 | | - if i_contribution >= len(resource_v2["contributors"]): |
221 | | - resource_v2["contributors"].append(deepcopy(resource_v2["contributors"][0])) |
222 | | - contributor_v2 = resource_v2["contributors"][i_contribution] |
223 | | - contributor_v2.update( |
224 | | - { |
225 | | - "title": contributor.get("title"), |
226 | | - "path": contributor.get("path"), |
227 | | - "organization": contributor.get("organization"), |
228 | | - "date": contributor.get("date"), |
229 | | - "object": contributor.get("object"), |
230 | | - "comment": contributor.get("comment"), |
231 | | - }, |
232 | | - ) |
233 | | - |
234 | | - |
235 | | -def ___v2_populate_licenses(resource_v2: dict, licenses: list) -> None: |
236 | | - """Populate licenses in resource_v2 from licenses in v1.6.""" |
237 | | - for i_license, license_entry in enumerate(licenses): |
238 | | - if i_license >= len(resource_v2["licenses"]): |
239 | | - resource_v2["licenses"].append(deepcopy(resource_v2["licenses"][0])) |
240 | | - resource_v2["licenses"][i_license].update(license_entry) |
241 | | - resource_v2["licenses"][i_license]["copyrightStatement"] = None |
242 | | - |
243 | | - |
244 | | -def ___v2_populate_schema_fields(resource_v2: dict, resource: dict) -> None: |
245 | | - """Populate schema fields in resource_v2 from resource in v1.6.""" |
246 | | - for i_field, field in enumerate(resource.get("schema", {}).get("fields", [])): |
247 | | - if i_field >= len(resource_v2["schema"]["fields"]): |
248 | | - resource_v2["schema"]["fields"].append(deepcopy(resource_v2["schema"]["fields"][0])) |
249 | | - schema_field_v2 = resource_v2["schema"]["fields"][i_field] |
250 | | - schema_field_v2.update(field) |
251 | | - schema_field_v2["nullable"] = None |
252 | | - |
253 | | - |
254 | 82 | METADATA_CONVERSIONS = { |
255 | | - ("OEP-1.5.2", "OEP-1.6.0"): __convert_oep_152_to_160, |
256 | | - ("OEP-1.6.0", "OEMetadata-2.0.1"): __convert_oep_160_to_200, |
| 83 | + ("OEP-1.5.2", "OEP-1.6.0"): convert_oep_152_to_160, |
| 84 | + ("OEP-1.6.0", "OEMetadata-2.0"): convert_oep_160_to_20, |
257 | 85 | } |
0 commit comments