Skip to content

Commit 7e4da10

Browse files
authored
Merge pull request #2056 from OpenEnergyPlatform/feature/1931-update-the-opr-to-use-oemtadata-v2-specification
Update the OPR to use metadata v2
2 parents 5169b74 + 6db55e3 commit 7e4da10

File tree

1 file changed

+72
-247
lines changed

1 file changed

+72
-247
lines changed

dataedit/views.py

Lines changed: 72 additions & 247 deletions
Original file line number | Diff line number | Diff line change
@@ -28,12 +28,12 @@
2828
#
2929
# SPDX-License-Identifier: AGPL-3.0-or-later
3030

31+
3132
import csv
3233
import json
3334
import logging
3435
import os
3536
import re
36-
from collections import defaultdict
3737
from functools import reduce
3838
from io import TextIOWrapper
3939
from itertools import chain
@@ -54,10 +54,9 @@
5454
from django.utils.encoding import smart_str
5555
from django.views.decorators.cache import never_cache
5656
from django.views.generic import View
57-
from oemetadata.v1.v160.schema import OEMETADATA_V160_SCHEMA
5857

5958
# from oemetadata.v1.v160.template import OEMETADATA_V160_TEMPLATE
60-
# from oemetadata.v2.v20.schema import OEMETADATA_V20_SCHEMA
59+
from oemetadata.v2.v20.schema import OEMETADATA_V20_SCHEMA
6160
from oemetadata.v2.v20.template import OEMETADATA_V20_TEMPLATE
6261

6362
# from oemetadata.v2.v20.example import OEMETADATA_V20_EXAMPLE
@@ -67,6 +66,9 @@
6766
import api.parser
6867
from api.actions import describe_columns
6968

69+
# from oemetadata.v1.v160.schema import OEMETADATA_V160_SCHEMA
70+
71+
7072
try:
7173
import oeplatform.securitysettings as sec
7274
except Exception:
@@ -1978,7 +1980,7 @@ def load_json_schema(self):
19781980
Returns:
19791981
dict: JSON schema.
19801982
"""
1981-
json_schema = OEMETADATA_V160_SCHEMA
1983+
json_schema = OEMETADATA_V20_SCHEMA
19821984
return json_schema
19831985

19841986
def parse_keys(self, val, old=""):
@@ -2015,261 +2017,79 @@ def parse_keys(self, val, old=""):
20152017

20162018
def sort_in_category(self, schema, table, oemetadata):
20172019
"""
2018-
Groups metadata fields by top-level categories and subgroups within them.
2019-
If a field has no dot (.), it's considered flat and shown directly.
2020-
If a field has a dot and includes an index (e.g., sources.0.name),
2021-
then all fields with the same index are grouped together and shown in order.
2022-
Adds display_prefix for human-readable (1-based) indexing.
2023-
"""
2024-
import re
2020+
Group flattened OEMetadata v2 fields into thematic buckets and attach
2021+
placeholders required by the review UI.
20252022
2026-
def _plus_one_if_digit(txt: str) -> str:
2027-
return str(int(txt) + 1) if txt.isdigit() else txt
2023+
Each entry in the resulting lists has **five** keys:
20282024
2029-
val = self.parse_keys(oemetadata)
2030-
2031-
main_categories = {
2032-
"general": [],
2033-
"spatial": [],
2034-
"temporal": [],
2035-
"source": [],
2036-
"license": [],
2037-
"resource": [],
2025+
```json
2026+
{
2027+
"field": "<dot‑notation path>",
2028+
"value": "<current value>",
2029+
"newValue": "",
2030+
"reviewer_suggestion": "",
2031+
"suggestion_comment": ""
20382032
}
2033+
```
20392034
2040-
for item in val:
2041-
field = item["field"]
2042-
top_key = field.split(".")[0]
2043-
2044-
if top_key in {
2045-
"name",
2046-
"title",
2047-
"id",
2048-
"description",
2049-
"language",
2050-
"subject",
2051-
"keywords",
2052-
"publicationDate",
2053-
"context",
2054-
}:
2055-
main_categories["general"].append(item)
2056-
elif top_key == "spatial":
2057-
main_categories["spatial"].append(item)
2058-
elif top_key == "temporal":
2059-
main_categories["temporal"].append(item)
2060-
elif top_key == "sources":
2061-
main_categories["source"].append(item)
2062-
elif top_key == "licenses":
2063-
main_categories["license"].append(item)
2064-
elif top_key == "resources":
2065-
main_categories["resource"].append(item)
2066-
2067-
def extract_index(prefix):
2068-
"""
2069-
Return the numeric list index found at the end of *prefix*.
2070-
2071-
Works for both 'sources.0' *and* 'Sources 0'.
2072-
If no trailing index exists, ``-1`` is returned to keep such
2073-
prefixes at the beginning of the ordered result.
2074-
"""
2075-
match = re.search(r"(?:\.|\s)([0-9]+)$", prefix)
2076-
return int(match.group(1)) if match else -1
2077-
2078-
# ------------------------------------------------------------------
2079-
# Helper: inside each "Sources N" split into second‑level list groups
2080-
# e.g. sources.<src_idx>.licenses.0.url → "Licenses 0"
2081-
# sources.<src_idx>.contacts.1.name → "Contacts 1"
2082-
# Any path with pattern <listname>.<index> after the 2‑nd segment
2083-
# becomes its own accordion; everything else stays flat.
2084-
# ------------------------------------------------------------------
2085-
def nest_sublist_groups(source_items):
2086-
"""
2087-
Turn one Source‑block (list of dicts) into:
2088-
{
2089-
"flat": [ ... items without <listname>.<idx> ... ],
2090-
"grouped": { "<Listname> 1": [...], "<Listname> 2": [...], ... }
2091-
}
2092-
2093-
Accepts arbitrary list names, not just 'licenses'.
2094-
"""
2095-
nested = {"flat": [], "grouped": defaultdict(list)}
2096-
2097-
for itm in source_items:
2098-
parts = itm["field"].split(".")
2099-
# expect pattern: sources.<src_idx>.<listname>.<index>.<rest>
2100-
if (
2101-
len(parts) >= 4
2102-
and parts[2].isidentifier() # list name
2103-
and parts[3].isdigit() # index
2104-
):
2105-
list_name = parts[2] # e.g. "licenses"
2106-
idx = parts[3] # e.g. "0"
2107-
display_idx = int(idx) + 1
2108-
group_key = f"{list_name.capitalize()} {display_idx}"
2109-
display_field = ".".join(parts[4:]) or "value"
2110-
2111-
enriched = itm.copy()
2112-
enriched["display_field"] = display_field
2113-
enriched["display_prefix"] = group_key
2114-
enriched["display_index"] = idx
2115-
nested["grouped"][group_key].append(enriched)
2116-
else:
2117-
# keep as flat inside this Source block
2118-
trimmed = ".".join(parts[2:]) if len(parts) > 2 else itm["field"]
2119-
enriched = itm.copy()
2120-
enriched["display_field"] = _plus_one_if_digit(trimmed)
2121-
nested["flat"].append(enriched)
2122-
2123-
# sort groups numerically (… 1, 2, 3 …) within each list name
2124-
nested["grouped"] = dict(
2125-
sorted(
2126-
nested["grouped"].items(),
2127-
key=lambda kv: (
2128-
kv[0].split()[0], # list name
2129-
int(kv[0].split()[-1]), # numeric index
2130-
),
2131-
)
2132-
)
2133-
return nested
2134-
2135-
def group_index_only(items):
2136-
result = {"flat": [], "grouped": defaultdict(list)}
2137-
2138-
for item in items:
2139-
field = item["field"]
2140-
parts = field.split(".")
2141-
2142-
list_idx = None
2143-
list_name = None
2144-
idx_pos = None
2145-
for pos in range(1, len(parts)):
2146-
if parts[pos].isdigit():
2147-
list_idx = parts[pos] # '0'
2148-
list_name = parts[pos - 1] # 'timeseries'
2149-
idx_pos = pos
2150-
break
2035+
Buckets returned:
21512036
2152-
if list_idx is not None:
2153-
# «Timeseries 1», «Bbox 2» …
2154-
display_idx = int(list_idx) + 1 # show 1‑based index
2155-
group_key = f"{list_name.capitalize()} {display_idx}"
2156-
display_field = (
2157-
".".join(parts[idx_pos + 1 :])
2158-
if idx_pos + 1 < len(parts)
2159-
else ""
2160-
)
2037+
* general
2038+
* spatial
2039+
* temporal
2040+
* source
2041+
* license
2042+
"""
21612043

2162-
enriched = item.copy()
2163-
enriched["display_field"] = display_field
2164-
enriched["display_prefix"] = group_key
2165-
enriched["display_index"] = list_idx
2166-
result["grouped"][group_key].append(enriched)
2167-
else:
2168-
trimmed = field.split(".", 1)[1] if "." in field else field
2169-
item["display_field"] = _plus_one_if_digit(trimmed)
2170-
item.pop("display_index", None)
2171-
result["flat"].append(item)
2044+
from collections import defaultdict
21722045

2173-
result["grouped"] = dict(
2174-
sorted(result["grouped"].items(), key=lambda kv: int(kv[0].split()[-1]))
2175-
)
2176-
return result
2177-
2178-
def group_by_index(items):
2179-
"""
2180-
Organise *items* into
2181-
* ``flat`` – fields without any nesting,
2182-
* ``grouped`` – dict whose keys are human‑readable list titles
2183-
such as 'Timeseries 1', 'Sources 2', …
2184-
2185-
All fields that share the same list index (e.g. timeseries.0.*)
2186-
are collected under one group. The groups are ordered by their
2187-
numeric index so that 1, 2, 3 … appear in sequence.
2188-
"""
2189-
result = {"flat": [], "grouped": defaultdict(list)}
2190-
2191-
for item in items:
2192-
field = item["field"]
2193-
parts = field.split(".")
2194-
2195-
# Handle list elements like timeseries.0.start
2196-
if len(parts) >= 3 and parts[1].isdigit():
2197-
index = parts[1] # '0'
2198-
display_idx = int(index) + 1
2199-
group_key = (
2200-
f"{parts[0].capitalize()} {display_idx}" # 'Timeseries 1'
2201-
)
2202-
display_field = ".".join(parts[2:]) # 'start'
2046+
# Flatten the nested JSON into [{'field': k, 'value': v}, ...]
2047+
flattened = self.parse_keys(oemetadata)
2048+
flattened = [
2049+
item for item in flattened if item["field"].startswith("resources.")
2050+
]
22032051

2204-
enriched = item.copy()
2205-
enriched["display_field"] = display_field
2206-
enriched["display_prefix"] = group_key
2207-
enriched["display_index"] = index
2052+
bucket_map = {
2053+
"spatial": "spatial",
2054+
"temporal": "temporal",
2055+
"sources": "source",
2056+
"licenses": "license",
2057+
}
22082058

2209-
result["grouped"][group_key].append(enriched)
2059+
tmp = defaultdict(list)
22102060

2211-
# Handle nested (but non‑list) structures, e.g. spatial.epsg
2212-
elif "." in field:
2213-
group_key = field.split(".")[0] # 'spatial'
2214-
enriched = item.copy()
2215-
raw_tail = ".".join(field.split(".")[1:])
2216-
enriched["display_field"] = _plus_one_if_digit(raw_tail)
2217-
enriched["display_prefix"] = group_key
2218-
enriched.pop("display_index", None)
2061+
for item in flattened:
2062+
raw_key = item["field"]
2063+
parts = raw_key.split(".")
22192064

2220-
result["grouped"][group_key].append(enriched)
2065+
# Detect v2 resource path → resources.<idx>.<root>.…
2066+
if parts[0] == "resources" and len(parts) >= 3:
2067+
root = parts[2]
2068+
else:
2069+
root = parts[0]
22212070

2222-
# Handle completely flat fields
2223-
else:
2224-
item["display_field"] = field
2225-
item["display_field"] = _plus_one_if_digit(item["display_field"])
2226-
item.pop("display_index", None)
2227-
result["flat"].append(item)
2228-
2229-
# Sort grouped entries by their numeric index (Timeseries 1, 2, 3 …)
2230-
sorted_grouped = dict(
2231-
sorted(result["grouped"].items(), key=lambda kv: extract_index(kv[0]))
2232-
)
2233-
return {"flat": result["flat"], "grouped": sorted_grouped}
2234-
2235-
grouped_meta = {}
2236-
for cat, items in main_categories.items():
2237-
if cat in {"spatial", "temporal"}:
2238-
grouped = group_index_only(items) # only list‑index grouping
2239-
elif cat == "source":
2240-
# First‑level grouping: Source 0, Source 1, …
2241-
src_level = group_index_only(items)
2242-
2243-
# For every 'Sources N' list build inner sublist groups
2244-
nested_grouped = {}
2245-
for src_key, src_items in src_level["grouped"].items():
2246-
nested_grouped[src_key] = nest_sublist_groups(src_items)
2247-
2248-
grouped = {
2249-
"flat": src_level["flat"],
2250-
"grouped": nested_grouped,
2251-
}
2252-
elif cat == "license":
2253-
# First‑level grouping: License 0, License 1, …
2254-
lic_level = group_index_only(items)
2255-
2256-
# For every 'Licenses N' entry build inner sub‑list groups
2257-
nested_grouped_lic = {}
2258-
for lic_key, lic_items in lic_level["grouped"].items():
2259-
nested_grouped_lic[lic_key] = nest_sublist_groups(lic_items)
2260-
2261-
grouped = {
2262-
"flat": lic_level["flat"],
2263-
"grouped": nested_grouped_lic,
2071+
bucket = bucket_map.get(root, "general")
2072+
2073+
# Extend structure with placeholders expected by review workflow
2074+
tmp[bucket].append(
2075+
{
2076+
"field": raw_key,
2077+
"value": item["value"],
2078+
"newValue": "",
2079+
"reviewer_suggestion": "",
2080+
"suggestion_comment": "",
22642081
}
2265-
else:
2266-
grouped = group_by_index(items) # previous behaviour
2267-
grouped_meta[cat] = {
2268-
"flat": grouped["flat"],
2269-
"grouped": grouped["grouped"],
2270-
}
2082+
)
22712083

2272-
return grouped_meta
2084+
# Guarantee keys exist even when empty
2085+
buckets = {
2086+
"general": tmp["general"],
2087+
"spatial": tmp["spatial"],
2088+
"temporal": tmp["temporal"],
2089+
"source": tmp["source"],
2090+
"license": tmp["license"],
2091+
}
2092+
return buckets
22732093

22742094
def get_all_field_descriptions(self, json_schema, prefix=""):
22752095
"""
@@ -2296,12 +2116,17 @@ def extract_descriptions(properties, prefix=""):
22962116

22972117
if any(
22982118
attr in value
2299-
for attr in ["description", "example", "badge", "title"]
2119+
for attr in ["description", "examples", "example", "badge", "title"]
23002120
):
23012121
field_descriptions[key] = {}
23022122
if "description" in value:
23032123
field_descriptions[key]["description"] = value["description"]
2304-
if "example" in value:
2124+
# Prefer v2 "examples" (array) over v1 "example" (single value)
2125+
if "examples" in value and value["examples"]:
2126+
# v2: first item of the examples array
2127+
field_descriptions[key]["example"] = value["examples"][0]
2128+
elif "example" in value:
2129+
# v1 fallback
23052130
field_descriptions[key]["example"] = value["example"]
23062131
if "badge" in value:
23072132
field_descriptions[key]["badge"] = value["badge"]

0 commit comments

Comments (0)