2828#
2929# SPDX-License-Identifier: AGPL-3.0-or-later
3030
31+
3132import csv
3233import json
3334import logging
3435import os
3536import re
36- from collections import defaultdict
3737from functools import reduce
3838from io import TextIOWrapper
3939from itertools import chain
5454from django .utils .encoding import smart_str
5555from django .views .decorators .cache import never_cache
5656from django .views .generic import View
57- from oemetadata .v1 .v160 .schema import OEMETADATA_V160_SCHEMA
5857
5958# from oemetadata.v1.v160.template import OEMETADATA_V160_TEMPLATE
60- # from oemetadata.v2.v20.schema import OEMETADATA_V20_SCHEMA
59+ from oemetadata .v2 .v20 .schema import OEMETADATA_V20_SCHEMA
6160from oemetadata .v2 .v20 .template import OEMETADATA_V20_TEMPLATE
6261
6362# from oemetadata.v2.v20.example import OEMETADATA_V20_EXAMPLE
6766import api .parser
6867from api .actions import describe_columns
6968
69+ # from oemetadata.v1.v160.schema import OEMETADATA_V160_SCHEMA
70+
71+
7072try :
7173 import oeplatform .securitysettings as sec
7274except Exception :
@@ -1978,7 +1980,7 @@ def load_json_schema(self):
19781980 Returns:
19791981 dict: JSON schema.
19801982 """
1981- json_schema = OEMETADATA_V160_SCHEMA
1983+ json_schema = OEMETADATA_V20_SCHEMA
19821984 return json_schema
19831985
19841986 def parse_keys (self , val , old = "" ):
@@ -2015,261 +2017,79 @@ def parse_keys(self, val, old=""):
20152017
def sort_in_category(self, schema, table, oemetadata):
    """
    Group flattened OEMetadata v2 fields into thematic buckets.

    The metadata is flattened with ``self.parse_keys`` and only entries
    whose dot-notation path starts with ``resources.`` are kept. Each kept
    entry is assigned to a bucket according to the path segment that
    follows the resource index (``resources.<idx>.<root>...``) and is
    extended with the empty placeholders used by the review workflow.

    Every entry in the resulting lists has five keys::

        {
            "field": "<dot-notation path>",
            "value": "<current value>",
            "newValue": "",
            "reviewer_suggestion": "",
            "suggestion_comment": "",
        }

    Args:
        schema: Not used by this method.
        table: Not used by this method.
        oemetadata: Metadata structure handed to ``self.parse_keys``, which
            is expected to yield dicts with ``"field"`` and ``"value"``.

    Returns:
        dict: Mapping with the keys ``general``, ``spatial``, ``temporal``,
        ``source`` and ``license``. Each value is a (possibly empty) list
        of entries in the order they were produced by ``parse_keys``.
    """
    # Path segment after the resource index -> bucket name.
    # Any segment not listed here is collected under "general".
    bucket_map = {
        "spatial": "spatial",
        "temporal": "temporal",
        "sources": "source",
        "licenses": "license",
    }

    # Pre-seed every bucket so all keys exist even when nothing matches.
    buckets = {
        "general": [],
        "spatial": [],
        "temporal": [],
        "source": [],
        "license": [],
    }

    for item in self.parse_keys(oemetadata):
        raw_key = item["field"]

        # Only resource-level fields take part in the review.
        if not raw_key.startswith("resources."):
            continue

        parts = raw_key.split(".")
        # resources.<idx>.<root>.… → <root>; a bare "resources.<idx>" has
        # no root segment and therefore falls through to "general".
        root = parts[2] if len(parts) >= 3 else parts[0]

        buckets[bucket_map.get(root, "general")].append(
            {
                "field": raw_key,
                "value": item["value"],
                "newValue": "",
                "reviewer_suggestion": "",
                "suggestion_comment": "",
            }
        )

    return buckets
22732093
22742094 def get_all_field_descriptions (self , json_schema , prefix = "" ):
22752095 """
@@ -2296,12 +2116,17 @@ def extract_descriptions(properties, prefix=""):
22962116
22972117 if any (
22982118 attr in value
2299- for attr in ["description" , "example" , "badge" , "title" ]
2119+ for attr in ["description" , "examples" , " example" , "badge" , "title" ]
23002120 ):
23012121 field_descriptions [key ] = {}
23022122 if "description" in value :
23032123 field_descriptions [key ]["description" ] = value ["description" ]
2304- if "example" in value :
2124+ # Prefer v2 "examples" (array) over v1 "example" (single value)
2125+ if "examples" in value and value ["examples" ]:
2126+ # v2: first item of the examples array
2127+ field_descriptions [key ]["example" ] = value ["examples" ][0 ]
2128+ elif "example" in value :
2129+ # v1 fallback
23052130 field_descriptions [key ]["example" ] = value ["example" ]
23062131 if "badge" in value :
23072132 field_descriptions [key ]["badge" ] = value ["badge" ]
0 commit comments