1010from owslib import util
1111from owslib .iso import get_namespaces
1212
13- ACCESS_CONSTRAINTS_URL = "http://inspire.ec.europa.eu/metadata-codelist/LimitationsOnPublicAccess/noLimitations"
13+ logger = logging . getLogger ( __name__ )
1414
1515
1616def rndt_parser (xml , uuid = "" , vals = {}, regions = [], keywords = [], custom = {}):
@@ -25,21 +25,96 @@ def rndt_parser(xml, uuid="", vals={}, regions=[], keywords=[], custom={}):
2525 exml = list (exml )[0 ]
2626
2727 rndt_parser = RNDTMetadataParser (exml )
28-
28+ rndt_parser . get_freq ( vals )
2929 keywords , discarded = rndt_parser .resolve_keywords ()
3030 custom ["rejected_keywords" ] = discarded
3131
32- custom ["rndt" ] = {}
33-
34- use_constr = rndt_parser .get_access_costraints (custom )
35- rndt_parser .get_use_costraints (vals , use_constr )
36- rndt_parser .get_resolutions (custom )
37- rndt_parser .get_accuracy (custom )
38- rndt_parser .get_freq (vals )
32+ # Next calls parse and store metadata in a jsonschema compliant way (geonode5)
33+ jsoninstance = custom .setdefault ("jsoninstance" , {})
34+ resolver = RNDTMetadataResolver (jsoninstance )
35+ resolver .resolve_constraints (rndt_parser .parse_constraints ())
36+ resolver .resolve_resolution (rndt_parser .parse_resolution ())
37+ resolver .resolve_accuracy (rndt_parser .parse_accuracy ())
3938
4039 return uuid , vals , regions , keywords , custom
4140
4241
class RNDTMetadataResolver:
    """
    Store parsed RNDT metadata into a jsonschema-style instance dict.

    The resolver receives the already parsed values (see the ``parse_*``
    methods of the metadata parser) and writes them into ``jsoninstance``
    under the ``rndt_*`` keys. The dict is modified in place.
    """

    # Restriction codes whose gmd:otherConstraints content must be resolved
    RESOLVABLE_CODES = ("otherRestrictions", "limitation not listed")
    # Identifiers of the thesauri used to classify constraint URLs
    ACCESS_THESAURUS = "LimitationsOnPublicAccess"
    USE_THESAURUS = "ConditionsApplyingToAccessAndUse"

    def __init__(self, jsoninstance: dict):
        """
        :param jsoninstance: dict that will receive the resolved values
        """
        self.jsoninstance = jsoninstance

    @staticmethod
    def _join_freetext(texts) -> str:
        """Join the collected free texts with newlines, dropping missing or blank ones."""
        return "\n".join(t for t in texts if t and t.strip())

    @staticmethod
    def _find_keyword(href, thesaurus_identifier):
        """Return the first keyword of the given thesaurus whose ``about`` is ``href``, or None."""
        return ThesaurusKeyword.objects.filter(about=href).filter(
            thesaurus__identifier=thesaurus_identifier
        ).first()

    def resolve_constraints(self, constraints: list):
        """
        Resolve the parsed legal constraints.

        Each item of ``constraints`` is a dict with the keys ``code``,
        ``href`` and ``text``. Only items whose code is in
        ``RESOLVABLE_CODES`` are considered:

        - an item without href contributes its text to the free text;
        - an href known to the LimitationsOnPublicAccess thesaurus is stored
          under ``rndt_LimitationsOnPublicAccess``;
        - an href known to the ConditionsApplyingToAccessAndUse thesaurus is
          stored under ``rndt_ConditionsApplyingToAccessAndUse``;
        - any other href is logged and skipped.

        The free text is stored under ``rndt_ConditionsApplyingToAccessAndUse``
        only when no URL was found for it.

        :param constraints: list of dicts as described above
        """
        freetexts = []
        access = None
        use = None

        for constr in constraints:
            logger.debug("Resolving constraint: --> %s", constr)

            if constr.get("code") not in self.RESOLVABLE_CODES:
                logger.debug("Skipping constraint %s", constr)
                continue

            href = constr.get("href")
            if not href:
                logger.debug("Collecting text from %s", constr)
                freetexts.append(constr.get("text"))
                continue

            # rndt_LimitationsOnPublicAccess -> url
            # rndt_ConditionsApplyingToAccessAndUse -> text or url

            keyword = self._find_keyword(href, self.ACCESS_THESAURUS)
            if keyword:
                if access:
                    logger.warning("Duplicate LimitationsOnPublicAccess overridden %s", access)
                access = {"id": href, "label": keyword.alt_label}
                continue

            if self._find_keyword(href, self.USE_THESAURUS):
                if use:
                    logger.warning("Duplicate ConditionsApplyingToAccessAndUse overridden %s", use)
                use = {"inspire_url": True, "url": href}
                continue

            logger.warning("Skipping unknown URL %s", constr)
            # we may try and parse license URLs: that's beyond RNDT requirements, but it would be nice

        # Joining at the end avoids the leading newline and the literal "None"
        # produced by incremental f-string concatenation
        freetext = self._join_freetext(freetexts)

        if access:
            self.jsoninstance["rndt_LimitationsOnPublicAccess"] = access
        else:
            logger.info("LimitationsOnPublicAccess not found")

        if use:
            self.jsoninstance["rndt_ConditionsApplyingToAccessAndUse"] = use
            if freetext:
                logger.warning("Ignoring freetext constraint [%s]", freetext)
        elif freetext:
            self.jsoninstance["rndt_ConditionsApplyingToAccessAndUse"] = {
                "inspire_url": False,
                "freetext": freetext,
            }
        else:
            logger.info("ConditionsApplyingToAccessAndUse not found")

    def resolve_resolution(self, val):
        """Store ``val`` under ``rndt_resolution`` unless it is None."""
        if val is not None:
            self.jsoninstance["rndt_resolution"] = val

    def resolve_accuracy(self, val):
        """Store ``val`` under ``rndt_accuracy`` unless it is None."""
        if val is not None:
            self.jsoninstance["rndt_accuracy"] = val
117+
43118class RNDTMetadataParser :
44119 """
45120 A metadata parser compliant with the RNDT specification
@@ -55,136 +130,112 @@ def __init__(self, exml):
55130 )
56131 )
57132
58- def get_freq (self , vals ):
59- freq_elem = self .exml .find (
60- util .nspath_eval (
61- "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode" ,
62- self .namespaces ,
63- )
64- )
def parse_codelist(self, xpath):
    """
    Find a codelist element and return its code and its text.

    This should be moved into the base parser.

    :param xpath: namespace-prefixed path, expanded through
        ``util.nspath_eval`` with ``self.namespaces`` and searched in ``self.exml``
    :return: a ``(codeListValue, text)`` tuple, or None when no element matches
    """
    node = self.exml.find(util.nspath_eval(xpath, self.namespaces))
    if node is None:
        return None
    return node.attrib.get("codeListValue"), node.text
65137
66- freq = freq_elem .attrib .get ("codeListValue" , None ) if freq_elem is not None else None
67- vals ["maintenance_frequency" ] = freq or "unknown"
68-
69- def get_access_costraints (self , custom ):
def parse_constraints(self) -> list:
    """
    Parse the gmd:MD_LegalConstraints elements of the document.

    :return: a list with one dict for each legal constraint that carries a
        restriction code:

        - type: "accessConstraints" or "useConstraints" (access is checked first)
        - code: the codeListValue of the restriction code ("" when missing)
        - href: the xlink:href of the gmx:Anchor in gmd:otherConstraints, or None
        - text: the text of gmd:otherConstraints (Anchor or CharacterString),
          or None when gmd:otherConstraints has neither
    """
    legal_constraints = self.exml.findall(
        util.nspath_eval(
            "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceConstraints/gmd:MD_LegalConstraints",
            self.namespaces,
        )
    )
    ret = []
    for node in legal_constraints:
        logger.debug("Parsing constraint: --> %s", node)

        # The first matching constraint type wins; for/else handles "none found"
        for constr_type in ("accessConstraints", "useConstraints"):
            md_restriction_code = node.find(
                util.nspath_eval(f"gmd:{constr_type}/gmd:MD_RestrictionCode", self.namespaces)
            )
            if md_restriction_code is not None:
                break
        else:
            logger.warning("Missing known restrictioncode")
            continue

        constr = {
            "type": constr_type,
            "code": md_restriction_code.attrib.get("codeListValue", ""),
        }

        anchor = node.find(util.nspath_eval("gmd:otherConstraints/gmx:Anchor", self.namespaces))
        if anchor is not None:
            constr["href"] = anchor.attrib.get("{http://www.w3.org/1999/xlink}href")
            constr["text"] = anchor.text
        else:
            charstring = node.find(
                util.nspath_eval("gmd:otherConstraints/gco:CharacterString", self.namespaces)
            )
            constr["href"] = None
            # A restriction code may come without gmd:otherConstraints:
            # find() returns None in that case, so guard before reading .text
            constr["text"] = charstring.text if charstring is not None else None

        ret.append(constr)

    return ret
188+
def parse_resolution(self, default=None):
    """
    Parse the spatial resolution (gco:Distance) of the dataset.

    :param default: value returned when the element is missing or its
        content is not a plain number
    :return: the resolution as ``int`` or ``float``, as written in the
        document, or ``default``
    """
    resolution = self.exml.find(
        util.nspath_eval(
            "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:distance/gco:Distance",
            self.namespaces,
        )
    )

    if resolution is None:
        logger.info("Resolution not found")
        return default

    # The element text may be None (empty element) or padded with whitespace
    text = (resolution.text or "").strip()
    try:
        res = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        # literal_eval raises SyntaxError, not ValueError, for input such as "" or "10 m"
        logger.warning("Error parsing resolution '%s': %s", resolution.text, e)
        return default

    # bool is a subclass of int, but "True"/"False" are not valid measures
    if isinstance(res, (float, int)) and not isinstance(res, bool):
        return res

    logger.warning("Resolution cannot be parsed: [%s]", resolution.text)
    return default
213+
def parse_accuracy(self, default=None):
    """
    Parse the absolute external positional accuracy (gco:Real) of the dataset.

    :param default: value returned when the element is missing or its
        content is not a plain number
    :return: the accuracy as ``float``, or ``default``
    """
    acc = self.exml.find(
        util.nspath_eval(
            "gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:report/gmd:DQ_AbsoluteExternalPositionalAccuracy/gmd:result/gmd:DQ_QuantitativeResult/gmd:value/gco:Record/gco:Real",
            self.namespaces,
        )
    )

    if acc is None:
        logger.info("Accuracy not found")
        return default

    # The element text may be None (empty element) or padded with whitespace
    text = (acc.text or "").strip()
    try:
        eval_acc = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        # literal_eval raises SyntaxError, not ValueError, for input such as "" or "0,5"
        logger.warning("Error parsing accuracy '%s': %s", acc.text, e)
        return default

    # bool is a subclass of int, but "True"/"False" are not valid measures
    if isinstance(eval_acc, (float, int)) and not isinstance(eval_acc, bool):
        return float(eval_acc)

    logger.warning("Accuracy cannot be parsed: [%s]", acc.text)
    return default
238+
188239
189240 def resolve_keywords (self ):
190241 """
@@ -282,3 +333,14 @@ def _get_keywords(keywords, thesaurus_info):
282333 else :
283334 not_tkey .append (text )
284335 return available , not_tkey , discarded
336+
def parse_frequency(self):
    """
    Parse the maintenance and update frequency codelist element.

    This should be moved into the base parser.

    :return: whatever ``self.parse_codelist`` returns for the
        MD_MaintenanceFrequencyCode path
    """
    frequency_xpath = (
        "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceMaintenance/"
        "gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode"
    )
    return self.parse_codelist(frequency_xpath)
340+
def get_freq(self, vals):
    """
    Store the maintenance frequency code into ``vals``.

    The value of ``vals["maintenance_frequency"]`` is the code returned by
    ``self.parse_frequency()``, or "unknown" when the element is missing or
    its code is empty.

    :param vals: dict updated in place
    """
    parsed = self.parse_frequency()
    if parsed is None:
        logger.info("Frequency not found")

    if parsed:
        frequency_code = parsed[0]
    else:
        frequency_code = None

    vals["maintenance_frequency"] = frequency_code if frequency_code else "unknown"
0 commit comments