1
1
"""Classes for reading from a JSON schema"""
2
2
3
3
from __future__ import print_function
4
+ from __future__ import unicode_literals
4
5
from collections import OrderedDict
6
+ from six .moves import UserDict
5
7
import jsonref
6
8
from warnings import warn
7
9
from flattentool .sheet import Sheet
@@ -15,6 +17,46 @@ def get_property_type_set(property_schema_dict):
15
17
return set (property_type )
16
18
17
19
20
class TitleLookup(UserDict):
    """Nested mapping from human-readable titles to schema property names.

    Keys are matched ignoring spaces and letter case: every key is
    normalised (spaces removed, lower-cased) before it touches ``self.data``.
    Each value is expected to be another ``TitleLookup`` describing the
    titles nested beneath that entry, with its ``property_name`` attribute
    holding the field name the title stands for.
    """

    # Field name that this node's title translates to.  Stays None until
    # whoever builds the lookup assigns it.
    property_name = None

    @staticmethod
    def _normalise(key):
        """Return ``key`` with spaces removed and lower-cased."""
        return key.replace(' ', '').lower()

    def lookup_header(self, title_header):
        """Translate a ``:``-separated title header into a ``/``-separated path.

        ``title_header`` is returned unchanged if it contains a ``/``.
        """
        # Ignore titles with a / in, as they may contain types
        # https://github.com/OpenDataServices/flatten-tool/issues/56
        if '/' in title_header:
            return title_header
        return self.lookup_header_list(title_header.split(':'))

    def lookup_header_list(self, title_header_list):
        """Translate a list of titles, outermost first, into a field path.

        The first title is looked up in this mapping and the remaining
        titles are resolved recursively against the matching child lookup.
        """
        first_title = title_header_list[0]
        remaining_titles = title_header_list[1:]
        if first_title not in self:
            # If we can't look up the title, treat it and any children as
            # field names directly.
            # Strip spaces off these.
            return '/'.join(x.strip(' ') for x in title_header_list)
        child = self[first_title]
        if remaining_titles:
            return child.property_name + '/' + child.lookup_header_list(remaining_titles)
        return child.property_name

    def __setitem__(self, key, value):
        """Store ``value`` under the normalised form of ``key``."""
        self.data[self._normalise(key)] = value

    def __getitem__(self, key):
        """Return the value for ``key``; raise ``KeyError`` if ``key`` is None or absent."""
        if key is None:
            raise KeyError(key)
        return self.data[self._normalise(key)]

    def __delitem__(self, key):
        """Delete the entry for ``key`` using the same normalisation as lookups.

        Without this override the inherited deletion would use the raw key,
        so ``del`` and ``pop()`` would fail for any key spelled with spaces
        or capitals even though ``in`` reports it as present.
        """
        if key is None:
            raise KeyError(key)
        del self.data[self._normalise(key)]

    def __contains__(self, key):
        """Return True if the normalised ``key`` is stored; None is never contained."""
        if key is None:
            return False
        return self._normalise(key) in self.data
58
+
59
+
18
60
class SchemaParser (object ):
19
61
"""Parse the fields of a JSON schema into a flattened structure."""
20
62
@@ -26,6 +68,7 @@ def __init__(self, schema_filename=None, root_schema_dict=None, main_sheet_name=
26
68
self .rollup = rollup
27
69
self .root_id = root_id
28
70
self .use_titles = use_titles
71
+ self .title_lookup = TitleLookup ()
29
72
30
73
if root_schema_dict is None and schema_filename is None :
31
74
raise ValueError ('One of schema_filename or root_schema_dict must be supplied' )
@@ -52,11 +95,10 @@ def parse(self):
52
95
self .main_sheet .append (title )
53
96
else :
54
97
self .main_sheet .append (field )
55
- if title :
56
- self .main_sheet .titles [title ] = field
57
98
58
- def parse_schema_dict (self , parent_name , schema_dict , parent_id_fields = None ):
99
+ def parse_schema_dict (self , parent_name , schema_dict , parent_id_fields = None , title_lookup = None ):
59
100
parent_id_fields = parent_id_fields or []
101
+ title_lookup = self .title_lookup if title_lookup is None else title_lookup
60
102
if 'properties' in schema_dict :
61
103
if 'id' in schema_dict ['properties' ]:
62
104
id_fields = parent_id_fields + [parent_name + '/id' ]
@@ -67,11 +109,21 @@ def parse_schema_dict(self, parent_name, schema_dict, parent_id_fields=None):
67
109
property_type_set = get_property_type_set (property_schema_dict )
68
110
69
111
title = property_schema_dict .get ('title' )
112
+ if title :
113
+ title_lookup [title ] = TitleLookup ()
114
+ title_lookup [title ].property_name = property_name
70
115
71
116
if 'object' in property_type_set :
72
- for field , child_title in self .parse_schema_dict (parent_name + '/' + property_name , property_schema_dict ,
73
- parent_id_fields = id_fields ):
74
- yield property_name + '/' + field , (title + ':' + child_title if title and child_title else None ) # TODO ambiguous use of "title"
117
+ for field , child_title in self .parse_schema_dict (
118
+ parent_name + '/' + property_name ,
119
+ property_schema_dict ,
120
+ parent_id_fields = id_fields ,
121
+ title_lookup = title_lookup .get (title )):
122
+ yield (
123
+ property_name + '/' + field ,
124
+ # TODO ambiguous use of "title"
125
+ (title + ':' + child_title if title and child_title else None )
126
+ )
75
127
76
128
elif 'array' in property_type_set :
77
129
type_set = get_property_type_set (property_schema_dict ['items' ])
@@ -83,6 +135,8 @@ def parse_schema_dict(self, parent_name, schema_dict, parent_id_fields=None):
83
135
else :
84
136
raise ValueError
85
137
elif 'object' in type_set :
138
+ if title :
139
+ title_lookup [title ].property_name = property_name + '[]'
86
140
if hasattr (property_schema_dict ['items' ], '__reference__' ):
87
141
sub_sheet_name = property_schema_dict ['items' ].__reference__ ['$ref' ].split ('/' )[- 1 ]
88
142
else :
@@ -93,12 +147,14 @@ def parse_schema_dict(self, parent_name, schema_dict, parent_id_fields=None):
93
147
if sub_sheet_name not in self .sub_sheets :
94
148
self .sub_sheets [sub_sheet_name ] = Sheet (root_id = self .root_id , name = sub_sheet_name )
95
149
sub_sheet = self .sub_sheets [sub_sheet_name ]
150
+ sub_sheet .title_lookup = title_lookup .get (title )
96
151
97
152
for field in id_fields :
98
153
sub_sheet .add_field (field + ':' + property_name , id_field = True )
99
154
fields = self .parse_schema_dict (parent_name + '/' + property_name + '[]' ,
100
155
property_schema_dict ['items' ],
101
- parent_id_fields = id_fields )
156
+ parent_id_fields = id_fields ,
157
+ title_lookup = title_lookup .get (title ))
102
158
103
159
rolledUp = set ()
104
160
@@ -110,8 +166,6 @@ def parse_schema_dict(self, parent_name, schema_dict, parent_id_fields=None):
110
166
sub_sheet .add_field (child_title )
111
167
else :
112
168
sub_sheet .add_field (field )
113
- if child_title :
114
- self .sub_sheets [sub_sheet_name ].titles [child_title ] = field
115
169
if self .rollup and 'rollUp' in property_schema_dict and field in property_schema_dict ['rollUp' ]:
116
170
rolledUp .add (field )
117
171
yield property_name + '[]/' + field , (title + ':' + child_title if title and child_title else None )
0 commit comments