Skip to content

Commit 802adbb

Browse files
committed
[#50] Look up titles individually, instead of the whole header
This commit fixes the tests in test_input_SpreadsheetInput_unflatten.py but breaks the tests in test_unflatten.py and test_roundtrip.py (the TODOs in input.py explain how).
1 parent 4e740c8 commit 802adbb

File tree

3 files changed

+65
-26
lines changed

3 files changed

+65
-26
lines changed

flattentool/input.py

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -36,15 +36,19 @@ class SpreadsheetInput(object):
3636
or csv).
3737
3838
"""
39-
def convert_dict_titles(self, dicts, titles):
39+
def convert_dict_titles(self, dicts):
4040
"""
41-
Replace titles with field names in the given list of dictionaries (``dicts``) using the mapping in ``titles``.
41+
Replace titles with field names in the given list of dictionaries
42+
(``dicts``) using the titles lookup in the schema parser.
4243
4344
"""
44-
titles = titles or {}
45-
titles_map = {title.replace(' ', '').lower(): title for title in titles}
46-
for d in dicts:
47-
yield { (titles[titles_map[k.replace(' ', '').lower()]] if k.replace(' ', '').lower() in titles_map else (k if '/' in k else k.replace(':','/'))):v for k,v in d.items() }
45+
# TODO add this to TitleLookup. Breaks the tests in test_unflatten
46+
# titles_map = {title.replace(' ', '').lower(): title for title in titles}
47+
if self.parser:
48+
for d in dicts:
49+
yield { self.parser.title_lookup.lookup_header(k):v for k,v in d.items() }
50+
else:
51+
return dicts
4852

4953
def __init__(self, input_name='', main_sheet_name='', timezone_name='UTC', root_id='ocid', convert_titles=False):
5054
self.input_name = input_name
@@ -53,17 +57,21 @@ def __init__(self, input_name='', main_sheet_name='', timezone_name='UTC', root_
5357
self.timezone = pytz.timezone(timezone_name)
5458
self.root_id = root_id
5559
self.convert_titles = convert_titles
60+
self.parser = None
5661

5762
def get_main_sheet_lines(self):
5863
if self.convert_titles:
59-
return self.convert_dict_titles(self.get_sheet_lines(self.main_sheet_name), self.parser.main_sheet.titles)
64+
return self.convert_dict_titles(self.get_sheet_lines(self.main_sheet_name))
6065
else:
6166
return self.get_sheet_lines(self.main_sheet_name)
6267

6368
def get_sub_sheets_lines(self):
6469
for sub_sheet_name in self.sub_sheet_names:
6570
if self.convert_titles:
66-
yield sub_sheet_name, self.convert_dict_titles(self.get_sheet_lines(sub_sheet_name), self.parser.sub_sheets[sub_sheet_name].titles if sub_sheet_name in self.parser.sub_sheets else None)
71+
# TODO: This won't work properly any more (breaks roundtrip
72+
# tests, but we should also have something more like unit
73+
# tests!)
74+
yield sub_sheet_name, self.convert_dict_titles(self.get_sheet_lines(sub_sheet_name))
6775
else:
6876
yield sub_sheet_name, self.get_sheet_lines(sub_sheet_name)
6977

flattentool/schema.py

Lines changed: 38 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
11
"""Classes for reading from a JSON schema"""
22

33
from __future__ import print_function
4+
from __future__ import unicode_literals
45
from collections import OrderedDict
6+
from six.moves import UserDict
57
import jsonref
68
from warnings import warn
79
from flattentool.sheet import Sheet
@@ -15,6 +17,24 @@ def get_property_type_set(property_schema_dict):
1517
return set(property_type)
1618

1719

20+
class TitleLookup(UserDict):
21+
property_name = None
22+
23+
def lookup_header(self, title_header):
24+
return self.lookup_header_list(title_header.split(':'))
25+
26+
def lookup_header_list(self, title_header_list):
27+
first_title = title_header_list[0]
28+
remaining_titles = title_header_list[1:]
29+
if first_title in self:
30+
if remaining_titles:
31+
return self[first_title].property_name + '/' + self[first_title].lookup_header_list(remaining_titles)
32+
else:
33+
return self[first_title].property_name
34+
else:
35+
return '/'.join(title_header_list)
36+
37+
1838
class SchemaParser(object):
1939
"""Parse the fields of a JSON schema into a flattened structure."""
2040

@@ -26,6 +46,7 @@ def __init__(self, schema_filename=None, root_schema_dict=None, main_sheet_name=
2646
self.rollup = rollup
2747
self.root_id = root_id
2848
self.use_titles = use_titles
49+
self.title_lookup = TitleLookup()
2950

3051
if root_schema_dict is None and schema_filename is None:
3152
raise ValueError('One of schema_filename or root_schema_dict must be supplied')
@@ -52,11 +73,10 @@ def parse(self):
5273
self.main_sheet.append(title)
5374
else:
5475
self.main_sheet.append(field)
55-
if title:
56-
self.main_sheet.titles[title] = field
5776

58-
def parse_schema_dict(self, parent_name, schema_dict, parent_id_fields=None):
77+
def parse_schema_dict(self, parent_name, schema_dict, parent_id_fields=None, title_lookup=None):
5978
parent_id_fields = parent_id_fields or []
79+
title_lookup = self.title_lookup if title_lookup is None else title_lookup
6080
if 'properties' in schema_dict:
6181
if 'id' in schema_dict['properties']:
6282
id_fields = parent_id_fields + [parent_name+'/id']
@@ -67,11 +87,20 @@ def parse_schema_dict(self, parent_name, schema_dict, parent_id_fields=None):
6787
property_type_set = get_property_type_set(property_schema_dict)
6888

6989
title = property_schema_dict.get('title')
90+
title_lookup[title] = TitleLookup()
91+
title_lookup[title].property_name = property_name
7092

7193
if 'object' in property_type_set:
72-
for field, child_title in self.parse_schema_dict(parent_name+'/'+property_name, property_schema_dict,
73-
parent_id_fields=id_fields):
74-
yield property_name+'/'+field, (title+':'+child_title if title and child_title else None) # TODO ambiguous use of "title"
94+
for field, child_title in self.parse_schema_dict(
95+
parent_name+'/'+property_name,
96+
property_schema_dict,
97+
parent_id_fields=id_fields,
98+
title_lookup=title_lookup[title]):
99+
yield (
100+
property_name+'/'+field,
101+
# TODO ambiguous use of "title"
102+
(title+':'+child_title if title and child_title else None)
103+
)
75104

76105
elif 'array' in property_type_set:
77106
type_set = get_property_type_set(property_schema_dict['items'])
@@ -83,6 +112,7 @@ def parse_schema_dict(self, parent_name, schema_dict, parent_id_fields=None):
83112
else:
84113
raise ValueError
85114
elif 'object' in type_set:
115+
title_lookup[title].property_name = property_name+'[]'
86116
if hasattr(property_schema_dict['items'], '__reference__'):
87117
sub_sheet_name = property_schema_dict['items'].__reference__['$ref'].split('/')[-1]
88118
else:
@@ -98,7 +128,8 @@ def parse_schema_dict(self, parent_name, schema_dict, parent_id_fields=None):
98128
sub_sheet.add_field(field+':'+property_name, id_field=True)
99129
fields = self.parse_schema_dict(parent_name+'/'+property_name+'[]',
100130
property_schema_dict['items'],
101-
parent_id_fields=id_fields)
131+
parent_id_fields=id_fields,
132+
title_lookup=title_lookup[title])
102133

103134
rolledUp = set()
104135

@@ -110,8 +141,6 @@ def parse_schema_dict(self, parent_name, schema_dict, parent_id_fields=None):
110141
sub_sheet.add_field(child_title)
111142
else:
112143
sub_sheet.add_field(field)
113-
if child_title:
114-
self.sub_sheets[sub_sheet_name].titles[child_title] = field
115144
if self.rollup and 'rollUp' in property_schema_dict and field in property_schema_dict['rollUp']:
116145
rolledUp.add(field)
117146
yield property_name+'[]/'+field, (title+':'+child_title if title and child_title else None)

flattentool/tests/test_input_SpreadsheetInput_unflatten.py

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,7 @@ def create_schema(root_id):
232232
}]
233233
),
234234
# Nested titles should be converted individually
235-
pytest.mark.xfail((
235+
(
236236
[{
237237
'ROOT_ID_TITLE': 1,
238238
'Identifier': 2,
@@ -244,7 +244,7 @@ def create_schema(root_id):
244244
'id': 2,
245245
'testB': {'testC': 3, 'Not in schema': 4}
246246
}]
247-
)),
247+
),
248248
# Unicode
249249
(
250250
[{
@@ -289,7 +289,7 @@ def create_schema(root_id):
289289
),
290290
# Properties of a single item array shouldn't need to be in rollUp list
291291
# for their titles to be converted
292-
pytest.mark.xfail((
292+
(
293293
[{
294294
'ROOT_ID_TITLE': 1,
295295
'Identifier': 2,
@@ -304,9 +304,9 @@ def create_schema(root_id):
304304
'testC': 4
305305
}]
306306
}]
307-
)),
307+
),
308308
# Single item array, titles should be converted individually
309-
pytest.mark.xfail((
309+
(
310310
[{
311311
'ROOT_ID_TITLE': 1,
312312
'Identifier': 2,
@@ -316,9 +316,12 @@ def create_schema(root_id):
316316
[{
317317
'ROOT_ID': 1,
318318
'id': 2,
319-
'testR': {'testC': 3, 'Not in schema': 4}
319+
'testR': [{
320+
'testC': 3,
321+
'Not in schema': 4
322+
}]
320323
}]
321-
)),
324+
),
322325
# Empty
323326
(
324327
[{
@@ -380,8 +383,7 @@ def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs, input_li
380383
root_schema_dict=create_schema(root_id) if use_schema else {},
381384
main_sheet_name='custom_main',
382385
root_id=root_id,
383-
rollup=True,
384-
use_titles=True
386+
rollup=True
385387
)
386388
parser.parse()
387389
spreadsheet_input.parser = parser

0 commit comments

Comments
 (0)