Skip to content

Commit d9e6265

Browse files
committed
[#75] Case and space insensitivity
All titles are made space- and case-insensitive. Only the main sheet name is made case-insensitive, because sheet names have relevance when unflattening: they determine how objects are nested.
1 parent afb7653 commit d9e6265

6 files changed

+71
-5
lines changed

flattentool/input.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@
2727
class SpreadsheetInput(object):
2828
def convert_dict_titles(self, dicts, titles):
2929
titles = titles or {}
30+
titles_map = {title.replace(' ', '').lower(): title for title in titles}
3031
for d in dicts:
31-
yield { (titles[k] if k in titles else (k if '/' in k else k.replace(':','/'))):v for k,v in d.items() }
32+
yield { (titles[titles_map[k.replace(' ', '').lower()]] if k.replace(' ', '').lower() in titles_map else (k if '/' in k else k.replace(':','/'))):v for k,v in d.items() }
3233

3334
def __init__(self, input_name='', main_sheet_name='', timezone_name='UTC', root_id='ocid', convert_titles=False):
3435
self.input_name = input_name
@@ -214,14 +215,22 @@ def get_sheet_lines(self, sheet_name):
214215
class XLSXInput(SpreadsheetInput):
215216
def read_sheets(self):
216217
self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
217-
sheet_names = self.workbook.get_sheet_names()
218+
219+
self.sheet_names_map = {sheet_name: sheet_name for sheet_name in self.workbook.get_sheet_names()}
220+
# allow main sheet to be any case
221+
for sheet_name in list(self.sheet_names_map):
222+
if sheet_name.lower() == self.main_sheet_name.lower():
223+
self.sheet_names_map.pop(sheet_name)
224+
self.sheet_names_map[self.main_sheet_name] = sheet_name
225+
226+
sheet_names = list(self.sheet_names_map.keys())
218227
if self.main_sheet_name not in sheet_names:
219228
raise ValueError('Main sheet "{}" not found in workbook.'.format(self.main_sheet_name))
220229
sheet_names.remove(self.main_sheet_name)
221230
self.sub_sheet_names = sheet_names
222231

223232
def get_sheet_lines(self, sheet_name):
224-
worksheet = self.workbook[sheet_name]
233+
worksheet = self.workbook[self.sheet_names_map[sheet_name]]
225234
header_row = worksheet.rows[0]
226235
remaining_rows = worksheet.rows[1:]
227236
coli_to_header = ({i: x.value for i, x in enumerate(header_row) if x.value is not None})

flattentool/tests/fixtures/WellcomeTrust-grants_fixed_2_grants.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
}
1919
],
2020
"dateModified": "13-03-2015",
21-
"Data source": "http://www.wellcome.ac.uk/Managing-a-grant/Grants-awarded/index.htm",
21+
"dataSource": "http://www.wellcome.ac.uk/Managing-a-grant/Grants-awarded/index.htm",
2222
"plannedDates": [
2323
{
2424
"duration": 30
@@ -62,7 +62,7 @@
6262
}
6363
],
6464
"dateModified": "13-03-2015",
65-
"Data source": "http://www.wellcome.ac.uk/Managing-a-grant/Grants-awarded/index.htm",
65+
"dataSource": "http://www.wellcome.ac.uk/Managing-a-grant/Grants-awarded/index.htm",
6666
"plannedDates": [
6767
{
6868
"duration": 25
5.86 KB
Binary file not shown.
5.73 KB
Binary file not shown.
5.86 KB
Binary file not shown.
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import json
2+
3+
from flattentool import unflatten
4+
5+
def test_360_main_sheetname_insensitive(tmpdir):
6+
input_name = 'flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_grants.xlsx'
7+
unflatten(
8+
input_name=input_name,
9+
output_name=tmpdir.join('output_grant.json').strpath,
10+
input_format='xlsx',
11+
schema='flattentool/tests/fixtures/360-giving-schema.json',
12+
main_sheet_name='grants',
13+
root_list_path='grants',
14+
root_id='',
15+
convert_titles=True)
16+
output_json_grants = json.load(tmpdir.join('output_grant.json'))
17+
18+
input_name = 'flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_Grants.xlsx'
19+
unflatten(
20+
input_name=input_name,
21+
output_name=tmpdir.join('output_Grant.json').strpath,
22+
input_format='xlsx',
23+
schema='flattentool/tests/fixtures/360-giving-schema.json',
24+
main_sheet_name='grants',
25+
root_list_path='grants',
26+
root_id='',
27+
convert_titles=True)
28+
output_json_Grants = json.load(tmpdir.join('output_Grant.json'))
29+
30+
assert output_json_grants == output_json_Grants
31+
32+
def test_360_fields_case_insensitive(tmpdir):
33+
input_name = 'flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_grants.xlsx'
34+
unflatten(
35+
input_name=input_name,
36+
output_name=tmpdir.join('output_grant.json').strpath,
37+
input_format='xlsx',
38+
schema='flattentool/tests/fixtures/360-giving-schema.json',
39+
main_sheet_name='grants',
40+
root_list_path='grants',
41+
root_id='',
42+
convert_titles=True)
43+
output_json_grants = json.load(tmpdir.join('output_grant.json'))
44+
45+
input_name = 'flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_grants_title_space_case.xlsx'
46+
unflatten(
47+
input_name=input_name,
48+
output_name=tmpdir.join('output_space_case.json').strpath,
49+
input_format='xlsx',
50+
schema='flattentool/tests/fixtures/360-giving-schema.json',
51+
main_sheet_name='grants',
52+
root_list_path='grants',
53+
root_id='',
54+
convert_titles=True)
55+
output_json_space_case = json.load(tmpdir.join('output_space_case.json'))
56+
57+
assert output_json_grants == output_json_space_case

0 commit comments

Comments
 (0)