Skip to content

Commit 2af8a00

Browse files
committed
Merge pull request #82 from OpenDataServices/75-case-insensitvity
[#75] Case and space insensitivity
2 parents afb7653 + d9e6265 commit 2af8a00

6 files changed

+71
-5
lines changed

flattentool/input.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@
2727
class SpreadsheetInput(object):
2828
def convert_dict_titles(self, dicts, titles):
    """Yield a copy of each dict with its keys converted to field paths.

    A key that matches one of ``titles`` -- ignoring case and spaces --
    is replaced by the value ``titles`` maps that title to. Any other key
    is kept as-is when it already contains '/', otherwise every ':' in it
    is replaced with '/'.

    Args:
        dicts: iterable of dicts keyed by column heading.
        titles: mapping of title -> replacement key; may be None or empty.

    Yields:
        One new dict per input dict, with the same values.
    """
    def normalise(text):
        # Titles are compared with spaces removed and case folded to lower.
        return text.replace(' ', '').lower()

    titles = titles or {}
    # Map the normalised title straight to its replacement so each key
    # needs one normalisation and one lookup. If two titles normalise to
    # the same string, the one iterated last wins.
    paths_by_title = {normalise(title): titles[title] for title in titles}

    for d in dicts:
        converted = {}
        for k, v in d.items():
            normalised = normalise(k)
            if normalised in paths_by_title:
                new_key = paths_by_title[normalised]
            elif '/' in k:
                new_key = k
            else:
                new_key = k.replace(':', '/')
            converted[new_key] = v
        yield converted
3233

3334
def __init__(self, input_name='', main_sheet_name='', timezone_name='UTC', root_id='ocid', convert_titles=False):
3435
self.input_name = input_name
@@ -214,14 +215,22 @@ def get_sheet_lines(self, sheet_name):
214215
class XLSXInput(SpreadsheetInput):
215216
def read_sheets(self):
    """Load the workbook and record its main sheet and sub-sheet names.

    Sets ``self.workbook``, ``self.sheet_names_map`` (the name used by the
    rest of this class -> the sheet's actual name in the workbook) and
    ``self.sub_sheet_names`` (every mapped name except the main sheet).

    Raises:
        ValueError: if no sheet matches ``self.main_sheet_name``, even
            ignoring case.
    """
    self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)

    self.sheet_names_map = {sheet_name: sheet_name for sheet_name in self.workbook.get_sheet_names()}

    # allow main sheet to be any case -- but an exact match always wins, and
    # only one sheet is remapped, so a workbook holding e.g. both "grants"
    # and "Grants" does not silently lose one of them.
    if self.main_sheet_name not in self.sheet_names_map:
        wanted = self.main_sheet_name.lower()
        for sheet_name in list(self.sheet_names_map):
            if sheet_name.lower() == wanted:
                self.sheet_names_map.pop(sheet_name)
                self.sheet_names_map[self.main_sheet_name] = sheet_name
                break

    sheet_names = list(self.sheet_names_map)
    if self.main_sheet_name not in sheet_names:
        raise ValueError('Main sheet "{}" not found in workbook.'.format(self.main_sheet_name))
    sheet_names.remove(self.main_sheet_name)
    self.sub_sheet_names = sheet_names
222231

223232
def get_sheet_lines(self, sheet_name):
224-
worksheet = self.workbook[sheet_name]
233+
worksheet = self.workbook[self.sheet_names_map[sheet_name]]
225234
header_row = worksheet.rows[0]
226235
remaining_rows = worksheet.rows[1:]
227236
coli_to_header = ({i: x.value for i, x in enumerate(header_row) if x.value is not None})

flattentool/tests/fixtures/WellcomeTrust-grants_fixed_2_grants.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
}
1919
],
2020
"dateModified": "13-03-2015",
21-
"Data source": "http://www.wellcome.ac.uk/Managing-a-grant/Grants-awarded/index.htm",
21+
"dataSource": "http://www.wellcome.ac.uk/Managing-a-grant/Grants-awarded/index.htm",
2222
"plannedDates": [
2323
{
2424
"duration": 30
@@ -62,7 +62,7 @@
6262
}
6363
],
6464
"dateModified": "13-03-2015",
65-
"Data source": "http://www.wellcome.ac.uk/Managing-a-grant/Grants-awarded/index.htm",
65+
"dataSource": "http://www.wellcome.ac.uk/Managing-a-grant/Grants-awarded/index.htm",
6666
"plannedDates": [
6767
{
6868
"duration": 25
5.86 KB
Binary file not shown.
5.73 KB
Binary file not shown.
5.86 KB
Binary file not shown.
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import json
2+
3+
from flattentool import unflatten
4+
5+
def test_360_main_sheetname_insensitive(tmpdir):
    """Unflatten two workbooks with main_sheet_name='grants' and compare.

    The fixture file names suggest the workbooks differ only in the case of
    the main sheet's name ("grants" vs "Grants") -- the binary fixtures are
    not inspected here. Both runs must produce identical JSON.
    """
    # Options common to both conversions.
    shared_options = dict(
        input_format='xlsx',
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        main_sheet_name='grants',
        root_list_path='grants',
        root_id='',
        convert_titles=True,
    )

    lower_output = tmpdir.join('output_grant.json')
    unflatten(
        input_name='flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_grants.xlsx',
        output_name=lower_output.strpath,
        **shared_options)
    output_json_grants = json.load(lower_output)

    capitalised_output = tmpdir.join('output_Grant.json')
    unflatten(
        input_name='flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_Grants.xlsx',
        output_name=capitalised_output.strpath,
        **shared_options)
    output_json_Grants = json.load(capitalised_output)

    assert output_json_grants == output_json_Grants
31+
32+
def test_360_fields_case_insensitive(tmpdir):
    """Unflatten a baseline workbook and a title-variant workbook and compare.

    The second fixture's file name suggests its column titles differ from
    the baseline in spacing and case -- the binary fixtures are not
    inspected here. Both runs must produce identical JSON.
    """
    # Options common to both conversions.
    shared_options = dict(
        input_format='xlsx',
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        main_sheet_name='grants',
        root_list_path='grants',
        root_id='',
        convert_titles=True,
    )

    baseline_output = tmpdir.join('output_grant.json')
    unflatten(
        input_name='flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_grants.xlsx',
        output_name=baseline_output.strpath,
        **shared_options)
    output_json_grants = json.load(baseline_output)

    variant_output = tmpdir.join('output_space_case.json')
    unflatten(
        input_name='flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_grants_title_space_case.xlsx',
        output_name=variant_output.strpath,
        **shared_options)
    output_json_space_case = json.load(variant_output)

    assert output_json_grants == output_json_space_case

0 commit comments

Comments
 (0)