Skip to content

Commit 084608c

Browse files
authored
Merge branch 'master' into 151-xml-to-spreadsheet
2 parents b5c6574 + 948f6f0 commit 084608c

File tree

9 files changed

+71
-8
lines changed

9 files changed

+71
-8
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
... DataErrorWarning: Conflict when merging field "name" for id "CAFE-HEALTH" in sheet b: "Healthy Cafe" != "Incorrect value". If you were not expecting merging you may have a duplicate ID.
1+
... DataErrorWarning: You may have a duplicate Identifier: We couldn't merge these rows with the id "CAFE-HEALTH": field "name" in sheet "b": one cell has the value: "Healthy Cafe", the other cell has the value: "Incorrect value"
22
DataErrorWarning)
3-
... DataErrorWarning: Conflict when merging field "number_of_tables" for id "CAFE-HEALTH" in sheet d: "3" != "4". If you were not expecting merging you may have a duplicate ID.
3+
... DataErrorWarning: You may have a duplicate Identifier: We couldn't merge these rows with the id "CAFE-HEALTH": field "number_of_tables" in sheet "d": one cell has the value: "3", the other cell has the value: "4"
44
DataErrorWarning)
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
... DataErrorWarning: Conflict when merging field "name" for id "CAFE-HEALTH" in sheet data: "Healthy Cafe" != "Vegetarian Cafe". If you were not expecting merging you may have a duplicate ID.
1+
... DataErrorWarning: You may have a duplicate Identifier: We couldn't merge these rows with the id "CAFE-HEALTH": field "name" in sheet "data": one cell has the value: "Healthy Cafe", the other cell has the value: "Vegetarian Cafe"
22
DataErrorWarning)
3-
... DataErrorWarning: Conflict when merging field "number_of_tables" for id "CAFE-HEALTH" in sheet data: "3" != "4". If you were not expecting merging you may have a duplicate ID.
3+
... DataErrorWarning: You may have a duplicate Identifier: We couldn't merge these rows with the id "CAFE-HEALTH": field "number_of_tables" in sheet "data": one cell has the value: "3", the other cell has the value: "4"
44
DataErrorWarning)

flattentool/input.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ def convert_type(type_string, value, timezone = pytz.timezone('UTC')):
9494
elif type_string == '':
9595
if type(value) == datetime.datetime:
9696
return timezone.localize(value).isoformat()
97+
if type(value) == float and int(value) == value:
98+
return int(value)
9799
return value if type(value) in [int] else text_type(value)
98100
else:
99101
raise ValueError('Unrecognised type: "{}"'.format(type_string))
@@ -151,8 +153,8 @@ def merge(base, mergee, debug_info=None):
151153
if debug_info.get('root_id'):
152154
id_info = '{} "{}", '.format(debug_info.get('root_id'), debug_info.get('root_id_or_none'))+id_info
153155
warn(
154-
'Conflict when merging field "{}" for {} in sheet {}: "{}" != "{}". If you were not expecting merging you may have a duplicate ID.'.format(
155-
key, id_info, debug_info.get('sheet_name'), base_value, value),
156+
'You may have a duplicate Identifier: We couldn\'t merge these rows with the {}: field "{}" in sheet "{}": one cell has the value: "{}", the other cell has the value: "{}"'.format(
157+
id_info, key, debug_info.get('sheet_name'), base_value, value),
156158
DataErrorWarning)
157159
else:
158160
base[key].sub_cells.append(v)
@@ -572,7 +574,8 @@ def get_sheet_headings(self, sheet_name):
572574
sheet_configuration = {}
573575

574576
skip_rows = sheet_configuration.get("skipRows", 0)
575-
if sheet_configuration.get("ignore"):
577+
if (sheet_configuration.get("ignore") or
578+
(sheet_configuration.get("hashcomments") and sheet_name.startswith('#'))):
576579
# returning empty headers is a proxy for no data in the sheet.
577580
return []
578581

@@ -615,7 +618,13 @@ def get_sheet_lines(self, sheet_name):
615618
header_row = worksheet.rows[skip_rows + configuration_line]
616619
remaining_rows = worksheet.rows[skip_rows + configuration_line + header_rows:]
617620

618-
coli_to_header = ({i: x.value for i, x in enumerate(header_row) if x.value is not None})
621+
coli_to_header = {}
622+
for i, header in enumerate(header_row):
623+
if header.value is None:
624+
continue
625+
if sheet_configuration.get("hashcomments") and str(header.value).startswith('#'):
626+
continue
627+
coli_to_header[i] = header.value
619628
for row in remaining_rows:
620629
yield OrderedDict((coli_to_header[i], x.value) for i, x in enumerate(row) if i in coli_to_header)
621630

flattentool/lib.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,6 @@ def parse_sheet_configuration(configuration_list):
1515
configuration['headerRows'] = max(int(parts[1]), 1)
1616
if (len(parts) == 1 and parts[0].lower() == "ignore"):
1717
configuration['ignore'] = True
18+
if (len(parts) == 1 and parts[0].lower() in ("hashcomments", "hashcomment")):
19+
configuration['hashcomments'] = True
1820
return configuration
8.24 KB
Binary file not shown.
3.58 KB
Binary file not shown.

flattentool/tests/test_init.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,3 +1240,20 @@ def test_commands_ignore(tmpdir):
12401240
unflattened = json.load(tmpdir.join('command_single_unflattened.json'))
12411241

12421242
assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]}
1243+
1244+
def test_commands_hashcomments(tmpdir):
1245+
1246+
unflatten(
1247+
'flattentool/tests/fixtures/xlsx/commands_hashcomments.xlsx',
1248+
input_format='xlsx',
1249+
output_name=tmpdir.join('commands_hashcomments_unflattened.json').strpath,
1250+
cell_source_map=tmpdir.join('commands_hashcomments_source_map.json').strpath,
1251+
heading_source_map=tmpdir.join('commands_hashcomments_heading_source_map.json').strpath,
1252+
metatab_name='Meta',
1253+
metatab_vertical_orientation=True
1254+
)
1255+
1256+
unflattened = json.load(tmpdir.join('commands_hashcomments_unflattened.json'))
1257+
1258+
assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}, {'actual': 'actual', 'headings': 'Other data', 'some': 'some'}],
1259+
'some': 'data'}

flattentool/tests/test_input_SpreadsheetInput.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,25 @@ def test_xlsx_input_integer(self):
9898

9999
assert list(xlsxinput.get_sheet_lines('main')) == \
100100
[{'colA': 1}]
101+
if sys.version_info[0] == 2:
102+
assert type(list(xlsxinput.get_sheet_lines('main'))[0]['colA']) == long
103+
else:
104+
assert type(list(xlsxinput.get_sheet_lines('main'))[0]['colA']) == int
101105
assert xlsxinput.sub_sheet_names == ['main']
102106

107+
def test_xlsx_input_integer2(self):
108+
xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/integer2.xlsx')
109+
110+
xlsxinput.read_sheets()
111+
112+
assert list(xlsxinput.get_sheet_lines('Sheet1')) == \
113+
[{'activity-status/@code': 2}]
114+
# This is a float, but is converted to an int in the unflatten step, see
115+
# test_input_SpreadsheetInput_unflatten.py
116+
# 'Basic with float'
117+
assert type(list(xlsxinput.get_sheet_lines('Sheet1'))[0]['activity-status/@code']) == float
118+
assert xlsxinput.sub_sheet_names == ['Sheet1']
119+
103120
def test_xlsx_input_formula(self):
104121
""" When a formula is present, we should use the value, rather than the
105122
formula itself. """

flattentool/tests/test_input_SpreadsheetInput_unflatten.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,24 @@ def inject_root_id(root_id, d):
6161
[],
6262
True
6363
),
64+
(
65+
'Basic with float',
66+
# 3.0 is converted to 3
67+
# This is needed to handle google docs xlsx properly
68+
# https://github.com/OpenDataServices/cove/issues/838
69+
[{
70+
'ROOT_ID': '1',
71+
'id': 2,
72+
'testA': 3.0
73+
}],
74+
[{
75+
'ROOT_ID': '1',
76+
'id': 2,
77+
'testA': 3
78+
}],
79+
[],
80+
True
81+
),
6482
(
6583
'Basic with zero',
6684
[{

0 commit comments

Comments
 (0)