Skip to content

Commit 9635bf1

Browse files
committed
[#96] Make order of sheets deterministic
CSV sheets are processed in alphabetical order; XLSX sheets are processed in the order they appear in the spreadsheet.
1 parent f6d817e commit 9635bf1

File tree

3 files changed

+39
-46
lines changed

3 files changed

+39
-46
lines changed

flattentool/input.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ class XLSXInput(SpreadsheetInput):
369369
def read_sheets(self):
370370
self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
371371

372-
self.sheet_names_map = {sheet_name: sheet_name for sheet_name in self.workbook.get_sheet_names()}
372+
self.sheet_names_map = OrderedDict((sheet_name, sheet_name) for sheet_name in self.workbook.get_sheet_names())
373373

374374
sheet_names = list(self.sheet_names_map.keys())
375375
self.sub_sheet_names = sheet_names

flattentool/tests/test_input_SpreadsheetInput.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,6 @@ def test_csv_no_directory(self):
9595
with pytest.raises(OSError):
9696
csvinput.read_sheets()
9797

98-
def test_csv_no_files(self, tmpdir):
99-
csvinput = CSVInput(input_name=tmpdir.strpath)
100-
with pytest.raises(ValueError) as e:
101-
csvinput.read_sheets()
102-
assert 'Main sheet' in text_type(e) and 'not found' in text_type(e)
103-
10498
def test_xlsx_no_file(self, tmpdir):
10599
xlsxinput = XLSXInput(input_name=tmpdir.strpath.join('test.xlsx'))
106100
if sys.version > '3':

flattentool/tests/test_input_SpreadsheetInput_unflatten_mulitplesheets.py

Lines changed: 38 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,8 @@ def test_nested_sub_sheet(self, nested_id_in_subsheet):
9797

9898
def test_basic_two_sub_sheets(self):
9999
spreadsheet_input = ListInput(
100-
sheets={
101-
'custom_main': [
100+
sheets=OrderedDict([
101+
('custom_main', [
102102
OrderedDict([
103103
('ocid', 1),
104104
('id', 2),
@@ -107,24 +107,24 @@ def test_basic_two_sub_sheets(self):
107107
('ocid', 1),
108108
('id', 6),
109109
])
110-
],
111-
'sub1': [
110+
]),
111+
('sub1', [
112112
{
113113
'ocid': 1,
114114
'id': 2,
115115
'sub1Field/0/id': 3,
116116
'sub1Field/0/testA': 4,
117117
}
118-
],
119-
'sub2': [
118+
]),
119+
('sub2', [
120120
{
121121
'ocid': 1,
122122
'id': 2,
123123
'sub1Field/0/id': 3,
124124
'sub1Field/0/sub2Field/0/testB': 5,
125125
}
126-
]
127-
}
126+
])
127+
])
128128
)
129129
spreadsheet_input.read_sheets()
130130
unflattened = list(spreadsheet_input.unflatten())
@@ -204,14 +204,14 @@ def test_missing_columns(self, recwarn):
204204

205205
def test_unmatched_id(self, recwarn):
206206
spreadsheet_input = ListInput(
207-
sheets={
208-
'custom_main': [
207+
sheets=OrderedDict([
208+
('custom_main', [
209209
{
210210
'ocid': 1,
211211
'id': 2,
212212
}
213-
],
214-
'sub': [
213+
]),
214+
('sub', [
215215
{
216216
'ocid': 1,
217217
'id': 100,
@@ -224,8 +224,8 @@ def test_unmatched_id(self, recwarn):
224224
'subField/0/id': 3,
225225
'subField/0/testA': 5,
226226
}
227-
]
228-
}
227+
])
228+
])
229229
)
230230
spreadsheet_input.read_sheets()
231231
unflattened = list(spreadsheet_input.unflatten())
@@ -285,24 +285,25 @@ def test_same_rollup(self, recwarn):
285285

286286
def test_conflicting_rollup(self, recwarn):
287287
spreadsheet_input = ListInput(
288-
sheets={
289-
'main': [
288+
sheets=OrderedDict([
289+
('main', [
290290
{
291291
'ocid': 1,
292292
'id': 2,
293293
'testA/0/id': 3,
294294
'testA/0/testB': 4
295295
}
296-
],
297-
'testA': [
296+
]),
297+
('testA', [
298298
{
299299
'ocid': 1,
300300
'id': 2,
301301
'testA/0/id': 3,
302302
'testA/0/testB': 5,
303303
}
304-
]
305-
},
304+
])
305+
])
306+
306307
)
307308
spreadsheet_input.read_sheets()
308309
unflattened = list(spreadsheet_input.unflatten())
@@ -313,9 +314,7 @@ def test_conflicting_rollup(self, recwarn):
313314
'testA': [{
314315
'id': 3,
315316
'testB': 4
316-
# We currently know that testB will be 4 because the main
317-
# sheet is currently always parsed first, but this may change:
318-
# https://github.com/OpenDataServices/flatten-tool/issues/96
317+
# testB is 4 because sheets are parsed in the order they appear and the first value encountered is the one kept.
319318
}]
320319
}
321320
]
@@ -395,30 +394,30 @@ def test_nested_sub_sheet(self):
395394

396395
def test_basic_two_sub_sheets(self):
397396
spreadsheet_input = ListInput(
398-
sheets={
399-
'custom_main': [
397+
sheets=OrderedDict([
398+
('custom_main', [
400399
OrderedDict([
401400
('custom', 1),
402401
('id', 2),
403402
])
404-
],
405-
'sub1': [
403+
]),
404+
('sub1', [
406405
{
407406
'custom': 1,
408407
'id': 2,
409408
'sub1Field/0/id': 3,
410409
'sub1Field/0/testA': 4,
411410
}
412-
],
413-
'sub2': [
411+
]),
412+
('sub2', [
414413
{
415414
'custom': 1,
416415
'id': 2,
417416
'sub1Field/0/id': 3,
418417
'sub1Field/0/sub2Field/0/testB': 5,
419418
}
420-
]
421-
},
419+
])
420+
]),
422421
root_id='custom')
423422
spreadsheet_input.read_sheets()
424423
unflattened = list(spreadsheet_input.unflatten())
@@ -484,27 +483,27 @@ def test_nested_sub_sheet(self):
484483

485484
def test_basic_two_sub_sheets(self):
486485
spreadsheet_input = ListInput(
487-
sheets={
488-
'custom_main': [
486+
sheets=OrderedDict([
487+
('custom_main', [
489488
OrderedDict([
490489
('id', 2),
491490
])
492-
],
493-
'sub1': [
491+
]),
492+
('sub1', [
494493
{
495494
'id': 2,
496495
'sub1Field/0/id': 3,
497496
'sub1Field/0/testA': 4,
498497
}
499-
],
500-
'sub2': [
498+
]),
499+
('sub2', [
501500
{
502501
'id': 2,
503502
'sub1Field/0/id': 3,
504503
'sub1Field/0/sub2Field/0/testB': 5,
505504
}
506-
]
507-
},
505+
])
506+
]),
508507
root_id='')
509508
spreadsheet_input.read_sheets()
510509
unflattened = list(spreadsheet_input.unflatten())

0 commit comments

Comments
 (0)