[#96] Make order of sheets deterministic

Bjwebb · Bjwebb · commit 9635bf1cae89 · 2016-06-09T17:52:53.000+01:00
For CSV input, the sheets are processed in alphabetical order.

For XLSX input, the sheets are processed in the order in which they appear in the spreadsheet.
diff --git a/flattentool/input.py b/flattentool/input.py
@@ -369,7 +369,7 @@ class XLSXInput(SpreadsheetInput):
     def read_sheets(self):
         self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
 
-        self.sheet_names_map = {sheet_name: sheet_name for sheet_name in self.workbook.get_sheet_names()}
+        self.sheet_names_map = OrderedDict((sheet_name, sheet_name) for sheet_name in self.workbook.get_sheet_names())
 
         sheet_names = list(self.sheet_names_map.keys())
         self.sub_sheet_names = sheet_names
diff --git a/flattentool/tests/test_input_SpreadsheetInput.py b/flattentool/tests/test_input_SpreadsheetInput.py
@@ -95,12 +95,6 @@ def test_csv_no_directory(self):
             with pytest.raises(OSError):
                 csvinput.read_sheets()
 
-    def test_csv_no_files(self, tmpdir):
-        csvinput = CSVInput(input_name=tmpdir.strpath)
-        with pytest.raises(ValueError) as e:
-            csvinput.read_sheets()
-        assert 'Main sheet' in text_type(e) and 'not found' in text_type(e)
-
     def test_xlsx_no_file(self, tmpdir):
         xlsxinput = XLSXInput(input_name=tmpdir.strpath.join('test.xlsx'))
         if sys.version > '3':
diff --git a/flattentool/tests/test_input_SpreadsheetInput_unflatten_mulitplesheets.py b/flattentool/tests/test_input_SpreadsheetInput_unflatten_mulitplesheets.py
@@ -97,8 +97,8 @@ def test_nested_sub_sheet(self, nested_id_in_subsheet):
 
     def test_basic_two_sub_sheets(self):
         spreadsheet_input = ListInput(
-            sheets={
-                'custom_main': [
+            sheets=OrderedDict([
+                ('custom_main', [
                     OrderedDict([
                         ('ocid', 1),
                         ('id', 2),
@@ -107,24 +107,24 @@ def test_basic_two_sub_sheets(self):
                         ('ocid', 1),
                         ('id', 6),
                     ])
-                ],
-                'sub1': [
+                ]),
+                ('sub1', [
                     {
                         'ocid': 1,
                         'id': 2,
                         'sub1Field/0/id': 3,
                         'sub1Field/0/testA': 4,
                     }
-                ],
-                'sub2': [
+                ]),
+                ('sub2', [
                     {
                         'ocid': 1,
                         'id': 2,
                         'sub1Field/0/id': 3,
                         'sub1Field/0/sub2Field/0/testB': 5,
                     }
-                ]
-            }
+                ])
+            ])
             )
         spreadsheet_input.read_sheets()
         unflattened = list(spreadsheet_input.unflatten())
@@ -204,14 +204,14 @@ def test_missing_columns(self, recwarn):
 
     def test_unmatched_id(self, recwarn):
         spreadsheet_input = ListInput(
-            sheets={
-                'custom_main': [
+            sheets=OrderedDict([
+                ('custom_main', [
                     {
                         'ocid': 1,
                         'id': 2,
                     }
-                ],
-                'sub': [
+                ]),
+                ('sub', [
                     {
                         'ocid': 1,
                         'id': 100,
@@ -224,8 +224,8 @@ def test_unmatched_id(self, recwarn):
                         'subField/0/id': 3,
                         'subField/0/testA': 5,
                     }
-                ]
-            }
+                ])
+            ])
             )
         spreadsheet_input.read_sheets()
         unflattened = list(spreadsheet_input.unflatten())
@@ -285,24 +285,25 @@ def test_same_rollup(self, recwarn):
 
     def test_conflicting_rollup(self, recwarn):
         spreadsheet_input = ListInput(
-            sheets={
-                'main': [
+            sheets=OrderedDict([
+                ('main', [
                     {
                         'ocid': 1,
                         'id': 2,
                         'testA/0/id': 3,
                         'testA/0/testB': 4
                     }
-                ],
-                'testA': [
+                ]),
+                ('testA', [
                     {
                         'ocid': 1,
                         'id': 2,
                         'testA/0/id': 3,
                         'testA/0/testB': 5,
                     }
-                ]
-            },
+                ])
+            ])
+
         )
         spreadsheet_input.read_sheets()
         unflattened = list(spreadsheet_input.unflatten())
@@ -313,9 +314,7 @@ def test_conflicting_rollup(self, recwarn):
                 'testA': [{
                     'id': 3,
                     'testB': 4
-                    # We currently know that testB will be 4 because the main
-                    # sheet is currently always parsed first, but this may change:
-                    # https://github.com/OpenDataServices/flatten-tool/issues/96
+                    # testB is 4 because sheets are parsed in the order they appear, and the first value is used.
                 }]
             }
         ]
@@ -395,30 +394,30 @@ def test_nested_sub_sheet(self):
 
     def test_basic_two_sub_sheets(self):
         spreadsheet_input = ListInput(
-            sheets={
-                'custom_main': [
+            sheets=OrderedDict([
+                ('custom_main', [
                     OrderedDict([
                         ('custom', 1),
                         ('id', 2),
                     ])
-                ],
-                'sub1': [
+                ]),
+                ('sub1', [
                     {
                         'custom': 1,
                         'id': 2,
                         'sub1Field/0/id': 3,
                         'sub1Field/0/testA': 4,
                     }
-                ],
-                'sub2': [
+                ]),
+                ('sub2', [
                     {
                         'custom': 1,
                         'id': 2,
                         'sub1Field/0/id': 3,
                         'sub1Field/0/sub2Field/0/testB': 5,
                     }
-                ]
-            },
+                ])
+            ]),
             root_id='custom')
         spreadsheet_input.read_sheets()
         unflattened = list(spreadsheet_input.unflatten())
@@ -484,27 +483,27 @@ def test_nested_sub_sheet(self):
 
     def test_basic_two_sub_sheets(self):
         spreadsheet_input = ListInput(
-            sheets={
-                'custom_main': [
+            sheets=OrderedDict([
+                ('custom_main', [
                     OrderedDict([
                         ('id', 2),
                     ])
-                ],
-                'sub1': [
+                ]),
+                ('sub1', [
                     {
                         'id': 2,
                         'sub1Field/0/id': 3,
                         'sub1Field/0/testA': 4,
                     }
-                ],
-                'sub2': [
+                ]),
+                ('sub2', [
                     {
                         'id': 2,
                         'sub1Field/0/id': 3,
                         'sub1Field/0/sub2Field/0/testB': 5,
                     }
-                ]
-            },
+                ])
+            ]),
             root_id='')
         spreadsheet_input.read_sheets()
         unflattened = list(spreadsheet_input.unflatten())