[#90] When ids are missing or don't match, simply append to JSON output

Bjwebb · Bjwebb · commit 9b15771d86ae · 2016-04-18T09:02:00.000+01:00
There may be valid reasons people want to do this with flatten-tool. For
the standards we support, errors will be flagged by validation.
diff --git a/flattentool/input.py b/flattentool/input.py
@@ -161,16 +161,18 @@ def unflatten(self):
             main_sheet_by_ocid[root_id_or_none].append(unflatten_main_with_parser(self.parser, line, self.timezone))
 
         for sheet_name, lines in self.get_sub_sheets_lines():
-            for line in lines:
+            for i, line in enumerate(lines):
+                lineno = i+1
                 if all(x == '' for x in line.values()):
                     continue
                 root_id_or_none = line[self.root_id] if self.root_id else None
                 unflattened = unflatten_main_with_parser(self.parser, line, self.timezone)
-                try:
+                if root_id_or_none not in main_sheet_by_ocid:
+                    main_sheet_by_ocid[root_id_or_none] = TemporaryDict('id')
+                if 'id' in unflattened and unflattened['id'] in main_sheet_by_ocid[root_id_or_none]:
                     merge(main_sheet_by_ocid[root_id_or_none][unflattened.get('id')], unflattened)
-                except KeyError:
-                    pass
-                    # FIXME add an appropriate warning here
+                else:
+                    main_sheet_by_ocid[root_id_or_none].append(unflattened)
 
         temporarydicts_to_lists(main_sheet_by_ocid)
 
diff --git a/flattentool/tests/test_input_SpreadsheetInput_unflatten_mulitplesheets.py b/flattentool/tests/test_input_SpreadsheetInput_unflatten_mulitplesheets.py
@@ -169,7 +169,6 @@ def test_nested_id(self):
             {'ocid': 1, 'id': 2, 'subField': [{'id': 3, 'testA': {'id': 4}}]}
         ]
 
-    @pytest.mark.xfail
     def test_missing_columns(self, recwarn):
         spreadsheet_input = ListInput(
             sheets={
@@ -184,7 +183,7 @@ def test_missing_columns(self, recwarn):
                         'ocid': 1,
                         'id': '',
                         'subField/0/id': 3,
-                        'subField/0/testA/id': 4,
+                        'subField/0/testA': 4,
                     },
                     {
                         'ocid': 1,
@@ -197,13 +196,42 @@ def test_missing_columns(self, recwarn):
             main_sheet_name='custom_main')
         spreadsheet_input.read_sheets()
         unflattened = list(spreadsheet_input.unflatten())
-        # We should have a warning about conflicting ID fields
-        w = recwarn.pop(UserWarning)
-        assert 'no parent id fields populated' in text_type(w.message)
-        assert 'Line 2 of sheet sub' in text_type(w.message)
         # Check that following lines are parsed correctly
         assert unflattened == [
-            {'ocid': 1, 'id': 2, 'subField': [{'id': 3, 'testA': 5}]}
+            {'ocid': 1, 'id': 2, 'subField': [{'id': 3, 'testA': 5}]},
+            {'ocid': 1, 'subField': [{'id': 3, 'testA': 4}]},
+        ]
+
+    def test_unmatched_id(self, recwarn):
+        spreadsheet_input = ListInput(
+            sheets={
+                'custom_main': [
+                    {
+                        'ocid': 1,
+                        'id': 2,
+                    }
+                ],
+                'sub': [
+                    {
+                        'ocid': 1,
+                        'id': 100,
+                        'subField/0/id': 3,
+                        'subField/0/testA': 4,
+                    },
+                    {
+                        'ocid': 1,
+                        'id': 2,
+                        'subField/0/id': 3,
+                        'subField/0/testA': 5,
+                    }
+                ]
+            },
+            main_sheet_name='custom_main')
+        spreadsheet_input.read_sheets()
+        unflattened = list(spreadsheet_input.unflatten())
+        assert unflattened == [
+            {'ocid': 1, 'id': 2, 'subField': [{'id': 3, 'testA': 5}]},
+            {'ocid': 1, 'id': 100, 'subField': [{'id': 3, 'testA': 4}]},
         ]