Merge pull request #113 from OpenDataServices/40-change-root-id-default

Bjwebb · web-flow · commit d6938b598d7c · 2016-06-20T16:49:07.000+01:00
[#40] Make root_id default to None
diff --git a/README.md b/README.md
@@ -56,7 +56,7 @@ Can be converted to/from a spreadsheet like [examples/simple/main.csv](examples/
 Using the commands:
 
 ```
-flatten-tool unflatten -f csv examples/simple --root-id='' -o examples/simple.json
+flatten-tool unflatten -f csv examples/simple -o examples/simple.json
 flatten-tool flatten -f csv examples/simple.json -o examples/simple
 ```
 
@@ -123,7 +123,7 @@ These are also the spreadsheets that flatten-tool's `flatten` (JSON to Spreadshe
 Commands used to generate this:
 
 ```
-flatten-tool unflatten -f csv examples/array_multisheet --root-id='' -o examples/array_multisheet.json
+flatten-tool unflatten -f csv examples/array_multisheet -o examples/array_multisheet.json
 flatten-tool flatten -f csv examples/array.json -o examples/array_multisheet
 ```
 
@@ -137,7 +137,7 @@ New columns for each item of the array:
 |7|8|9|10|11|12|
 
 ```
-flatten-tool unflatten -f csv examples/array_pointer --root-id='' -o examples/array.json
+flatten-tool unflatten -f csv examples/array_pointer -o examples/array.json
 ```
 
 Repeated rows:
@@ -151,7 +151,7 @@ Repeated rows:
 
 
 ```
-flatten-tool unflatten -f csv examples/array_repeat_rows --root-id='' -o examples/array.json
+flatten-tool unflatten -f csv examples/array_repeat_rows -o examples/array.json
 ```
 
 
@@ -353,7 +353,7 @@ You can also upload the file to http://standard.open-contracting.org/validator/
 
 Download https://raw.githubusercontent.com/open-contracting/standard/1.0/standard/schema/release-schema.json to the current directory.
 
-    flatten-tool create-template --output-format all --output-name template --schema release-schema.json --main-sheet-name releases
+    flatten-tool create-template --root-id=ocid --output-format all --output-name template --schema release-schema.json --main-sheet-name releases
 
 This will create `template.xlsx` and a `template/` directory of csv files.
 
@@ -368,18 +368,18 @@ And populate this with the package information for your release.
 
 Then, for a populated xlsx template in (in release_populated.xlsx):
 
-    flatten-tool unflatten release_populated.xlsx --base-json base.json --input-format xlsx --output-name release.json --root-list-path='releases'
+    flatten-tool unflatten release_populated.xlsx --root-id=ocid --base-json base.json --input-format xlsx --output-name release.json --root-list-path='releases'
 
 Or for populated CSV files (in the release_populated directory):
 
-    flatten-tool unflatten release_populated --base-json base.json --input-format csv --output-name release.json --root-list-path='releases'
+    flatten-tool unflatten release_populated --root-id=ocid --base-json base.json --input-format csv --output-name release.json --root-list-path='releases'
 
 These produce a release.json file based on the data in the spreadsheets.
 
 
 ### Converting a JSON file to a spreadsheet
 
-    flatten-tool flatten input.json --main-sheet-name releases --output-name flattened --root-list-path='releases'
+    flatten-tool flatten input.json --root-id=ocid --main-sheet-name releases --output-name flattened --root-list-path='releases'
 
 This will create `flattened.xlsx` and a `flattened/` directory of csv files.
 
@@ -391,9 +391,9 @@ Download
 https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/schema/360-giving-schema.json
 to the current directory.
 
-    flatten-tool create-template --root-id='' --output-format all --output-name 360giving-template --schema 360-giving-schema.json --main-sheet-name grants --rollup --use-titles
+    flatten-tool create-template --output-format all --output-name 360giving-template --schema 360-giving-schema.json --main-sheet-name grants --rollup --use-titles
 
-    flatten-tool unflatten --root-id='' -o out.json -f xlsx input.xlsx --schema 360-giving-schema.json --convert-titles --root-list-path='grants'
+    flatten-tool unflatten -o out.json -f xlsx input.xlsx --schema 360-giving-schema.json --convert-titles --root-list-path='grants'
 
 
 Running the tests
diff --git a/flattentool/__init__.py b/flattentool/__init__.py
@@ -9,7 +9,7 @@
 from collections import OrderedDict
 
 
-def create_template(schema, output_name='template', output_format='all', main_sheet_name='main', flatten=False, rollup=False, root_id='ocid', use_titles=False, **_):
+def create_template(schema, output_name='template', output_format='all', main_sheet_name='main', flatten=False, rollup=False, root_id=None, use_titles=False, **_):
     """
     Creates template file(s) from given inputs
     This function is built to deal with commandline input and arguments
@@ -38,7 +38,7 @@ def spreadsheet_output(spreadsheet_output_class, name):
         raise Exception('The requested format is not available')
 
 
-def flatten(input_name, schema=None, output_name='flattened', output_format='all', main_sheet_name='main', root_list_path='main', rollup=False, root_id='ocid', use_titles=False, **_):
+def flatten(input_name, schema=None, output_name='flattened', output_format='all', main_sheet_name='main', root_list_path='main', rollup=False, root_id=None, use_titles=False, **_):
     """
     Flatten a nested structure (JSON) to a flat structure (spreadsheet - csv or xlsx).
 
@@ -102,7 +102,7 @@ def decimal_default(o):
 
 def unflatten(input_name, base_json=None, input_format=None, output_name='unflattened.json',
               root_list_path='main', encoding='utf8', timezone_name='UTC',
-              root_id='ocid', schema='', convert_titles=False, cell_source_map=None,
+              root_id=None, schema='', convert_titles=False, cell_source_map=None,
               heading_source_map=None, **_):
     """
     Unflatten a flat structure (spreadsheet - csv or xlsx) into a nested structure (JSON).
diff --git a/flattentool/cli.py b/flattentool/cli.py
@@ -50,7 +50,7 @@ def create_parser():
         help="\"Roll up\" columns from subsheets into the main sheet if they are specified in a rollUp attribute in the schema.")
     parser_create_template.add_argument(
         "-r", "--root-id",
-        help="Root ID of the data format, e.g. ocid for OCDS and blank for 360Giving (use --root-id=''). Defaults to ocid.")
+        help="Root ID of the data format, e.g. ocid for OCDS")
     parser_create_template.add_argument(
         "--use-titles",
         action='store_true',
@@ -83,7 +83,7 @@ def create_parser():
         help="\"Roll up\" columns from subsheets into the main sheet if they are specified in a rollUp attribute in the schema.")
     parser_flatten.add_argument(
         "-r", "--root-id",
-        help="Root ID of the data format, e.g. ocid for OCDS and blank for 360Giving (use --root-id=''). Defaults to ocid.")
+        help="Root ID of the data format, e.g. ocid for OCDS")
     parser_flatten.add_argument(
         "--use-titles",
         action='store_true',
@@ -122,7 +122,7 @@ def create_parser():
         help="Name of the timezone, defaults to UTC. Should be in tzdata format, e.g. Europe/London")
     parser_unflatten.add_argument(
         "-r", "--root-id",
-        help="Root ID of the data format, e.g. ocid for OCDS and blank for 360Giving (use --root-id=''). Defaults to ocid.")
+        help="Root ID of the data format, e.g. ocid for OCDS")
     parser_unflatten.add_argument(
         "-s", "--schema",
         help="Path to a relevant schema.")
diff --git a/flattentool/json_input.py b/flattentool/json_input.py
@@ -1,6 +1,6 @@
 """
 
-This file contains code that takes an OCDS JSON release file as input (not a
+This file contains code that takes an instance of a JSON file as input (not a
 JSON schema, for that see schema.py).
 
 """
diff --git a/flattentool/output.py b/flattentool/output.py
@@ -21,7 +21,7 @@
 class SpreadsheetOutput(object):
     # output_name is given a default here, partly to help with tests,
     # but should have been defined by the time we get here.
-    def __init__(self, parser, main_sheet_name='main', output_name='release'):
+    def __init__(self, parser, main_sheet_name='main', output_name='unflattened'):
         self.parser = parser
         self.main_sheet_name = main_sheet_name
         self.output_name = output_name
diff --git a/flattentool/schema.py b/flattentool/schema.py
@@ -62,7 +62,7 @@ def __contains__(self, key):
 class SchemaParser(object):
     """Parse the fields of a JSON schema into a flattened structure."""
 
-    def __init__(self, schema_filename=None, root_schema_dict=None, rollup=False, root_id='ocid', use_titles=False):
+    def __init__(self, schema_filename=None, root_schema_dict=None, rollup=False, root_id=None, use_titles=False):
         self.sub_sheets = {}
         self.main_sheet = Sheet()
         self.sub_sheet_mapping = {}
diff --git a/flattentool/tests/test_json_input.py b/flattentool/tests/test_json_input.py
@@ -147,7 +147,7 @@ def test_parse_ids(self):
             ('a', 'b'),
             ('c', [OrderedDict([('id', 3), ('d', 'e')]), OrderedDict([('id', 3), ('d', 'e2')])]),
             ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output
-        ])])
+        ])], root_id='ocid')
         parser.parse()
         assert list(parser.main_sheet) == [ 'ocid', 'id', 'a', 'f/g' ]
         assert parser.main_sheet.lines == [
@@ -186,7 +186,7 @@ def test_parse_ids_subsheet(self):
                     ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output
                 ])
             ])
-        ])])
+        ])], root_id='ocid')
         parser.parse()
         assert list(parser.main_sheet) == [ 'ocid', 'id' ]
         assert parser.main_sheet.lines == [
@@ -233,7 +233,7 @@ def test_parse_ids_nested(self):
                 ('c', [OrderedDict([('d', 'e')]), OrderedDict([('d', 'e2')])])
             ])),
             ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output
-        ])])
+        ])], root_id='ocid')
         parser.parse()
         assert list(parser.main_sheet) == [ 'ocid', 'id', 'a', 'testnest/id', 'f/g' ]
         assert parser.main_sheet.lines == [
@@ -281,7 +281,8 @@ def test_sub_sheet_names(self, tmpdir):
             }
         }''')
         schema_parser = SchemaParser(
-            schema_filename=test_schema.strpath
+            schema_filename=test_schema.strpath,
+            root_id='ocid'
         )
         schema_parser.parse()
         parser = JSONParser(
@@ -342,13 +343,14 @@ def test_rollup(self):
                     }
                 },
             }
-        }, rollup=True)
+        }, rollup=True, root_id='ocid')
         schema_parser.parse()
         parser = JSONParser(
             root_json_dict=[OrderedDict([
                 ('testA', [OrderedDict([('testB', '1'), ('testC', '2')])]),
             ])],
-            schema_parser=schema_parser
+            schema_parser=schema_parser,
+            root_id='ocid'
         )
         parser.parse()
         assert list(parser.main_sheet) == [ 'testA/0/testB' ]
@@ -393,7 +395,7 @@ def test_rollup_multiple_values(self, recwarn):
             }
         ]
         assert len(parser.sub_sheets) == 1
-        assert set(parser.sub_sheets['testA']) == set(['ocid', 'testA/0/testB', 'testA/0/testC'])
+        assert set(parser.sub_sheets['testA']) == set(['testA/0/testB', 'testA/0/testC'])
         assert parser.sub_sheets['testA'].lines == [
             {'testA/0/testB':'1', 'testA/0/testC': '2'},
             {'testA/0/testB':'3', 'testA/0/testC': '4'}
diff --git a/flattentool/tests/test_schema_parser.py b/flattentool/tests/test_schema_parser.py
@@ -96,7 +96,7 @@ def test_sub_sheet():
     parser.parse()
     assert set(parser.main_sheet) == set([])
     assert set(parser.sub_sheets) == set(['Atest'])
-    assert list(parser.sub_sheets['Atest']) == ['ocid', 'Atest/0/Btest']
+    assert list(parser.sub_sheets['Atest']) == ['Atest/0/Btest']
 
 
 def object_in_array_example_properties(parent_name, child_name):
@@ -125,7 +125,7 @@ def test_parent_is_object(self):
         parser.parse()
         assert set(parser.main_sheet) == set(['Atest/id'])
         assert set(parser.sub_sheets) == set(['Ate_Btest'])
-        assert list(parser.sub_sheets['Ate_Btest']) == ['ocid', 'Atest/id', 'Atest/Btest/0/Ctest']
+        assert list(parser.sub_sheets['Ate_Btest']) == ['Atest/id', 'Atest/Btest/0/Ctest']
 
     def test_parent_is_array(self):
         parser = SchemaParser(root_schema_dict={
@@ -139,8 +139,8 @@ def test_parent_is_array(self):
         parser.parse()
         assert set(parser.main_sheet) == set()
         assert set(parser.sub_sheets) == set(['Atest', 'Ate_Btest'])
-        assert list(parser.sub_sheets['Atest']) == ['ocid', 'Atest/0/id']
-        assert list(parser.sub_sheets['Ate_Btest']) == ['ocid', 'Atest/0/id', 'Atest/0/Btest/0/Ctest']
+        assert list(parser.sub_sheets['Atest']) == ['Atest/0/id']
+        assert list(parser.sub_sheets['Ate_Btest']) == ['Atest/0/id', 'Atest/0/Btest/0/Ctest']
 
     def test_two_parents(self):
         parser = SchemaParser(root_schema_dict={
@@ -160,10 +160,10 @@ def test_two_parents(self):
         parser.parse()
         assert set(parser.main_sheet) == set()
         assert set(parser.sub_sheets) == set(['Atest', 'Dtest', 'Ate_Btest', 'Dte_Btest'])
-        assert list(parser.sub_sheets['Atest']) == ['ocid', 'Atest/0/id']
-        assert list(parser.sub_sheets['Dtest']) == ['ocid', 'Dtest/0/id']
-        assert list(parser.sub_sheets['Ate_Btest']) == ['ocid', 'Atest/0/id', 'Atest/0/Btest/0/Ctest']
-        assert list(parser.sub_sheets['Dte_Btest']) == ['ocid', 'Dtest/0/id', 'Dtest/0/Btest/0/Etest']
+        assert list(parser.sub_sheets['Atest']) == ['Atest/0/id']
+        assert list(parser.sub_sheets['Dtest']) == ['Dtest/0/id']
+        assert list(parser.sub_sheets['Ate_Btest']) == ['Atest/0/id', 'Atest/0/Btest/0/Ctest']
+        assert list(parser.sub_sheets['Dte_Btest']) == ['Dtest/0/id', 'Dtest/0/Btest/0/Etest']
 
     def test_parent_is_object_nested(self):
         parser = SchemaParser(root_schema_dict={
@@ -182,7 +182,7 @@ def test_parent_is_object_nested(self):
         parser.parse()
         assert set(parser.main_sheet) == set(['Atest/Btest/id'])
         assert set(parser.sub_sheets) == set(['Ate_Bte_Btest'])
-        assert list(parser.sub_sheets['Ate_Bte_Btest']) == ['ocid', 'Atest/Btest/id', 'Atest/Btest/Btest/0/Ctest']
+        assert list(parser.sub_sheets['Ate_Bte_Btest']) == ['Atest/Btest/id', 'Atest/Btest/Btest/0/Ctest']
 
 
 class TestSubSheetMainID(object):
@@ -199,7 +199,7 @@ def test_parent_is_object(self):
         parser.parse()
         assert set(parser.main_sheet) == set(['id', 'Atest/id'])
         assert set(parser.sub_sheets) == set(['Ate_Btest'])
-        assert list(parser.sub_sheets['Ate_Btest']) == ['ocid', 'id', 'Atest/id', 'Atest/Btest/0/Ctest']
+        assert list(parser.sub_sheets['Ate_Btest']) == ['id', 'Atest/id', 'Atest/Btest/0/Ctest']
 
     def test_parent_is_array(self):
         parser = SchemaParser(root_schema_dict={
@@ -215,8 +215,8 @@ def test_parent_is_array(self):
         parser.parse()
         assert set(parser.main_sheet) == set(['id'])
         assert set(parser.sub_sheets) == set(['Atest', 'Ate_Btest'])
-        assert list(parser.sub_sheets['Atest']) == ['ocid', 'id', 'Atest/0/id']
-        assert list(parser.sub_sheets['Ate_Btest']) == ['ocid', 'id', 'Atest/0/id', 'Atest/0/Btest/0/Ctest']
+        assert list(parser.sub_sheets['Atest']) == ['id', 'Atest/0/id']
+        assert list(parser.sub_sheets['Ate_Btest']) == ['id', 'Atest/0/id', 'Atest/0/Btest/0/Ctest']
 
     def test_two_parents(self):
         parser = SchemaParser(root_schema_dict={
@@ -237,10 +237,10 @@ def test_two_parents(self):
         parser.parse()
         assert set(parser.main_sheet) == set(['id'])
         assert set(parser.sub_sheets) == set(['Atest', 'Dtest', 'Ate_Btest', 'Dte_Btest'])
-        assert list(parser.sub_sheets['Atest']) == ['ocid', 'id', 'Atest/0/id']
-        assert list(parser.sub_sheets['Dtest']) == ['ocid', 'id', 'Dtest/0/id']
-        assert list(parser.sub_sheets['Ate_Btest']) == ['ocid', 'id', 'Atest/0/id', 'Atest/0/Btest/0/Ctest']
-        assert list(parser.sub_sheets['Dte_Btest']) == ['ocid', 'id', 'Dtest/0/id', 'Dtest/0/Btest/0/Etest']
+        assert list(parser.sub_sheets['Atest']) == ['id', 'Atest/0/id']
+        assert list(parser.sub_sheets['Dtest']) == ['id', 'Dtest/0/id']
+        assert list(parser.sub_sheets['Ate_Btest']) == ['id', 'Atest/0/id', 'Atest/0/Btest/0/Ctest']
+        assert list(parser.sub_sheets['Dte_Btest']) == ['id', 'Dtest/0/id', 'Dtest/0/Btest/0/Etest']
 
     def test_custom_main_sheet_name(self):
         parser = SchemaParser(
@@ -258,7 +258,6 @@ def test_custom_main_sheet_name(self):
         assert set(parser.main_sheet) == set(['id', 'Atest/id'])
         assert set(parser.sub_sheets) == set(['Ate_Btest'])
         assert list(parser.sub_sheets['Ate_Btest']) == [
-            'ocid',
             'id',
             'Atest/id',
             'Atest/Btest/0/Ctest']
@@ -318,7 +317,7 @@ def test_references_sheet_names(tmpdir):
     parser = SchemaParser(schema_filename=tmpfile.strpath)
     parser.parse()
     assert set(parser.sub_sheets) == set(['Atest']) # used to be Btest
-    assert list(parser.sub_sheets['Atest']) == ['ocid', 'Atest/0/Ctest']
+    assert list(parser.sub_sheets['Atest']) == ['Atest/0/Ctest']
 
 
 def test_rollup():
@@ -340,7 +339,7 @@ def test_rollup():
     parser.parse()
     assert set(parser.main_sheet) == set(['Atest/0/Btest'])
     assert set(parser.sub_sheets) == set(['Atest'])
-    assert set(parser.sub_sheets['Atest']) == set(['ocid', 'Atest/0/Btest', 'Atest/0/Ctest'])
+    assert set(parser.sub_sheets['Atest']) == set(['Atest/0/Btest', 'Atest/0/Ctest'])
 
 
 def test_bad_rollup(recwarn):
@@ -370,7 +369,7 @@ def test_bad_rollup(recwarn):
 
     assert set(parser.main_sheet) == set()
     assert set(parser.sub_sheets) == set(['Atest'])
-    assert set(parser.sub_sheets['Atest']) == set(['ocid', 'Atest/0/Ctest'])
+    assert set(parser.sub_sheets['Atest']) == set(['Atest/0/Ctest'])
 
 
 def test_sub_sheet_custom_id():
@@ -390,7 +389,7 @@ def test_sub_sheet_custom_id():
     assert set(parser.sub_sheets) == set(['Atest'])
     assert list(parser.sub_sheets['Atest']) == ['custom', 'Atest/0/Btest']
 
-def test_sub_sheet_no_root_id():
+def test_sub_sheet_empty_string_root_id():
     parser = SchemaParser(root_schema_dict={
         'properties': {
             'Atest': {
@@ -432,7 +431,7 @@ def test_use_titles(recwarn):
     parser.parse()
     assert set(parser.main_sheet) == set(['CTitle'])
     assert set(parser.sub_sheets) == set(['Atest'])
-    assert list(parser.sub_sheets['Atest']) == ['ocid', 'ATitle:BTitle']
+    assert list(parser.sub_sheets['Atest']) == ['ATitle:BTitle']
 
     # Array title missing
     parser = SchemaParser(root_schema_dict={
@@ -458,7 +457,7 @@ def test_use_titles(recwarn):
     parser.parse()
     assert set(parser.main_sheet) == set(['CTitle'])
     assert set(parser.sub_sheets) == set(['Atest'])
-    assert list(parser.sub_sheets['Atest']) == ['ocid']
+    assert list(parser.sub_sheets['Atest']) == []
     w = recwarn.pop(UserWarning)
     assert 'does not have a title' in text_type(w.message)
 
@@ -486,7 +485,7 @@ def test_use_titles(recwarn):
     parser.parse()
     assert set(parser.main_sheet) == set([])
     assert set(parser.sub_sheets) == set(['Atest'])
-    assert list(parser.sub_sheets['Atest']) == ['ocid', 'ATitle:BTitle']
+    assert list(parser.sub_sheets['Atest']) == ['ATitle:BTitle']
     w = recwarn.pop(UserWarning)
     assert 'does not have a title' in text_type(w.message)
 
@@ -514,7 +513,7 @@ def test_use_titles(recwarn):
     parser.parse()
     assert set(parser.main_sheet) == set(['CTitle'])
     assert set(parser.sub_sheets) == set(['Atest'])
-    assert list(parser.sub_sheets['Atest']) == ['ocid']
+    assert list(parser.sub_sheets['Atest']) == []
     w = recwarn.pop(UserWarning)
     assert 'does not have a title' in text_type(w.message)
 
@@ -545,7 +544,7 @@ def test_titles_rollup():
     parser.parse()
     assert set(parser.main_sheet) == set(['ATitle:BTitle'])
     assert set(parser.sub_sheets) == set(['Atest'])
-    assert set(parser.sub_sheets['Atest']) == set(['ocid', 'ATitle:BTitle', 'ATitle:CTitle'])
+    assert set(parser.sub_sheets['Atest']) == set(['ATitle:BTitle', 'ATitle:CTitle'])
 
 
 def test_schema_from_uri(httpserver):