[#96] Remove main_sheet_name except for spreadsheet output

Bjwebb · Bjwebb · commit f6d817e29736 · 2016-06-09T17:52:23.000+01:00
As part of this change, I added the root_list_path argument to unflatten(), which fixes #69.
diff --git a/flattentool/__init__.py b/flattentool/__init__.py
@@ -17,7 +17,7 @@ def create_template(schema, output_name='releases', output_format='all', main_sh
 
     """
 
-    parser = SchemaParser(schema_filename=schema, main_sheet_name=main_sheet_name, rollup=rollup, root_id=root_id, use_titles=use_titles)
+    parser = SchemaParser(schema_filename=schema, rollup=rollup, root_id=root_id, use_titles=use_titles)
     parser.parse()
 
     def spreadsheet_output(spreadsheet_output_class, name):
@@ -49,16 +49,14 @@ def flatten(input_name, schema=None, output_name='releases', output_format='all'
             schema_filename=schema,
             rollup=rollup,
             root_id=root_id,
-            use_titles=use_titles,
-            main_sheet_name=main_sheet_name)
+            use_titles=use_titles)
         schema_parser.parse()
     else:
         schema_parser = None
     parser = JSONParser(
         json_filename=input_name,
         root_list_path=root_list_path,
         schema_parser=schema_parser,
-        main_sheet_name=main_sheet_name,
         root_id=root_id,
         use_titles=use_titles)
     parser.parse()
@@ -103,7 +101,7 @@ def decimal_default(o):
 
 
 def unflatten(input_name, base_json=None, input_format=None, output_name='releases.json',
-              main_sheet_name='releases', encoding='utf8', timezone_name='UTC',
+              root_list_path='main', encoding='utf8', timezone_name='UTC',
               root_id='ocid', schema='', convert_titles=False, cell_source_map=None,
               heading_source_map=None, **_):
     """
@@ -119,11 +117,11 @@ def unflatten(input_name, base_json=None, input_format=None, output_name='releas
     spreadsheet_input = spreadsheet_input_class(
         input_name=input_name,
         timezone_name=timezone_name,
-        main_sheet_name=main_sheet_name,
+        root_list_path=root_list_path,
         root_id=root_id,
         convert_titles=convert_titles)
     if schema:
-        parser = SchemaParser(schema_filename=schema, main_sheet_name=main_sheet_name, rollup=True, root_id=root_id)
+        parser = SchemaParser(schema_filename=schema, rollup=True, root_id=root_id)
         parser.parse()
         spreadsheet_input.parser = parser
     spreadsheet_input.encoding = encoding
@@ -135,7 +133,7 @@ def unflatten(input_name, base_json=None, input_format=None, output_name='releas
         base = OrderedDict()
     if WITH_CELLS:
         result, cell_source_map_data, heading_source_map_data = spreadsheet_input.fancy_unflatten()
-        base[main_sheet_name] = list(result)
+        base[root_list_path] = list(result)
         with codecs.open(output_name, 'w', encoding='utf-8') as fp:
             json.dump(base, fp, indent=4, default=decimal_default, ensure_ascii=False)
         if cell_source_map:
@@ -146,7 +144,7 @@ def unflatten(input_name, base_json=None, input_format=None, output_name='releas
                 json.dump(heading_source_map_data, fp, indent=4, default=decimal_default, ensure_ascii=False)
     else:
         result = spreadsheet_input.unflatten()
-        base[main_sheet_name] = list(result)
+        base[root_list_path] = list(result)
         with codecs.open(output_name, 'w', encoding='utf-8') as fp:
             json.dump(base, fp, indent=4, default=decimal_default, ensure_ascii=False)
 
diff --git a/flattentool/input.py b/flattentool/input.py
@@ -143,21 +143,15 @@ def convert_dict_titles(self, dicts, title_lookup=None):
             else:
                 yield d
 
-    def __init__(self, input_name='', main_sheet_name='', timezone_name='UTC', root_id='ocid', convert_titles=False):
+    def __init__(self, input_name='', root_list_path='main', timezone_name='UTC', root_id='ocid', convert_titles=False):
         self.input_name = input_name
-        self.main_sheet_name = main_sheet_name
+        self.root_list_path = root_list_path
         self.sub_sheet_names = []
         self.timezone = pytz.timezone(timezone_name)
         self.root_id = root_id
         self.convert_titles = convert_titles
         self.parser = None
 
-    def get_main_sheet_lines(self):
-        if self.convert_titles:
-            return self.convert_dict_titles(self.get_sheet_lines(self.main_sheet_name))
-        else:
-            return self.get_sheet_lines(self.main_sheet_name)
-
     def get_sub_sheets_lines(self):
         for sub_sheet_name in self.sub_sheet_names:
             if self.convert_titles:
@@ -189,8 +183,7 @@ def convert_types(self, in_dict):
 
     def do_unflatten(self):
         main_sheet_by_ocid = OrderedDict()
-        # Eventually we should get rid of the concept of a "main sheet entirely"
-        sheets = [(self.main_sheet_name, self.get_main_sheet_lines())] + list(self.get_sub_sheets_lines())
+        sheets = list(self.get_sub_sheets_lines())
         for i, sheet in enumerate(sheets):
             sheet_name, lines = sheet
             try:
@@ -251,7 +244,7 @@ def fancy_unflatten(self):
             raise Exception('Can only do a fancy_unflatten() if WITH_CELLS=True')
         cell_tree = self.do_unflatten()
         result = extract_list_to_value(cell_tree)
-        cell_source_map = extract_list_to_error_path([self.main_sheet_name.lower()], cell_tree)
+        cell_source_map = extract_list_to_error_path([self.root_list_path], cell_tree)
         ordered_items = sorted(cell_source_map.items())
         ordered_cell_source_map = OrderedDict(( '/'.join(str(x) for x in path), location) for path, location in ordered_items)
         row_source_map = OrderedDict()
@@ -355,10 +348,6 @@ def get_sheet_headings(self, sheet_name):
 
     def read_sheets(self):
         sheet_file_names = os.listdir(self.input_name)
-        if self.main_sheet_name+'.csv' not in sheet_file_names:
-            raise ValueError('Main sheet "{}.csv" not found.'.format(self.main_sheet_name))
-        sheet_file_names.remove(self.main_sheet_name+'.csv')
-
         self.sub_sheet_names = sorted([fname[:-4] for fname in sheet_file_names if fname.endswith('.csv')])
 
     def get_sheet_lines(self, sheet_name):
@@ -381,16 +370,8 @@ def read_sheets(self):
         self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
 
         self.sheet_names_map = {sheet_name: sheet_name for sheet_name in self.workbook.get_sheet_names()}
-        # allow main sheet to be any case
-        for sheet_name in list(self.sheet_names_map):
-            if sheet_name.lower() == self.main_sheet_name.lower():
-                self.sheet_names_map.pop(sheet_name)
-                self.sheet_names_map[self.main_sheet_name] = sheet_name
 
         sheet_names = list(self.sheet_names_map.keys())
-        if self.main_sheet_name not in sheet_names:
-            raise ValueError('Main sheet "{}" not found in workbook.'.format(self.main_sheet_name))
-        sheet_names.remove(self.main_sheet_name)
         self.sub_sheet_names = sheet_names
 
     def get_sheet_headings(self, sheet_name):
diff --git a/flattentool/json_input.py b/flattentool/json_input.py
@@ -47,10 +47,9 @@ class JSONParser(object):
     # Named for consistency with schema.SchemaParser, but not sure it's the most appropriate name.
     # Similarily with methods like parse_json_dict
 
-    def __init__(self, json_filename=None, root_json_dict=None, main_sheet_name='main', schema_parser=None, root_list_path=None, root_id='ocid', use_titles=False):
+    def __init__(self, json_filename=None, root_json_dict=None, schema_parser=None, root_list_path=None, root_id='ocid', use_titles=False):
         self.sub_sheets = {}
         self.main_sheet = Sheet()
-        self.main_sheet_name = main_sheet_name
         self.root_list_path = root_list_path
         self.root_id = root_id
         self.use_titles = use_titles
diff --git a/flattentool/schema.py b/flattentool/schema.py
@@ -62,11 +62,10 @@ def __contains__(self, key):
 class SchemaParser(object):
     """Parse the fields of a JSON schema into a flattened structure."""
 
-    def __init__(self, schema_filename=None, root_schema_dict=None, main_sheet_name='main', rollup=False, root_id='ocid', use_titles=False):
+    def __init__(self, schema_filename=None, root_schema_dict=None, rollup=False, root_id='ocid', use_titles=False):
         self.sub_sheets = {}
         self.main_sheet = Sheet()
         self.sub_sheet_mapping = {}
-        self.main_sheet_name = main_sheet_name
         self.rollup = rollup
         self.root_id = root_id
         self.use_titles = use_titles
diff --git a/flattentool/tests/test_input_SpreadsheetInput.py b/flattentool/tests/test_input_SpreadsheetInput.py
@@ -24,7 +24,6 @@ def get_sheet_lines(self, sheet_name):
 
     def read_sheets(self):
         self.sub_sheet_names = list(self.sheets.keys())
-        self.sub_sheet_names.remove(self.main_sheet_name)
 
 def test_spreadsheetinput_base_fails():
     spreadsheet_input = SpreadsheetInput()
@@ -41,58 +40,54 @@ def test_csv_input(self, tmpdir):
         subsheet = tmpdir.join('subsheet.csv')
         subsheet.write('colC,colD\ncell5,cell6\ncell7,cell8')
 
-        csvinput = CSVInput(input_name=tmpdir.strpath, main_sheet_name='main')
-        assert csvinput.main_sheet_name == 'main'
+        csvinput = CSVInput(input_name=tmpdir.strpath)
 
         csvinput.read_sheets()
 
-        assert list(csvinput.get_main_sheet_lines()) == \
+        assert csvinput.sub_sheet_names == ['main', 'subsheet']
+        assert list(csvinput.get_sheet_lines('main')) == \
             [{'colA': 'cell1', 'colB': 'cell2'}, {'colA': 'cell3', 'colB': 'cell4'}]
-        assert csvinput.sub_sheet_names == ['subsheet']
         assert list(csvinput.get_sheet_lines('subsheet')) == \
             [{'colC': 'cell5', 'colD': 'cell6'}, {'colC': 'cell7', 'colD': 'cell8'}]
 
     def test_xlsx_input(self):
-        xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/basic.xlsx', main_sheet_name='main')
-        assert xlsxinput.main_sheet_name == 'main'
+        xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/basic.xlsx')
 
         xlsxinput.read_sheets()
 
-        assert list(xlsxinput.get_main_sheet_lines()) == \
+        assert xlsxinput.sub_sheet_names == ['main', 'subsheet']
+        assert list(xlsxinput.get_sheet_lines('main')) == \
             [{'colA': 'cell1', 'colB': 'cell2'}, {'colA': 'cell3', 'colB': 'cell4'}]
-        assert xlsxinput.sub_sheet_names == ['subsheet']
         assert list(xlsxinput.get_sheet_lines('subsheet')) == \
             [{'colC': 'cell5', 'colD': 'cell6'}, {'colC': 'cell7', 'colD': 'cell8'}]
 
     def test_xlsx_input_integer(self):
-        xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/integer.xlsx', main_sheet_name='main')
-        assert xlsxinput.main_sheet_name == 'main'
+        xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/integer.xlsx')
 
         xlsxinput.read_sheets()
 
-        assert list(xlsxinput.get_main_sheet_lines()) == \
+        assert list(xlsxinput.get_sheet_lines('main')) == \
             [{'colA': 1}]
-        assert xlsxinput.sub_sheet_names == []
+        assert xlsxinput.sub_sheet_names == ['main']
 
     def test_xlsx_input_formula(self):
         """ When a forumla is present, we should use the value, rather than the
         formula itself. """
 
-        xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/formula.xlsx', main_sheet_name='main')
-        assert xlsxinput.main_sheet_name == 'main'
+        xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/formula.xlsx')
 
         xlsxinput.read_sheets()
 
-        assert list(xlsxinput.get_main_sheet_lines()) == \
+        assert xlsxinput.sub_sheet_names == ['main', 'subsheet']
+        assert list(xlsxinput.get_sheet_lines('main')) == \
             [{'colA': 1, 'colB': 2}, {'colA': 2, 'colB': 4}]
-        assert xlsxinput.sub_sheet_names == ['subsheet']
         assert list(xlsxinput.get_sheet_lines('subsheet')) == \
             [{'colC': 3, 'colD': 9}, {'colC': 4, 'colD': 12}]
 
 
 class TestInputFailure(object):
     def test_csv_no_directory(self):
-        csvinput = CSVInput(input_name='nonesensedirectory', main_sheet_name='main')
+        csvinput = CSVInput(input_name='nonesensedirectory')
         if sys.version > '3':
             with pytest.raises(FileNotFoundError):
                 csvinput.read_sheets()
@@ -101,66 +96,60 @@ def test_csv_no_directory(self):
                 csvinput.read_sheets()
 
     def test_csv_no_files(self, tmpdir):
-        csvinput = CSVInput(input_name=tmpdir.strpath, main_sheet_name='main')
+        csvinput = CSVInput(input_name=tmpdir.strpath)
         with pytest.raises(ValueError) as e:
             csvinput.read_sheets()
         assert 'Main sheet' in text_type(e) and 'not found' in text_type(e)
 
     def test_xlsx_no_file(self, tmpdir):
-        xlsxinput = XLSXInput(input_name=tmpdir.strpath.join('test.xlsx'), main_sheet_name='main')
+        xlsxinput = XLSXInput(input_name=tmpdir.strpath.join('test.xlsx'))
         if sys.version > '3':
             with pytest.raises(FileNotFoundError):
                 xlsxinput.read_sheets()
         else:
             with pytest.raises(IOError):
                 xlsxinput.read_sheets()
 
-    def test_xlsx_no_main_sheet(self):
-        xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/basic.xlsx', main_sheet_name='notmain')
-        with pytest.raises(ValueError) as e:
-            xlsxinput.read_sheets()
-        assert 'Main sheet "notmain" not found in workbook.' in text_type(e)
-
 
 class TestUnicodeInput(object):
     def test_csv_input_utf8(self, tmpdir):
         main = tmpdir.join('main.csv')
         main.write_text('colA\néαГ😼𝒞人', encoding='utf8')
-        csvinput = CSVInput(input_name=tmpdir.strpath, main_sheet_name='main')  # defaults to utf8
+        csvinput = CSVInput(input_name=tmpdir.strpath)  # defaults to utf8
         csvinput.read_sheets()
-        assert list(csvinput.get_main_sheet_lines()) == \
+        assert list(csvinput.get_sheet_lines('main')) == \
             [{'colA': 'éαГ😼𝒞人'}]
-        assert csvinput.sub_sheet_names == []
+        assert csvinput.sub_sheet_names == ['main']
 
     def test_csv_input_latin1(self, tmpdir):
         main = tmpdir.join('main.csv')
         main.write_text('colA\né', encoding='latin-1')
-        csvinput = CSVInput(input_name=tmpdir.strpath, main_sheet_name='main')
+        csvinput = CSVInput(input_name=tmpdir.strpath)
         csvinput.encoding = 'latin-1'
         csvinput.read_sheets()
-        assert list(csvinput.get_main_sheet_lines()) == \
+        assert list(csvinput.get_sheet_lines('main')) == \
             [{'colA': 'é'}]
-        assert csvinput.sub_sheet_names == []
+        assert csvinput.sub_sheet_names == ['main']
 
     @pytest.mark.xfail(
         sys.version_info < (3, 0),
         reason='Python 2 CSV readers does not support UTF-16 (or any encodings with null bytes')
     def test_csv_input_utf16(self, tmpdir):
         main = tmpdir.join('main.csv')
         main.write_text('colA\néαГ😼𝒞人', encoding='utf16')
-        csvinput = CSVInput(input_name=tmpdir.strpath, main_sheet_name='main')
+        csvinput = CSVInput(input_name=tmpdir.strpath)
         csvinput.encoding = 'utf16'
         csvinput.read_sheets()
-        assert list(csvinput.get_main_sheet_lines()) == \
+        assert list(csvinput.get_sheet_lines('main')) == \
             [{'colA': 'éαГ😼𝒞人'}]
-        assert csvinput.sub_sheet_names == []
+        assert csvinput.sub_sheet_names == ['main']
 
     def test_xlsx_input_utf8(self):
         """This is an xlsx file saved by OpenOffice. It seems to use UTF8 internally."""
-        xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/unicode.xlsx', main_sheet_name='main')
+        csvinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/unicode.xlsx')
 
-        xlsxinput.read_sheets()
-        assert list(xlsxinput.get_main_sheet_lines())[0]['id'] == 'éαГ😼𝒞人'
+        csvinput.read_sheets()
+        assert list(csvinput.get_sheet_lines('main'))[0]['id'] == 'éαГ😼𝒞人'
 
 
 def test_convert_type(recwarn):
diff --git a/flattentool/tests/test_input_SpreadsheetInput_unflatten.py b/flattentool/tests/test_input_SpreadsheetInput_unflatten.py
@@ -521,13 +521,11 @@ def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs, input_li
                 inject_root_id(root_id, input_row) for input_row in input_list
             ]
         },
-        main_sheet_name='custom_main',
         **extra_kwargs)
     spreadsheet_input.read_sheets()
 
     parser = SchemaParser(
         root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
-        main_sheet_name='custom_main',
         root_id=root_id,
         rollup=True
     )
diff --git a/flattentool/tests/test_input_SpreadsheetInput_unflatten_mulitplesheets.py b/flattentool/tests/test_input_SpreadsheetInput_unflatten_mulitplesheets.py
diff --git a/flattentool/tests/test_roundtrip.py b/flattentool/tests/test_roundtrip.py
diff --git a/flattentool/tests/test_schema_parser.py b/flattentool/tests/test_schema_parser.py

Original file line number	Diff line number	Diff line change
`@@ -521,13 +521,11 @@ def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs, input_li`
`521`	`521`	`inject_root_id(root_id, input_row) for input_row in input_list`
`522`	`522`	`]`
`523`	`523`	`},`
`524`		`- main_sheet_name='custom_main',`
`525`	`524`	`**extra_kwargs)`
`526`	`525`	`spreadsheet_input.read_sheets()`
`527`	`526`
`528`	`527`	`parser = SchemaParser(`
`529`	`528`	`root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},`
`530`		`- main_sheet_name='custom_main',`
`531`	`529`	`root_id=root_id,`
`532`	`530`	`rollup=True`
`533`	`531`	`)`