Skip to content

Commit f6d817e

Browse files
committed
[#96] Remove main_sheet_name except for spreadsheet output
As part of this, I added root_list_path to unflatten, which fixes #69.
1 parent 03c36c5 commit f6d817e

9 files changed

+63
-111
lines changed

flattentool/__init__.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def create_template(schema, output_name='releases', output_format='all', main_sh
1717
1818
"""
1919

20-
parser = SchemaParser(schema_filename=schema, main_sheet_name=main_sheet_name, rollup=rollup, root_id=root_id, use_titles=use_titles)
20+
parser = SchemaParser(schema_filename=schema, rollup=rollup, root_id=root_id, use_titles=use_titles)
2121
parser.parse()
2222

2323
def spreadsheet_output(spreadsheet_output_class, name):
@@ -49,16 +49,14 @@ def flatten(input_name, schema=None, output_name='releases', output_format='all'
4949
schema_filename=schema,
5050
rollup=rollup,
5151
root_id=root_id,
52-
use_titles=use_titles,
53-
main_sheet_name=main_sheet_name)
52+
use_titles=use_titles)
5453
schema_parser.parse()
5554
else:
5655
schema_parser = None
5756
parser = JSONParser(
5857
json_filename=input_name,
5958
root_list_path=root_list_path,
6059
schema_parser=schema_parser,
61-
main_sheet_name=main_sheet_name,
6260
root_id=root_id,
6361
use_titles=use_titles)
6462
parser.parse()
@@ -103,7 +101,7 @@ def decimal_default(o):
103101

104102

105103
def unflatten(input_name, base_json=None, input_format=None, output_name='releases.json',
106-
main_sheet_name='releases', encoding='utf8', timezone_name='UTC',
104+
root_list_path='main', encoding='utf8', timezone_name='UTC',
107105
root_id='ocid', schema='', convert_titles=False, cell_source_map=None,
108106
heading_source_map=None, **_):
109107
"""
@@ -119,11 +117,11 @@ def unflatten(input_name, base_json=None, input_format=None, output_name='releas
119117
spreadsheet_input = spreadsheet_input_class(
120118
input_name=input_name,
121119
timezone_name=timezone_name,
122-
main_sheet_name=main_sheet_name,
120+
root_list_path=root_list_path,
123121
root_id=root_id,
124122
convert_titles=convert_titles)
125123
if schema:
126-
parser = SchemaParser(schema_filename=schema, main_sheet_name=main_sheet_name, rollup=True, root_id=root_id)
124+
parser = SchemaParser(schema_filename=schema, rollup=True, root_id=root_id)
127125
parser.parse()
128126
spreadsheet_input.parser = parser
129127
spreadsheet_input.encoding = encoding
@@ -135,7 +133,7 @@ def unflatten(input_name, base_json=None, input_format=None, output_name='releas
135133
base = OrderedDict()
136134
if WITH_CELLS:
137135
result, cell_source_map_data, heading_source_map_data = spreadsheet_input.fancy_unflatten()
138-
base[main_sheet_name] = list(result)
136+
base[root_list_path] = list(result)
139137
with codecs.open(output_name, 'w', encoding='utf-8') as fp:
140138
json.dump(base, fp, indent=4, default=decimal_default, ensure_ascii=False)
141139
if cell_source_map:
@@ -146,7 +144,7 @@ def unflatten(input_name, base_json=None, input_format=None, output_name='releas
146144
json.dump(heading_source_map_data, fp, indent=4, default=decimal_default, ensure_ascii=False)
147145
else:
148146
result = spreadsheet_input.unflatten()
149-
base[main_sheet_name] = list(result)
147+
base[root_list_path] = list(result)
150148
with codecs.open(output_name, 'w', encoding='utf-8') as fp:
151149
json.dump(base, fp, indent=4, default=decimal_default, ensure_ascii=False)
152150

flattentool/input.py

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -143,21 +143,15 @@ def convert_dict_titles(self, dicts, title_lookup=None):
143143
else:
144144
yield d
145145

146-
def __init__(self, input_name='', main_sheet_name='', timezone_name='UTC', root_id='ocid', convert_titles=False):
146+
def __init__(self, input_name='', root_list_path='main', timezone_name='UTC', root_id='ocid', convert_titles=False):
147147
self.input_name = input_name
148-
self.main_sheet_name = main_sheet_name
148+
self.root_list_path = root_list_path
149149
self.sub_sheet_names = []
150150
self.timezone = pytz.timezone(timezone_name)
151151
self.root_id = root_id
152152
self.convert_titles = convert_titles
153153
self.parser = None
154154

155-
def get_main_sheet_lines(self):
156-
if self.convert_titles:
157-
return self.convert_dict_titles(self.get_sheet_lines(self.main_sheet_name))
158-
else:
159-
return self.get_sheet_lines(self.main_sheet_name)
160-
161155
def get_sub_sheets_lines(self):
162156
for sub_sheet_name in self.sub_sheet_names:
163157
if self.convert_titles:
@@ -189,8 +183,7 @@ def convert_types(self, in_dict):
189183

190184
def do_unflatten(self):
191185
main_sheet_by_ocid = OrderedDict()
192-
# Eventually we should get rid of the concept of a "main sheet entirely"
193-
sheets = [(self.main_sheet_name, self.get_main_sheet_lines())] + list(self.get_sub_sheets_lines())
186+
sheets = list(self.get_sub_sheets_lines())
194187
for i, sheet in enumerate(sheets):
195188
sheet_name, lines = sheet
196189
try:
@@ -251,7 +244,7 @@ def fancy_unflatten(self):
251244
raise Exception('Can only do a fancy_unflatten() if WITH_CELLS=True')
252245
cell_tree = self.do_unflatten()
253246
result = extract_list_to_value(cell_tree)
254-
cell_source_map = extract_list_to_error_path([self.main_sheet_name.lower()], cell_tree)
247+
cell_source_map = extract_list_to_error_path([self.root_list_path], cell_tree)
255248
ordered_items = sorted(cell_source_map.items())
256249
ordered_cell_source_map = OrderedDict(( '/'.join(str(x) for x in path), location) for path, location in ordered_items)
257250
row_source_map = OrderedDict()
@@ -355,10 +348,6 @@ def get_sheet_headings(self, sheet_name):
355348

356349
def read_sheets(self):
357350
sheet_file_names = os.listdir(self.input_name)
358-
if self.main_sheet_name+'.csv' not in sheet_file_names:
359-
raise ValueError('Main sheet "{}.csv" not found.'.format(self.main_sheet_name))
360-
sheet_file_names.remove(self.main_sheet_name+'.csv')
361-
362351
self.sub_sheet_names = sorted([fname[:-4] for fname in sheet_file_names if fname.endswith('.csv')])
363352

364353
def get_sheet_lines(self, sheet_name):
@@ -381,16 +370,8 @@ def read_sheets(self):
381370
self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
382371

383372
self.sheet_names_map = {sheet_name: sheet_name for sheet_name in self.workbook.get_sheet_names()}
384-
# allow main sheet to be any case
385-
for sheet_name in list(self.sheet_names_map):
386-
if sheet_name.lower() == self.main_sheet_name.lower():
387-
self.sheet_names_map.pop(sheet_name)
388-
self.sheet_names_map[self.main_sheet_name] = sheet_name
389373

390374
sheet_names = list(self.sheet_names_map.keys())
391-
if self.main_sheet_name not in sheet_names:
392-
raise ValueError('Main sheet "{}" not found in workbook.'.format(self.main_sheet_name))
393-
sheet_names.remove(self.main_sheet_name)
394375
self.sub_sheet_names = sheet_names
395376

396377
def get_sheet_headings(self, sheet_name):

flattentool/json_input.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,9 @@ class JSONParser(object):
4747
# Named for consistency with schema.SchemaParser, but not sure it's the most appropriate name.
4848
# Similarly with methods like parse_json_dict
4949

50-
def __init__(self, json_filename=None, root_json_dict=None, main_sheet_name='main', schema_parser=None, root_list_path=None, root_id='ocid', use_titles=False):
50+
def __init__(self, json_filename=None, root_json_dict=None, schema_parser=None, root_list_path=None, root_id='ocid', use_titles=False):
5151
self.sub_sheets = {}
5252
self.main_sheet = Sheet()
53-
self.main_sheet_name = main_sheet_name
5453
self.root_list_path = root_list_path
5554
self.root_id = root_id
5655
self.use_titles = use_titles

flattentool/schema.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,10 @@ def __contains__(self, key):
6262
class SchemaParser(object):
6363
"""Parse the fields of a JSON schema into a flattened structure."""
6464

65-
def __init__(self, schema_filename=None, root_schema_dict=None, main_sheet_name='main', rollup=False, root_id='ocid', use_titles=False):
65+
def __init__(self, schema_filename=None, root_schema_dict=None, rollup=False, root_id='ocid', use_titles=False):
6666
self.sub_sheets = {}
6767
self.main_sheet = Sheet()
6868
self.sub_sheet_mapping = {}
69-
self.main_sheet_name = main_sheet_name
7069
self.rollup = rollup
7170
self.root_id = root_id
7271
self.use_titles = use_titles

flattentool/tests/test_input_SpreadsheetInput.py

Lines changed: 27 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ def get_sheet_lines(self, sheet_name):
2424

2525
def read_sheets(self):
2626
self.sub_sheet_names = list(self.sheets.keys())
27-
self.sub_sheet_names.remove(self.main_sheet_name)
2827

2928
def test_spreadsheetinput_base_fails():
3029
spreadsheet_input = SpreadsheetInput()
@@ -41,58 +40,54 @@ def test_csv_input(self, tmpdir):
4140
subsheet = tmpdir.join('subsheet.csv')
4241
subsheet.write('colC,colD\ncell5,cell6\ncell7,cell8')
4342

44-
csvinput = CSVInput(input_name=tmpdir.strpath, main_sheet_name='main')
45-
assert csvinput.main_sheet_name == 'main'
43+
csvinput = CSVInput(input_name=tmpdir.strpath)
4644

4745
csvinput.read_sheets()
4846

49-
assert list(csvinput.get_main_sheet_lines()) == \
47+
assert csvinput.sub_sheet_names == ['main', 'subsheet']
48+
assert list(csvinput.get_sheet_lines('main')) == \
5049
[{'colA': 'cell1', 'colB': 'cell2'}, {'colA': 'cell3', 'colB': 'cell4'}]
51-
assert csvinput.sub_sheet_names == ['subsheet']
5250
assert list(csvinput.get_sheet_lines('subsheet')) == \
5351
[{'colC': 'cell5', 'colD': 'cell6'}, {'colC': 'cell7', 'colD': 'cell8'}]
5452

5553
def test_xlsx_input(self):
56-
xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/basic.xlsx', main_sheet_name='main')
57-
assert xlsxinput.main_sheet_name == 'main'
54+
xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/basic.xlsx')
5855

5956
xlsxinput.read_sheets()
6057

61-
assert list(xlsxinput.get_main_sheet_lines()) == \
58+
assert xlsxinput.sub_sheet_names == ['main', 'subsheet']
59+
assert list(xlsxinput.get_sheet_lines('main')) == \
6260
[{'colA': 'cell1', 'colB': 'cell2'}, {'colA': 'cell3', 'colB': 'cell4'}]
63-
assert xlsxinput.sub_sheet_names == ['subsheet']
6461
assert list(xlsxinput.get_sheet_lines('subsheet')) == \
6562
[{'colC': 'cell5', 'colD': 'cell6'}, {'colC': 'cell7', 'colD': 'cell8'}]
6663

6764
def test_xlsx_input_integer(self):
68-
xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/integer.xlsx', main_sheet_name='main')
69-
assert xlsxinput.main_sheet_name == 'main'
65+
xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/integer.xlsx')
7066

7167
xlsxinput.read_sheets()
7268

73-
assert list(xlsxinput.get_main_sheet_lines()) == \
69+
assert list(xlsxinput.get_sheet_lines('main')) == \
7470
[{'colA': 1}]
75-
assert xlsxinput.sub_sheet_names == []
71+
assert xlsxinput.sub_sheet_names == ['main']
7672

7773
def test_xlsx_input_formula(self):
7874
""" When a formula is present, we should use the value, rather than the
7975
formula itself. """
8076

81-
xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/formula.xlsx', main_sheet_name='main')
82-
assert xlsxinput.main_sheet_name == 'main'
77+
xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/formula.xlsx')
8378

8479
xlsxinput.read_sheets()
8580

86-
assert list(xlsxinput.get_main_sheet_lines()) == \
81+
assert xlsxinput.sub_sheet_names == ['main', 'subsheet']
82+
assert list(xlsxinput.get_sheet_lines('main')) == \
8783
[{'colA': 1, 'colB': 2}, {'colA': 2, 'colB': 4}]
88-
assert xlsxinput.sub_sheet_names == ['subsheet']
8984
assert list(xlsxinput.get_sheet_lines('subsheet')) == \
9085
[{'colC': 3, 'colD': 9}, {'colC': 4, 'colD': 12}]
9186

9287

9388
class TestInputFailure(object):
9489
def test_csv_no_directory(self):
95-
csvinput = CSVInput(input_name='nonesensedirectory', main_sheet_name='main')
90+
csvinput = CSVInput(input_name='nonesensedirectory')
9691
if sys.version > '3':
9792
with pytest.raises(FileNotFoundError):
9893
csvinput.read_sheets()
@@ -101,66 +96,60 @@ def test_csv_no_directory(self):
10196
csvinput.read_sheets()
10297

10398
def test_csv_no_files(self, tmpdir):
104-
csvinput = CSVInput(input_name=tmpdir.strpath, main_sheet_name='main')
99+
csvinput = CSVInput(input_name=tmpdir.strpath)
105100
with pytest.raises(ValueError) as e:
106101
csvinput.read_sheets()
107102
assert 'Main sheet' in text_type(e) and 'not found' in text_type(e)
108103

109104
def test_xlsx_no_file(self, tmpdir):
110-
xlsxinput = XLSXInput(input_name=tmpdir.strpath.join('test.xlsx'), main_sheet_name='main')
105+
xlsxinput = XLSXInput(input_name=tmpdir.strpath.join('test.xlsx'))
111106
if sys.version > '3':
112107
with pytest.raises(FileNotFoundError):
113108
xlsxinput.read_sheets()
114109
else:
115110
with pytest.raises(IOError):
116111
xlsxinput.read_sheets()
117112

118-
def test_xlsx_no_main_sheet(self):
119-
xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/basic.xlsx', main_sheet_name='notmain')
120-
with pytest.raises(ValueError) as e:
121-
xlsxinput.read_sheets()
122-
assert 'Main sheet "notmain" not found in workbook.' in text_type(e)
123-
124113

125114
class TestUnicodeInput(object):
126115
def test_csv_input_utf8(self, tmpdir):
127116
main = tmpdir.join('main.csv')
128117
main.write_text('colA\néαГ😼𝒞人', encoding='utf8')
129-
csvinput = CSVInput(input_name=tmpdir.strpath, main_sheet_name='main') # defaults to utf8
118+
csvinput = CSVInput(input_name=tmpdir.strpath) # defaults to utf8
130119
csvinput.read_sheets()
131-
assert list(csvinput.get_main_sheet_lines()) == \
120+
assert list(csvinput.get_sheet_lines('main')) == \
132121
[{'colA': 'éαГ😼𝒞人'}]
133-
assert csvinput.sub_sheet_names == []
122+
assert csvinput.sub_sheet_names == ['main']
134123

135124
def test_csv_input_latin1(self, tmpdir):
136125
main = tmpdir.join('main.csv')
137126
main.write_text('colA\né', encoding='latin-1')
138-
csvinput = CSVInput(input_name=tmpdir.strpath, main_sheet_name='main')
127+
csvinput = CSVInput(input_name=tmpdir.strpath)
139128
csvinput.encoding = 'latin-1'
140129
csvinput.read_sheets()
141-
assert list(csvinput.get_main_sheet_lines()) == \
130+
assert list(csvinput.get_sheet_lines('main')) == \
142131
[{'colA': 'é'}]
143-
assert csvinput.sub_sheet_names == []
132+
assert csvinput.sub_sheet_names == ['main']
144133

145134
@pytest.mark.xfail(
146135
sys.version_info < (3, 0),
147136
reason='Python 2 CSV readers does not support UTF-16 (or any encodings with null bytes')
148137
def test_csv_input_utf16(self, tmpdir):
149138
main = tmpdir.join('main.csv')
150139
main.write_text('colA\néαГ😼𝒞人', encoding='utf16')
151-
csvinput = CSVInput(input_name=tmpdir.strpath, main_sheet_name='main')
140+
csvinput = CSVInput(input_name=tmpdir.strpath)
152141
csvinput.encoding = 'utf16'
153142
csvinput.read_sheets()
154-
assert list(csvinput.get_main_sheet_lines()) == \
143+
assert list(csvinput.get_sheet_lines('main')) == \
155144
[{'colA': 'éαГ😼𝒞人'}]
156-
assert csvinput.sub_sheet_names == []
145+
assert csvinput.sub_sheet_names == ['main']
157146

158147
def test_xlsx_input_utf8(self):
159148
"""This is an xlsx file saved by OpenOffice. It seems to use UTF8 internally."""
160-
xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/unicode.xlsx', main_sheet_name='main')
149+
csvinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/unicode.xlsx')
161150

162-
xlsxinput.read_sheets()
163-
assert list(xlsxinput.get_main_sheet_lines())[0]['id'] == 'éαГ😼𝒞人'
151+
csvinput.read_sheets()
152+
assert list(csvinput.get_sheet_lines('main'))[0]['id'] == 'éαГ😼𝒞人'
164153

165154

166155
def test_convert_type(recwarn):

flattentool/tests/test_input_SpreadsheetInput_unflatten.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -521,13 +521,11 @@ def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs, input_li
521521
inject_root_id(root_id, input_row) for input_row in input_list
522522
]
523523
},
524-
main_sheet_name='custom_main',
525524
**extra_kwargs)
526525
spreadsheet_input.read_sheets()
527526

528527
parser = SchemaParser(
529528
root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
530-
main_sheet_name='custom_main',
531529
root_id=root_id,
532530
rollup=True
533531
)

0 commit comments

Comments
 (0)