Skip to content

Commit 7b2f4d4

Browse files
authored
Merge pull request #107 from OpenDataServices/96-deprecate-main-sheet-name
Changes to cli and Python function interface
2 parents 03c36c5 + 4e07b58 commit 7b2f4d4

11 files changed

+118
-173
lines changed

README.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ Can be converted to/from a spreadsheet like [examples/simple/main.csv](examples/
5656
Using the commands:
5757

5858
```
59-
flatten-tool unflatten -f csv examples/simple --main-sheet-name main --root-id='' -o examples/simple.json
60-
flatten-tool flatten -f csv examples/simple.json --main-sheet-name main --root-list-path main -o examples/simple
59+
flatten-tool unflatten -f csv examples/simple --root-id='' -o examples/simple.json
60+
flatten-tool flatten -f csv examples/simple.json -o examples/simple
6161
```
6262

6363

@@ -123,8 +123,8 @@ These are also the spreadsheets that flatten-tool's `flatten` (JSON to Spreadshe
123123
Commands used to generate this:
124124

125125
```
126-
flatten-tool unflatten -f csv examples/array_multisheet --main-sheet-name main --root-id='' -o examples/array_multisheet.json
127-
flatten-tool flatten -f csv examples/array.json --main-sheet-name main --root-list-path main -o examples/array_multisheet
126+
flatten-tool unflatten -f csv examples/array_multisheet --root-id='' -o examples/array_multisheet.json
127+
flatten-tool flatten -f csv examples/array.json -o examples/array_multisheet
128128
```
129129

130130
However, there are other "shapes" of spreadsheet that can produce the same JSON.
@@ -137,7 +137,7 @@ New columns for each item of the array:
137137
|7|8|9|10|11|12|
138138

139139
```
140-
flatten-tool unflatten -f csv examples/array_pointer --main-sheet-name main --root-id='' -o examples/array.json
140+
flatten-tool unflatten -f csv examples/array_pointer --root-id='' -o examples/array.json
141141
```
142142

143143
Repeated rows:
@@ -151,7 +151,7 @@ Repeated rows:
151151

152152

153153
```
154-
flatten-tool unflatten -f csv examples/array_repeat_rows --main-sheet-name main --root-id='' -o examples/array.json
154+
flatten-tool unflatten -f csv examples/array_repeat_rows --root-id='' -o examples/array.json
155155
```
156156

157157

@@ -367,20 +367,20 @@ And populate this with the package information for your release.
367367

368368
Then, for a populated xlsx template (in release_populated.xlsx):
369369

370-
flatten-tool unflatten release_populated.xlsx --base-json base.json --input-format xlsx --output-name release.json
370+
flatten-tool unflatten release_populated.xlsx --base-json base.json --input-format xlsx --output-name release.json --root-list-path='releases'
371371

372372
Or for populated CSV files (in the release_populated directory):
373373

374-
flatten-tool unflatten release_populated --base-json base.json --input-format csv --output-name release.json
374+
flatten-tool unflatten release_populated --base-json base.json --input-format csv --output-name release.json --root-list-path='releases'
375375

376376
These produce a release.json file based on the data in the spreadsheets.
377377

378378

379379
### Converting a JSON file to a spreadsheet
380380

381-
flatten-tool flatten input.json --main-sheet-name releases --output-name unflattened
381+
flatten-tool flatten input.json --main-sheet-name releases --output-name flattened --root-list-path='releases'
382382

383-
This will create `unflattened.xlsx` and a `unflattened/` directory of csv files.
383+
This will create `flattened.xlsx` and a `flattened/` directory of csv files.
384384

385385
## Usage for 360Giving
386386

@@ -392,7 +392,7 @@ to the current directory.
392392

393393
flatten-tool create-template --root-id='' --output-format all --output-name 360giving-template --schema 360-giving-schema.json --main-sheet-name grants --rollup --use-titles
394394

395-
flatten-tool unflatten --root-id='' -o out.json -f xlsx --main-sheet-name=grants input.xlsx --schema 360-giving-schema.json --convert-titles
395+
flatten-tool unflatten --root-id='' -o out.json -f xlsx input.xlsx --schema 360-giving-schema.json --convert-titles --root-list-path='grants'
396396

397397

398398
Running the tests

flattentool/__init__.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@
99
from collections import OrderedDict
1010

1111

12-
def create_template(schema, output_name='releases', output_format='all', main_sheet_name='main', flatten=False, rollup=False, root_id='ocid', use_titles=False, **_):
12+
def create_template(schema, output_name='template', output_format='all', main_sheet_name='main', flatten=False, rollup=False, root_id='ocid', use_titles=False, **_):
1313
"""
1414
Creates template file(s) from given inputs
1515
This function is built to deal with commandline input and arguments
1616
but to also be called from elsewhere in future
1717
1818
"""
1919

20-
parser = SchemaParser(schema_filename=schema, main_sheet_name=main_sheet_name, rollup=rollup, root_id=root_id, use_titles=use_titles)
20+
parser = SchemaParser(schema_filename=schema, rollup=rollup, root_id=root_id, use_titles=use_titles)
2121
parser.parse()
2222

2323
def spreadsheet_output(spreadsheet_output_class, name):
@@ -38,7 +38,7 @@ def spreadsheet_output(spreadsheet_output_class, name):
3838
raise Exception('The requested format is not available')
3939

4040

41-
def flatten(input_name, schema=None, output_name='releases', output_format='all', main_sheet_name='main', root_list_path='releases', rollup=False, root_id='ocid', use_titles=False, **_):
41+
def flatten(input_name, schema=None, output_name='flattened', output_format='all', main_sheet_name='main', root_list_path='main', rollup=False, root_id='ocid', use_titles=False, **_):
4242
"""
4343
Flatten a nested structure (JSON) to a flat structure (spreadsheet - csv or xlsx).
4444
@@ -49,16 +49,14 @@ def flatten(input_name, schema=None, output_name='releases', output_format='all'
4949
schema_filename=schema,
5050
rollup=rollup,
5151
root_id=root_id,
52-
use_titles=use_titles,
53-
main_sheet_name=main_sheet_name)
52+
use_titles=use_titles)
5453
schema_parser.parse()
5554
else:
5655
schema_parser = None
5756
parser = JSONParser(
5857
json_filename=input_name,
5958
root_list_path=root_list_path,
6059
schema_parser=schema_parser,
61-
main_sheet_name=main_sheet_name,
6260
root_id=root_id,
6361
use_titles=use_titles)
6462
parser.parse()
@@ -102,8 +100,8 @@ def decimal_default(o):
102100
raise TypeError(repr(o) + " is not JSON serializable")
103101

104102

105-
def unflatten(input_name, base_json=None, input_format=None, output_name='releases.json',
106-
main_sheet_name='releases', encoding='utf8', timezone_name='UTC',
103+
def unflatten(input_name, base_json=None, input_format=None, output_name='unflattened.json',
104+
root_list_path='main', encoding='utf8', timezone_name='UTC',
107105
root_id='ocid', schema='', convert_titles=False, cell_source_map=None,
108106
heading_source_map=None, **_):
109107
"""
@@ -119,11 +117,11 @@ def unflatten(input_name, base_json=None, input_format=None, output_name='releas
119117
spreadsheet_input = spreadsheet_input_class(
120118
input_name=input_name,
121119
timezone_name=timezone_name,
122-
main_sheet_name=main_sheet_name,
120+
root_list_path=root_list_path,
123121
root_id=root_id,
124122
convert_titles=convert_titles)
125123
if schema:
126-
parser = SchemaParser(schema_filename=schema, main_sheet_name=main_sheet_name, rollup=True, root_id=root_id)
124+
parser = SchemaParser(schema_filename=schema, rollup=True, root_id=root_id)
127125
parser.parse()
128126
spreadsheet_input.parser = parser
129127
spreadsheet_input.encoding = encoding
@@ -135,7 +133,7 @@ def unflatten(input_name, base_json=None, input_format=None, output_name='releas
135133
base = OrderedDict()
136134
if WITH_CELLS:
137135
result, cell_source_map_data, heading_source_map_data = spreadsheet_input.fancy_unflatten()
138-
base[main_sheet_name] = list(result)
136+
base[root_list_path] = list(result)
139137
with codecs.open(output_name, 'w', encoding='utf-8') as fp:
140138
json.dump(base, fp, indent=4, default=decimal_default, ensure_ascii=False)
141139
if cell_source_map:
@@ -146,7 +144,7 @@ def unflatten(input_name, base_json=None, input_format=None, output_name='releas
146144
json.dump(heading_source_map_data, fp, indent=4, default=decimal_default, ensure_ascii=False)
147145
else:
148146
result = spreadsheet_input.unflatten()
149-
base[main_sheet_name] = list(result)
147+
base[root_list_path] = list(result)
150148
with codecs.open(output_name, 'w', encoding='utf-8') as fp:
151149
json.dump(base, fp, indent=4, default=decimal_default, ensure_ascii=False)
152150

flattentool/cli.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def create_parser():
7676
help="Name of the outputted file. Will have an extension appended if format is all.")
7777
parser_flatten.add_argument(
7878
"--root-list-path",
79-
help="Path of the root list, defaults to releases")
79+
help="Path of the root list, defaults to main")
8080
parser_flatten.add_argument(
8181
"--rollup",
8282
action='store_true',
@@ -101,16 +101,16 @@ def create_parser():
101101
required=True)
102102
parser_unflatten.add_argument(
103103
"-b", "--base-json",
104-
help="A base json file to populate the releases key in.")
104+
help="A base json file to populate with the unflattened data.")
105105
parser_unflatten.add_argument(
106-
"-m", "--main-sheet-name",
107-
help="The name of the main sheet. Defaults to releases")
106+
"-m", "--root-list-path",
107+
help="The path in the JSON that will contain the unflattened list. Defaults to main.")
108108
parser_unflatten.add_argument(
109109
"-e", "--encoding",
110110
help="Encoding of the input file(s) (only relevant for CSV). Defaults to utf8.")
111111
parser_unflatten.add_argument(
112112
"-o", "--output-name",
113-
help="Name of the outputted file. Will have an extension appended as appropriate. Defaults to releases")
113+
help="Name of the outputted file. Will have an extension appended as appropriate. Defaults to unflattened.json")
114114
parser_unflatten.add_argument(
115115
"-c", "--cell-source-map",
116116
help="Path to write a cell source map to. Will have an extension appended as appropriate.")

flattentool/input.py

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -143,21 +143,15 @@ def convert_dict_titles(self, dicts, title_lookup=None):
143143
else:
144144
yield d
145145

146-
def __init__(self, input_name='', main_sheet_name='', timezone_name='UTC', root_id='ocid', convert_titles=False):
146+
def __init__(self, input_name='', root_list_path='main', timezone_name='UTC', root_id='ocid', convert_titles=False):
147147
self.input_name = input_name
148-
self.main_sheet_name = main_sheet_name
148+
self.root_list_path = root_list_path
149149
self.sub_sheet_names = []
150150
self.timezone = pytz.timezone(timezone_name)
151151
self.root_id = root_id
152152
self.convert_titles = convert_titles
153153
self.parser = None
154154

155-
def get_main_sheet_lines(self):
156-
if self.convert_titles:
157-
return self.convert_dict_titles(self.get_sheet_lines(self.main_sheet_name))
158-
else:
159-
return self.get_sheet_lines(self.main_sheet_name)
160-
161155
def get_sub_sheets_lines(self):
162156
for sub_sheet_name in self.sub_sheet_names:
163157
if self.convert_titles:
@@ -189,8 +183,7 @@ def convert_types(self, in_dict):
189183

190184
def do_unflatten(self):
191185
main_sheet_by_ocid = OrderedDict()
192-
# Eventually we should get rid of the concept of a "main sheet entirely"
193-
sheets = [(self.main_sheet_name, self.get_main_sheet_lines())] + list(self.get_sub_sheets_lines())
186+
sheets = list(self.get_sub_sheets_lines())
194187
for i, sheet in enumerate(sheets):
195188
sheet_name, lines = sheet
196189
try:
@@ -251,7 +244,7 @@ def fancy_unflatten(self):
251244
raise Exception('Can only do a fancy_unflatten() if WITH_CELLS=True')
252245
cell_tree = self.do_unflatten()
253246
result = extract_list_to_value(cell_tree)
254-
cell_source_map = extract_list_to_error_path([self.main_sheet_name.lower()], cell_tree)
247+
cell_source_map = extract_list_to_error_path([self.root_list_path], cell_tree)
255248
ordered_items = sorted(cell_source_map.items())
256249
ordered_cell_source_map = OrderedDict(( '/'.join(str(x) for x in path), location) for path, location in ordered_items)
257250
row_source_map = OrderedDict()
@@ -355,10 +348,6 @@ def get_sheet_headings(self, sheet_name):
355348

356349
def read_sheets(self):
357350
sheet_file_names = os.listdir(self.input_name)
358-
if self.main_sheet_name+'.csv' not in sheet_file_names:
359-
raise ValueError('Main sheet "{}.csv" not found.'.format(self.main_sheet_name))
360-
sheet_file_names.remove(self.main_sheet_name+'.csv')
361-
362351
self.sub_sheet_names = sorted([fname[:-4] for fname in sheet_file_names if fname.endswith('.csv')])
363352

364353
def get_sheet_lines(self, sheet_name):
@@ -380,17 +369,9 @@ class XLSXInput(SpreadsheetInput):
380369
def read_sheets(self):
381370
self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
382371

383-
self.sheet_names_map = {sheet_name: sheet_name for sheet_name in self.workbook.get_sheet_names()}
384-
# allow main sheet to be any case
385-
for sheet_name in list(self.sheet_names_map):
386-
if sheet_name.lower() == self.main_sheet_name.lower():
387-
self.sheet_names_map.pop(sheet_name)
388-
self.sheet_names_map[self.main_sheet_name] = sheet_name
372+
self.sheet_names_map = OrderedDict((sheet_name, sheet_name) for sheet_name in self.workbook.get_sheet_names())
389373

390374
sheet_names = list(self.sheet_names_map.keys())
391-
if self.main_sheet_name not in sheet_names:
392-
raise ValueError('Main sheet "{}" not found in workbook.'.format(self.main_sheet_name))
393-
sheet_names.remove(self.main_sheet_name)
394375
self.sub_sheet_names = sheet_names
395376

396377
def get_sheet_headings(self, sheet_name):

flattentool/json_input.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,9 @@ class JSONParser(object):
4747
# Named for consistency with schema.SchemaParser, but not sure it's the most appropriate name.
4848
# Similarly with methods like parse_json_dict
4949

50-
def __init__(self, json_filename=None, root_json_dict=None, main_sheet_name='main', schema_parser=None, root_list_path=None, root_id='ocid', use_titles=False):
50+
def __init__(self, json_filename=None, root_json_dict=None, schema_parser=None, root_list_path=None, root_id='ocid', use_titles=False):
5151
self.sub_sheets = {}
5252
self.main_sheet = Sheet()
53-
self.main_sheet_name = main_sheet_name
5453
self.root_list_path = root_list_path
5554
self.root_id = root_id
5655
self.use_titles = use_titles

flattentool/schema.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,10 @@ def __contains__(self, key):
6262
class SchemaParser(object):
6363
"""Parse the fields of a JSON schema into a flattened structure."""
6464

65-
def __init__(self, schema_filename=None, root_schema_dict=None, main_sheet_name='main', rollup=False, root_id='ocid', use_titles=False):
65+
def __init__(self, schema_filename=None, root_schema_dict=None, rollup=False, root_id='ocid', use_titles=False):
6666
self.sub_sheets = {}
6767
self.main_sheet = Sheet()
6868
self.sub_sheet_mapping = {}
69-
self.main_sheet_name = main_sheet_name
7069
self.rollup = rollup
7170
self.root_id = root_id
7271
self.use_titles = use_titles

0 commit comments

Comments
 (0)