@@ -69,6 +69,31 @@ def convert_type(type_string, value, timezone = pytz.timezone('UTC')):
69
69
else :
70
70
raise ValueError ('Unrecognised type: "{}"' .format (type_string ))
71
71
72
+
73
def merge(base, mergee, debug_info=None):
    """
    Recursively merge ``mergee`` into ``base``, modifying ``base`` in place.

    Keys present only in ``mergee`` are copied into ``base``. For a key
    present in both:

    * if the incoming value is a ``TemporaryDict`` and the existing value is
      one too, each keyed entry is merged recursively into the matching
      entry (or added if there is no match), and every entry of the incoming
      ``items_no_keyfield`` is appended to the existing one;
    * if the incoming value is a ``TemporaryDict`` but the existing value is
      not, a warning is emitted and the existing value is kept;
    * if both values are dicts, they are merged recursively;
    * otherwise the existing value is kept, and a warning is emitted when the
      two values differ.

    :param base: dict-like object that receives the merged data.
    :param mergee: dict-like object whose items are merged into ``base``.
    :param debug_info: optional dict used only to build warning messages.
        The keys read are ``id``, ``root_id``, ``root_id_or_none`` and
        ``sheet_name``; missing keys are reported as ``None``.
    :returns: ``None``.
    """
    if not debug_info:
        debug_info = {}

    def describe_target():
        # Only called when a warning is actually emitted, so the common
        # conflict-free path does no string formatting.
        id_info = 'id "{}"'.format(debug_info.get('id'))
        if debug_info.get('root_id'):
            id_info = '{} "{}", '.format(
                debug_info.get('root_id'),
                debug_info.get('root_id_or_none')) + id_info
        return id_info

    for key, value in mergee.items():
        if key not in base:
            base[key] = value
            continue

        existing = base[key]
        if isinstance(value, TemporaryDict):
            if not isinstance(existing, TemporaryDict):
                # The two sides disagree about whether this field is a keyed
                # collection. Merging entry by entry would either raise or
                # splice entries into an unrelated container, so keep what
                # is already in base and report the conflict instead.
                warn('Conflict when merging field "{}" for {} in sheet {}: it is a list of items here, but has the value "{}" elsewhere. Keeping the existing value.'.format(
                    key, describe_target(), debug_info.get('sheet_name'), existing))
                continue
            for item_key, item_value in value.items():
                if item_key in existing:
                    merge(existing[item_key], item_value, debug_info)
                else:
                    existing[item_key] = item_value
            # Entries without a key field cannot be matched up, so they are
            # always appended.
            for unkeyed_item in value.items_no_keyfield:
                existing.items_no_keyfield.append(unkeyed_item)
        elif isinstance(value, dict) and isinstance(existing, dict):
            merge(existing, value, debug_info)
        elif existing != value:
            # The existing value wins; the caller is only told about it.
            warn('Conflict when merging field "{}" for {} in sheet {}: "{}" != "{}". If you were not expecting merging you may have a duplicate ID.'.format(
                key, describe_target(), debug_info.get('sheet_name'), existing, value))
96
+
72
97
class SpreadsheetInput (object ):
73
98
"""
74
99
Base class describing a spreadsheet input. Has stubs which are
@@ -133,76 +158,28 @@ def convert_types(self, in_dict):
133
158
134
159
def unflatten (self ):
135
160
main_sheet_by_ocid = OrderedDict ()
136
- for line in self .get_main_sheet_lines ():
137
- if all (x == '' for x in line .values ()):
138
- continue
139
- root_id_or_none = line [self .root_id ] if self .root_id else None
140
- if root_id_or_none not in main_sheet_by_ocid :
141
- main_sheet_by_ocid [root_id_or_none ] = TemporaryDict ('id' )
142
- if not self .parser :
143
- main_sheet_by_ocid [root_id_or_none ].append (unflatten_line (self .convert_types (line )))
144
- else :
145
- main_sheet_by_ocid [root_id_or_none ].append (unflatten_main_with_parser (self .parser , line , self .timezone ))
146
-
147
- for sheet_name , lines in self .get_sub_sheets_lines ():
148
- for i , line in enumerate (lines ):
149
- line_number = i + 2
150
- try :
151
- if all (x == '' for x in line .values ()):
152
- continue
153
- id_fields = {k : v for k , v in line .items () if
154
- k .split (':' )[0 ].endswith ('/id' ) and
155
- k .startswith (self .main_sheet_name )}
156
- line_without_id_fields = OrderedDict (
157
- (k , v ) for k , v in line .items ()
158
- if k not in id_fields and (not k or k != self .root_id ))
159
- raw_id_fields_with_values = {k .split (':' )[0 ]: v for k , v in id_fields .items () if v }
160
- if not raw_id_fields_with_values :
161
- warn ('Line {} of sheet {} has no parent id fields populated,'
162
- 'skipping.' .format (line_number , sheet_name ))
163
- continue
164
- sheet_context_names = {k .split (':' )[0 ]: k .split (':' )[1 ] if len (k .split (':' )) > 1 else None
165
- for k , v in id_fields .items () if v }
166
-
167
- try :
168
- id_field = find_deepest_id_field (raw_id_fields_with_values )
169
- except ConflictingIDFieldsError :
170
- warn ('Multiple conflicting ID fields have been filled in on line {} of sheet {},'
171
- 'skipping that line.' .format (line_number , sheet_name ))
172
- continue
173
-
174
- try :
175
- context = path_search (
176
- {self .main_sheet_name : main_sheet_by_ocid [line [self .root_id ] if self .root_id else None ]},
177
- id_field .split ('/' )[:- 1 ],
178
- id_fields = raw_id_fields_with_values ,
179
- top = True
180
- )
181
- except IDFieldMissing as e :
182
- warn ('The parent id field "{}" was expected, but not present on line {} of sheet {}.' .format (
183
- e .args [0 ], line_number , sheet_name ))
184
- continue
185
-
186
- sheet_context_name = sheet_context_names [id_field ] or sheet_name
187
- # Added the following line to support the usecase in test_nested_sub_sheet
188
- context = path_search (context , sheet_context_name .split ('/' )[:- 1 ])
189
- unflattened = unflatten_line (self .convert_types (line_without_id_fields ))
190
- sheet_context_base_name = sheet_context_name .split ('/' )[- 1 ]
191
- if sheet_context_base_name not in context :
192
- context [sheet_context_base_name ] = TemporaryDict (keyfield = 'id' )
193
- elif context [sheet_context_base_name ].top_sheet :
194
- # Overwirte any rolled up data from the main sheet
195
- print (context [sheet_context_base_name ].data , unflattened )
196
- if context [sheet_context_base_name ].data .get (None ) != unflattened :
197
- warn ('Conflict between main sheet and sub sheet {}, using values from sub sheet' .format (sheet_context_base_name ))
198
- context [sheet_context_base_name ] = TemporaryDict (keyfield = 'id' )
199
- context [sheet_context_base_name ].append (unflattened )
200
- except Exception as e : # pylint: disable=W0703
201
- # Deliberately catch all exceptions for a line, so that
202
- # all lines without exceptions will still be processed.
203
- print ('An error occured whilst parsing line {} of sheet {}"' .format (line_number , sheet_name ))
204
- traceback .print_exc ()
205
- sys .exit ()
161
+ # Eventually we should get rid of the concept of a "main sheet" entirely
162
+ for sheet_name , lines in [(self .main_sheet_name , self .get_main_sheet_lines ())] + list (self .get_sub_sheets_lines ()):
163
+ for line in lines :
164
+ if all (x == '' for x in line .values ()):
165
+ continue
166
+ root_id_or_none = line [self .root_id ] if self .root_id else None
167
+ unflattened = unflatten_main_with_parser (self .parser , line , self .timezone )
168
+ if root_id_or_none not in main_sheet_by_ocid :
169
+ main_sheet_by_ocid [root_id_or_none ] = TemporaryDict ('id' )
170
+ if 'id' in unflattened and unflattened ['id' ] in main_sheet_by_ocid [root_id_or_none ]:
171
+ merge (
172
+ main_sheet_by_ocid [root_id_or_none ][unflattened .get ('id' )],
173
+ unflattened ,
174
+ {
175
+ 'sheet_name' : sheet_name ,
176
+ 'root_id' : self .root_id ,
177
+ 'root_id_or_none' : root_id_or_none ,
178
+ 'id' : unflattened .get ('id' )
179
+ }
180
+ )
181
+ else :
182
+ main_sheet_by_ocid [root_id_or_none ].append (unflattened )
206
183
207
184
temporarydicts_to_lists (main_sheet_by_ocid )
208
185
@@ -301,7 +278,7 @@ def list_as_dicts_to_temporary_dicts(unflattened):
301
278
302
279
303
280
def unflatten_main_with_parser (parser , line , timezone ):
304
- unflattened = {}
281
+ unflattened = OrderedDict ()
305
282
for path , value in line .items ():
306
283
if value is None or value == '' :
307
284
continue
@@ -311,7 +288,10 @@ def unflatten_main_with_parser(parser, line, timezone):
311
288
if isint (path_item ):
312
289
continue
313
290
path_till_now = '/' .join ([item for item in path_list [:num + 1 ] if not isint (item )])
314
- current_type = parser .flattened .get (path_till_now )
291
+ if parser :
292
+ current_type = parser .flattened .get (path_till_now )
293
+ else :
294
+ current_type = None
315
295
try :
316
296
next_path_item = path_list [num + 1 ]
317
297
except IndexError :
@@ -331,7 +311,7 @@ def unflatten_main_with_parser(parser, line, timezone):
331
311
current_path [path_item ] = list_as_dict
332
312
new_path = list_as_dict .get (list_index )
333
313
if new_path is None :
334
- new_path = {}
314
+ new_path = OrderedDict ()
335
315
list_as_dict [list_index ] = new_path
336
316
current_path = new_path
337
317
continue
@@ -340,7 +320,7 @@ def unflatten_main_with_parser(parser, line, timezone):
340
320
if current_type == 'object' or (not current_type and next_path_item ):
341
321
new_path = current_path .get (path_item )
342
322
if new_path is None :
343
- new_path = {}
323
+ new_path = OrderedDict ()
344
324
current_path [path_item ] = new_path
345
325
current_path = new_path
346
326
continue
0 commit comments