8
8
import sys
9
9
from decimal import Decimal , InvalidOperation
10
10
import os
11
+ import codecs
11
12
from collections import OrderedDict
13
+
12
14
import openpyxl
13
15
from six import text_type
14
16
from warnings import warn
15
17
import traceback
16
18
import datetime
19
+ import json
17
20
import pytz
18
21
from openpyxl .utils import _get_column_letter , column_index_from_string
22
+ from flattentool .lib import decimal_default , Cell
23
+ import tempfile
19
24
20
25
# Module-level switch for cell tracking: unflatten() branches on it, and
# fancy_unflatten() raises an Exception unless it is True.
WITH_CELLS = True
21
26
22
- class Cell :
23
- def __init__ (self , cell_value , cell_location ):
24
- self .cell_value = cell_value
25
- self .cell_location = cell_location
26
- self .sub_cells = []
27
27
28
28
# The "pylint: disable" lines suppress warnings about imports that we expect to fail.
29
29
@@ -238,26 +238,41 @@ def inthere(unflattened, id_name):
238
238
else :
239
239
main_sheet_by_ocid [root_id_or_none ].append (unflattened )
240
240
temporarydicts_to_lists (main_sheet_by_ocid )
241
+
241
242
return sum (main_sheet_by_ocid .values (), [])
242
243
244
+
243
245
def unflatten(self):
    """Return the unflattened data for this input.

    When ``WITH_CELLS`` is false this is simply ``self.do_unflatten()``.

    When ``WITH_CELLS`` is true, the tree is written to a temporary JSON
    file by ``results_from_cell_tree`` (under the key ``'main'``) and then
    read back, so the caller receives whatever that JSON serialisation
    produces, with objects loaded as ``OrderedDict`` to keep key order.

    The temporary directory is always removed before returning, including
    when serialisation or parsing raises.
    """
    if not WITH_CELLS:
        return self.do_unflatten()

    # Local import: shutil is only needed for the cleanup below.
    import shutil

    # mkdtemp() leaves deletion to the caller, so clean up in ``finally``.
    tmp_directory = tempfile.mkdtemp()
    try:
        file_name = os.path.join(tmp_directory, 'unflattened.json')
        self.results_from_cell_tree({}, 'main', file_name)
        # The file is written as UTF-8 with ensure_ascii=False, so it must
        # be decoded as UTF-8 here rather than with the locale's default.
        with codecs.open(file_name, encoding='utf-8') as unflattened:
            return json.load(unflattened, object_pairs_hook=OrderedDict)['main']
    finally:
        # ignore_errors so a cleanup failure never masks the real outcome.
        shutil.rmtree(tmp_directory, ignore_errors=True)
253
+
254
+
255
def extract_error_path(self, cell_tree):
    """Return the (path, value) pairs for ``cell_tree`` as a sorted list.

    Paths are rooted at the lower-cased main sheet name; the pairs come
    from ``extract_list_to_error_path`` and are sorted before returning.
    """
    root_path = [self.main_sheet_name.lower()]
    path_map = extract_list_to_error_path(root_path, cell_tree)
    ordered_pairs = list(path_map.items())
    ordered_pairs.sort()
    return ordered_pairs
257
+
248
258
249
- def fancy_unflatten (self ):
259
def results_from_cell_tree(self, base, main_sheet_name, output_name):
    """Unflatten the input, write it to ``output_name`` as JSON, and return
    the result of ``extract_error_path`` for the unflattened tree.

    ``base`` is modified in place: the tree from ``do_unflatten()`` is
    stored under ``main_sheet_name`` before ``base`` is serialised as
    UTF-8 JSON (4-space indent, non-ASCII characters written unescaped,
    ``decimal_default`` as the fallback encoder).
    """
    tree = self.do_unflatten()
    base[main_sheet_name] = tree
    with codecs.open(output_name, 'w', encoding='utf-8') as out_file:
        dump_options = dict(ensure_ascii=False, default=decimal_default, indent=4)
        json.dump(base, out_file, **dump_options)
    return self.extract_error_path(tree)
265
+
266
+
267
+ def fancy_unflatten (self , base , main_sheet_name , output_name , cell_source_map , heading_source_map ):
250
268
if not WITH_CELLS :
251
269
raise Exception ('Can only do a fancy_unflatten() if WITH_CELLS=True' )
252
- cell_tree = self .do_unflatten ()
253
- result = extract_list_to_value (cell_tree )
254
- cell_source_map = extract_list_to_error_path ([self .main_sheet_name .lower ()], cell_tree )
255
- ordered_items = sorted (cell_source_map .items ())
256
- ordered_cell_source_map = OrderedDict (( '/' .join (str (x ) for x in path ), location ) for path , location in ordered_items )
270
+ ordered_items = self .results_from_cell_tree (base , main_sheet_name , output_name )
271
+ if not cell_source_map and not heading_source_map :
272
+ return
257
273
row_source_map = OrderedDict ()
258
- heading_source_map = OrderedDict ()
259
- for path , _ in ordered_items :
260
- cells = cell_source_map [path ]
274
+ heading_source_map_data = OrderedDict ()
275
+ for path , cells in ordered_items :
261
276
# Prepare row_source_map key
262
277
key = '/' .join (str (x ) for x in path [:- 1 ])
263
278
if not key in row_source_map :
@@ -270,19 +285,28 @@ def fancy_unflatten(self):
270
285
except :
271
286
header_path_parts .append (x )
272
287
header_path = '/' .join (header_path_parts )
273
- if header_path not in heading_source_map :
274
- heading_source_map [header_path ] = []
288
+ if header_path not in heading_source_map_data :
289
+ heading_source_map_data [header_path ] = []
275
290
# Populate the row and header source maps
276
291
for cell in cells :
277
292
sheet , col , row , header = cell
278
293
if (sheet , row ) not in row_source_map [key ]:
279
294
row_source_map [key ].append ((sheet , row ))
280
- if (sheet , header ) not in heading_source_map [header_path ]:
281
- heading_source_map [header_path ].append ((sheet , header ))
295
+ if (sheet , header ) not in heading_source_map_data [header_path ]:
296
+ heading_source_map_data [header_path ].append ((sheet , header ))
282
297
for key in row_source_map :
283
- assert key not in ordered_cell_source_map , 'Row/cell collision: {}' .format (key )
284
- ordered_cell_source_map [key ] = row_source_map [key ]
285
- return result , ordered_cell_source_map , heading_source_map
298
+ ordered_items .append ((key .split ('/' ), row_source_map [key ]))
299
+
300
+ if cell_source_map :
301
+ with codecs .open (cell_source_map , 'w' , encoding = 'utf-8' ) as fp :
302
+ json .dump (
303
+ OrderedDict (( '/' .join (str (x ) for x in path ), location ) for path , location in ordered_items ),
304
+ fp , default = decimal_default , ensure_ascii = False , indent = 4
305
+ )
306
+ if heading_source_map :
307
+ with codecs .open (heading_source_map , 'w' , encoding = 'utf-8' ) as fp :
308
+ json .dump (heading_source_map_data , fp , indent = 4 , default = decimal_default , ensure_ascii = False )
309
+
286
310
287
311
def extract_list_to_error_path (path , input ):
288
312
output = {}
@@ -317,24 +341,6 @@ def extract_dict_to_error_path(path, input):
317
341
raise Exception ('Unexpected result type in the JSON cell tree: {}' .format (input [k ]))
318
342
return output
319
343
320
- def extract_list_to_value (input ):
321
- output = []
322
- for item in input :
323
- output .append (extract_dict_to_value (item ))
324
- return output
325
-
326
- def extract_dict_to_value (input ):
327
- output = OrderedDict ()
328
- for k in input :
329
- if isinstance (input [k ], list ):
330
- output [k ] = extract_list_to_value (input [k ])
331
- elif isinstance (input [k ], dict ):
332
- output [k ] = extract_dict_to_value (input [k ])
333
- elif isinstance (input [k ], Cell ):
334
- output [k ] = input [k ].cell_value
335
- else :
336
- raise Exception ('Unexpected result type in the JSON cell tree: {}' .format (input [k ]))
337
- return output
338
344
339
345
class CSVInput (SpreadsheetInput ):
340
346
encoding = 'utf-8'
@@ -557,6 +563,7 @@ def path_search(nested_dict, path_list, id_fields=None, path=None, top=False, to
557
563
558
564
559
565
class TemporaryDict (UserDict ):
566
+ __slots__ = ['keyfield' , 'items_no_keyfield' , 'data' , 'top_sheet' ]
560
567
def __init__ (self , keyfield , top_sheet = False ):
561
568
self .keyfield = keyfield
562
569
self .items_no_keyfield = []
0 commit comments