28
28
except ImportError :
29
29
from UserDict import UserDict # pylint: disable=F0401
30
30
31
def convert_type(type_string, value, timezone = pytz.timezone('UTC')):
    """Convert a raw cell value according to a column type name.

    ``type_string`` selects the conversion:

    * ``'number'``  -> ``Decimal``
    * ``'integer'`` -> ``int``
    * ``'boolean'`` -> ``True`` / ``False`` for 'true'/'1' and 'false'/'0'
      (case-insensitive)
    * ``'array'``, ``'array_array'``, ``'string_array'`` -> the text split on
      ';' and, when the text contains a ',', each piece split again on ','
    * ``'string'``  -> text
    * ``''``        -> ``int`` values are passed through, anything else
      becomes text

    ``None`` and the empty string always convert to ``None``.  When a value
    cannot be converted to the requested number/integer/boolean type a
    warning is issued and the value is returned as text instead.  For the
    ``'string'`` and ``''`` types a ``datetime.datetime`` value is localized
    with ``timezone`` and returned in ISO format.

    Raises ``ValueError`` when ``type_string`` is not one of the names above.
    """
    # Empty cells carry no value, whatever the declared type is.
    if value is None or value == '':
        return None

    if type_string == 'number':
        try:
            return Decimal(value)
        except (TypeError, ValueError, InvalidOperation):
            warn('Non-numeric value "{}" found in number column, returning as string instead.'.format(value))
            return text_type(value)

    if type_string == 'integer':
        try:
            return int(value)
        except (TypeError, ValueError):
            warn('Non-integer value "{}" found in integer column, returning as string instead.'.format(value))
            return text_type(value)

    if type_string == 'boolean':
        value = text_type(value)
        lowered = value.lower()
        if lowered in ['true', '1']:
            return True
        if lowered in ['false', '0']:
            return False
        warn('Unrecognised value for boolean: "{}", returning as string instead'.format(value))
        return text_type(value)

    if type_string in ('array', 'array_array', 'string_array'):
        text = text_type(value)
        groups = text.split(';')
        # A comma anywhere in the text switches the result to a list of lists.
        if ',' in text:
            return [group.split(',') for group in groups]
        return groups

    if type_string in ('string', ''):
        # Only exact datetime instances are localized (not subclasses).
        if type(value) is datetime.datetime:
            return timezone.localize(value).isoformat()
        # Untyped columns keep plain ints as ints; everything else is text.
        if type_string == '' and type(value) in [int]:
            return value
        return text_type(value)

    raise ValueError('Unrecognised type: "{}"'.format(type_string))
31
71
32
72
class SpreadsheetInput (object ):
33
73
"""
@@ -79,56 +119,15 @@ def get_sheet_lines(self, sheet_name):
79
119
def read_sheets(self):
    """Not implemented here; always raises ``NotImplementedError``.

    NOTE(review): presumably overridden by format-specific subclasses --
    confirm against the rest of the file.
    """
    raise NotImplementedError()
81
121
82
- def convert_type (self , type_string , value ):
83
- if value == '' or value is None :
84
- return None
85
- if type_string == 'number' :
86
- try :
87
- return Decimal (value )
88
- except (TypeError , ValueError , InvalidOperation ):
89
- warn ('Non-numeric value "{}" found in number column, returning as string instead.' .format (value ))
90
- return text_type (value )
91
- elif type_string == 'integer' :
92
- try :
93
- return int (value )
94
- except (TypeError , ValueError ):
95
- warn ('Non-integer value "{}" found in integer column, returning as string instead.' .format (value ))
96
- return text_type (value )
97
- elif type_string == 'boolean' :
98
- value = text_type (value )
99
- if value .lower () in ['true' , '1' ]:
100
- return True
101
- elif value .lower () in ['false' , '0' ]:
102
- return False
103
- else :
104
- warn ('Unrecognised value for boolean: "{}", returning as string instead' .format (value ))
105
- return text_type (value )
106
- elif type_string == 'array' :
107
- value = text_type (value )
108
- if ',' in value :
109
- return [x .split (',' ) for x in value .split (';' )]
110
- else :
111
- return value .split (';' )
112
- elif type_string == 'string' :
113
- if type (value ) == datetime .datetime :
114
- return self .timezone .localize (value ).isoformat ()
115
- return text_type (value )
116
- elif type_string == '' :
117
- if type (value ) == datetime .datetime :
118
- return self .timezone .localize (value ).isoformat ()
119
- return value if type (value ) in [int ] else text_type (value )
120
- else :
121
- raise ValueError ('Unrecognised type: "{}"' .format (type_string ))
122
-
123
122
124
123
def convert_types(self, in_dict):
    """Return an ``OrderedDict`` of converted values keyed by bare field name.

    Each key of ``in_dict`` is split on ':'.  The first piece is used as the
    output key and the second piece, when present, as the type name passed to
    ``convert_type``; keys without a ':' are converted with the empty type
    name.  ``self.timezone`` is forwarded to ``convert_type``.
    """
    out_dict = OrderedDict()
    for key, value in in_dict.items():
        pieces = key.split(':')
        type_name = pieces[1] if len(pieces) > 1 else ''
        out_dict[pieces[0]] = convert_type(type_name, value, self.timezone)
    return out_dict
133
132
134
133
@@ -140,7 +139,10 @@ def unflatten(self):
140
139
root_id_or_none = line [self .root_id ] if self .root_id else None
141
140
if root_id_or_none not in main_sheet_by_ocid :
142
141
main_sheet_by_ocid [root_id_or_none ] = TemporaryDict ('id' )
143
- main_sheet_by_ocid [root_id_or_none ].append (unflatten_line (self .convert_types (line )))
142
+ if not self .parser :
143
+ main_sheet_by_ocid [root_id_or_none ].append (unflatten_line (self .convert_types (line )))
144
+ else :
145
+ main_sheet_by_ocid [root_id_or_none ].append (unflatten_main_with_parser (self .parser , line , self .timezone ))
144
146
145
147
for sheet_name , lines in self .get_sub_sheets_lines ():
146
148
for i , line in enumerate (lines ):
@@ -274,6 +276,86 @@ def unflatten_line(line):
274
276
path_search (unflattened , fields [:- 1 ], top_sheet = True )[fields [- 1 ]] = v
275
277
return unflattened
276
278
279
def isint(string):
    """Return True when ``int(string)`` succeeds, False otherwise.

    Values that ``int()`` rejects with either ``ValueError`` (e.g. ``'abc'``,
    ``''``) or ``TypeError`` (e.g. ``None``) are reported as not being
    integers instead of letting the exception escape.
    """
    try:
        int(string)
    except (TypeError, ValueError):
        return False
    return True
285
+
286
class ListAsDict(dict):
    """A plain ``dict`` subclass used as a marker type.

    It adds no behaviour of its own; code in this module distinguishes it
    from ordinary dicts with ``isinstance`` checks.
    """
288
+
289
def list_as_dicts_to_temporary_dicts(unflattened):
    """Recursively replace ``ListAsDict`` values with ``TemporaryDict("id")``.

    Walks ``unflattened`` in place.  For every value that has an ``items``
    attribute: the key is removed when the value is empty, the value is
    processed recursively, and -- when it is a ``ListAsDict`` -- its entries
    are appended, in sorted key order, to a new ``TemporaryDict("id")`` that
    is stored back under the same key.  Returns the same ``unflattened``
    object.
    """
    # Iterate over a snapshot of the keys because entries are removed and
    # reassigned while walking.
    for key in list(unflattened):
        child = unflattened[key]
        if not hasattr(child, 'items'):
            continue
        if not child:
            unflattened.pop(key)
        list_as_dicts_to_temporary_dicts(child)
        if isinstance(child, ListAsDict):
            collected = TemporaryDict("id")
            for position in sorted(child):
                collected.append(child[position])
            unflattened[key] = collected
    return unflattened
301
+
302
+
303
def unflatten_main_with_parser(parser, line, timezone):
    """Build a nested structure from one flattened row, guided by ``parser``.

    ``line`` maps '/'-separated paths to cell values.  Trailing '[' / ']'
    characters are stripped from each path segment, and segments that parse
    as integers are treated as list indexes.  ``parser.flattened`` is looked
    up with the index-free path built so far and is expected to yield a type
    name ('array', 'object', one of the names ``convert_type`` accepts) or
    nothing when the path is unknown.

    Cells whose value is ``None`` or ``''`` are skipped, matching the notion
    of "empty" used by ``convert_type``; other falsy values such as ``0`` or
    ``False`` are kept.  Leaf values are converted with ``convert_type`` using
    ``timezone``.  Lists are collected in ``ListAsDict`` objects keyed by
    index and turned into ``TemporaryDict`` objects at the end by
    ``list_as_dicts_to_temporary_dicts``.

    Raises ``ValueError`` when the shape of a path contradicts the type
    reported by ``parser.flattened``.
    """
    unflattened = {}
    for path, value in line.items():
        # Only truly empty cells are skipped; 0 and False are real values.
        if value is None or value == '':
            continue
        current_path = unflattened
        path_list = [item.rstrip('[]') for item in path.split('/')]
        for num, path_item in enumerate(path_list):
            # Index segments are consumed while handling the preceding segment.
            if isint(path_item):
                continue
            path_till_now = '/'.join([item for item in path_list[:num + 1] if not isint(item)])
            current_type = parser.flattened.get(path_till_now)
            try:
                next_path_item = path_list[num + 1]
            except IndexError:
                next_path_item = ''
            next_is_index = isint(next_path_item)

            # Array: either an explicit index follows, or the schema says so.
            # Without an explicit index all rows share the slot -1.
            list_index = -1
            if next_is_index:
                if current_type and current_type != 'array':
                    raise ValueError("There is an array at '{}' when the schema says there should be a '{}'".format(path_till_now, current_type))
                list_index = int(next_path_item)

            if next_is_index or current_type == 'array':
                list_as_dict = current_path.get(path_item)
                if list_as_dict is None:
                    list_as_dict = ListAsDict()
                    current_path[path_item] = list_as_dict
                new_path = list_as_dict.get(list_index)
                if new_path is None:
                    new_path = {}
                    list_as_dict[list_index] = new_path
                current_path = new_path
                continue

            # Object: declared by the schema, or an unknown path with more
            # segments still to come.
            if current_type == 'object' or (not current_type and next_path_item):
                new_path = current_path.get(path_item)
                if new_path is None:
                    new_path = {}
                    current_path[path_item] = new_path
                current_path = new_path
                continue
            if current_type and current_type != 'object' and next_path_item:
                raise ValueError("There is an object or list at '{}' but it should be an {}".format(path_till_now, current_type))

            # Other types: a leaf value.
            converted_value = convert_type(current_type or '', value, timezone)
            if converted_value is not None and converted_value != '':
                current_path[path_item] = converted_value

    unflattened = list_as_dicts_to_temporary_dicts(unflattened)
    return unflattened
357
+
358
+
277
359
278
360
class IDFieldMissing(KeyError):
    """A ``KeyError`` subclass with no added behaviour.

    NOTE(review): presumably raised when an expected id field is absent --
    confirm at the raise sites.
    """
0 commit comments