15
15
from flattentool .sheet import Sheet
16
16
from warnings import warn
17
17
import codecs
18
+ import xmltodict
18
19
19
20
BASIC_TYPES = [six .text_type , bool , int , Decimal , type (None )]
20
21
@@ -45,12 +46,15 @@ class JSONParser(object):
45
46
# Named for consistency with schema.SchemaParser, but not sure it's the most appropriate name.
46
47
# Similarly with methods like parse_json_dict
47
48
48
- def __init__ (self , json_filename = None , root_json_dict = None , schema_parser = None , root_list_path = None , root_id = 'ocid' , use_titles = False ):
49
+ def __init__ (self , json_filename = None , root_json_dict = None , schema_parser = None , root_list_path = None ,
50
+ root_id = 'ocid' , use_titles = False , xml = False , id_name = 'id' ):
49
51
self .sub_sheets = {}
50
52
self .main_sheet = Sheet ()
51
53
self .root_list_path = root_list_path
52
54
self .root_id = root_id
53
55
self .use_titles = use_titles
56
+ self .id_name = id_name
57
+ self .xml = xml
54
58
if schema_parser :
55
59
self .main_sheet = schema_parser .main_sheet
56
60
self .sub_sheets = schema_parser .sub_sheets
@@ -60,6 +64,18 @@ def __init__(self, json_filename=None, root_json_dict=None, schema_parser=None,
60
64
else :
61
65
self .rollup = False
62
66
67
+ if self .xml :
68
+ with codecs .open (json_filename , 'rb' ) as xml_file :
69
+ top_dict = xmltodict .parse (
70
+ xml_file ,
71
+ force_list = (root_list_path ,),
72
+ force_cdata = True ,
73
+ )
74
+ # AFAICT, this should be true for *all* XML files
75
+ assert len (top_dict ) == 1
76
+ root_json_dict = list (top_dict .values ())[0 ]
77
+ json_filename = None
78
+
63
79
if json_filename is None and root_json_dict is None :
64
80
raise ValueError ('Etiher json_filename or root_json_dict must be supplied' )
65
81
@@ -81,6 +97,10 @@ def parse(self):
81
97
else :
82
98
root_json_list = path_search (self .root_json_dict , self .root_list_path .split ('/' ))
83
99
for json_dict in root_json_list :
100
+ if json_dict is None :
101
+ # This is particularly useful for IATI XML, in order to not
102
# fall over on empty activity, e.g. <iati-activity/>
103
+ continue
84
104
self .parse_json_dict (json_dict , sheet = self .main_sheet )
85
105
86
106
def parse_json_dict (self , json_dict , sheet , json_key = None , parent_name = '' , flattened_dict = None , parent_id_fields = None , top_level_of_sub_sheet = False ):
@@ -109,17 +129,24 @@ def parse_json_dict(self, json_dict, sheet, json_key=None, parent_name='', flatt
109
129
if top_level_of_sub_sheet :
110
130
# Only add the IDs for the top level of object in an array
111
131
for k , v in parent_id_fields .items ():
112
- flattened_dict [sheet_key (sheet , k )] = v
132
+ if self .xml :
133
+ flattened_dict [sheet_key (sheet , k )] = v ['#text' ]
134
+ else :
135
+ flattened_dict [sheet_key (sheet , k )] = v
113
136
114
137
if self .root_id and self .root_id in json_dict :
115
138
parent_id_fields [sheet_key (sheet , self .root_id )] = json_dict [self .root_id ]
116
139
117
- if 'id' in json_dict :
118
- parent_id_fields [sheet_key (sheet , parent_name + 'id' )] = json_dict ['id' ]
140
+ if self . id_name in json_dict :
141
+ parent_id_fields [sheet_key (sheet , parent_name + self . id_name )] = json_dict [self . id_name ]
119
142
120
143
121
144
for key , value in json_dict .items ():
122
145
if type (value ) in BASIC_TYPES :
146
+ if self .xml and key == '#text' :
147
+ # Handle the text output from xmltodict
148
+ key = ''
149
+ parent_name = parent_name .strip ('/' )
123
150
flattened_dict [sheet_key (sheet , parent_name + key )] = value
124
151
elif hasattr (value , 'items' ):
125
152
self .parse_json_dict (
0 commit comments