2020from collections import Counter
2121from collections import OrderedDict
2222import io
23+ import json
2324
2425import attr
2526
def transform_csv_to_csv(location, output, transformer):
    """
    Transform the CSV file at `location` with the `transformer` Transformer
    and save the result as a new CSV file at `output`.
    Return a list of Error objects. The output file is only written when the
    transformation reports no errors.
    Raise a ValueError when no `transformer` is provided.
    """
    if not transformer:
        raise ValueError('Cannot transform without Transformer')

    column_names, data, errors = transform_csv(
        read_csv_rows(location), transformer)

    # Guard clause: report the errors and leave `output` untouched.
    if errors:
        return errors

    write_csv(output, data, column_names)
    return []
5859
def transform_json_to_json(location, output, transformer):
    """
    Transform the JSON file at `location` with the `transformer` Transformer
    and save the result as a new JSON file at `output`.
    Return a list of Error objects. The output file is only written when the
    transformation reports no errors.
    Raise a ValueError when no `transformer` is provided.
    """
    if not transformer:
        raise ValueError('Cannot transform without Transformer')

    new_data, errors = transform_json(read_json(location), transformer)

    # Guard clause: report the errors and leave `output` untouched.
    if errors:
        return errors

    write_json(output, new_data)
    return []
78+
79+
80+ def transform_csv (rows , transformer ):
6181 """
6282 Read a list of list of CSV-like data `rows` and apply transformations using the
63- `transformer` Tranformer .
83+ `transformer` Transformer .
6484 Return a tuple of:
6585 ([column names...], [transformed ordered dict...], [Error objects..])
6686 """
@@ -90,12 +110,54 @@ def transform_data(rows, transformer):
90110 column_names = [c for c in column_names if c in transformer .column_filters ]
91111
92112 errors = transformer .check_required_columns (data )
93- if errors :
94- return column_names , data , errors
95113
96114 return column_names , data , errors
97115
98116
def transform_json(data, transformer):
    """
    Apply the `transformer` Transformer to the JSON-like `data`, which is
    either a single mapping or a list of mappings.
    Return a tuple of:
       (transformed data, [errors...])
    With a `transformer`, the transformed data is the list of dictionaries
    returned by process_json_keys (accumulated over all items when `data` is a
    list). Without a `transformer`, `data` is returned unchanged together
    with an empty list of errors.
    """
    if not transformer:
        # Callers unpack two values: always return the (data, errors) shape.
        return data, []

    renamings = transformer.column_renamings

    if not isinstance(data, list):
        new_data, errors = process_json_keys(data, renamings, transformer)
        return new_data, errors

    new_data = []
    errors = []
    for item in data:
        # Each item yields its own list of dictionaries and list of errors:
        # flatten both into the overall results.
        elements, item_errors = process_json_keys(item, renamings, transformer)
        new_data.extend(elements)
        errors.extend(item_errors)

    return new_data, errors
141+
142+
def process_json_keys(data, renamings, transformer):
    """
    Rename the keys of the `data` mapping using the `renamings` mapping of
    {current key: new key}, keeping the original key order. Keys without a
    renaming are kept as-is. Then apply the `transformer` column filters, if
    any, and its required columns check.
    Return a tuple of:
       ([transformed ordered dict...], errors)
    where errors is the value returned by `transformer.check_required_columns`.
    """
    # A direct lookup replaces the scan of every renaming for each key.
    renamed = OrderedDict(
        (renamings.get(key, key), value) for key, value in data.items()
    )
    new_data = [renamed]

    if transformer.column_filters:
        new_data = list(transformer.filter_columns(new_data))

    errors = transformer.check_required_columns(new_data)
    return new_data, errors
159+
160+
99161tranformer_config_help = '''
100162A transform configuration file is used to describe which transformations and
101163validations to apply to a source CSV file. This is a simple text file using YAML
@@ -266,6 +328,15 @@ def read_csv_rows(location):
266328 yield row
267329
268330
def read_json(location):
    """
    Load and return the JSON data stored in the file at `location`.
    The file is decoded as UTF-8, replacing undecodable bytes, and JSON
    objects are loaded as OrderedDict so that their key order is preserved.
    """
    with io.open(location, encoding='utf-8', errors='replace') as json_input:
        return json.load(json_input, object_pairs_hook=OrderedDict)
338+
339+
269340def write_csv (location , data , column_names ): # NOQA
270341 """
271342 Write a CSV file at `location` the `data` list of ordered dicts using the
@@ -275,3 +346,11 @@ def write_csv(location, data, column_names): # NOQA
275346 writer = csv .DictWriter (csvfile , fieldnames = column_names )
276347 writer .writeheader ()
277348 writer .writerows (data )
349+
350+
def write_json(location, data):
    """
    Serialize `data` (such as a list of ordered dicts) as JSON indented with
    three spaces and save it to the file at `location`, replacing any
    existing content.
    """
    with open(location, 'w') as json_output:
        json.dump(data, json_output, indent=3)
0 commit comments