YelpArchive · Ronyell · Nov 20, 2018
diff --git a/json_to_csv_converter.py b/json_to_csv_converter.py
@@ -1,18 +1,17 @@
 # -*- coding: utf-8 -*-
 """Convert the Yelp Dataset Challenge dataset from json format to csv.
-
 For more information on the Yelp Dataset Challenge please visit http://yelp.com/dataset_challenge
-
 """
 import argparse
 import collections
 import csv
 import simplejson as json
+import ast
 
 
 def read_and_write_file(json_file_path, csv_file_path, column_names):
     """Read in the json dataset file and write it out to a csv file, given the column names."""
-    with open(csv_file_path, 'wb+') as fout:
+    with open(csv_file_path, 'w', newline='', encoding='utf-8') as fout:  # csv requires newline=''; utf-8 matches the pre-port output
         csv_file = csv.writer(fout)
         csv_file.writerow(list(column_names))
         with open(json_file_path) as fin:
@@ -33,25 +32,25 @@ def get_superset_of_column_names_from_file(json_file_path):
 
 def get_column_names(line_contents, parent_key=''):
     """Return a list of flattened key names given a dict.
-
     Example:
-
         line_contents = {
             'a': {
                 'b': 2,
                 'c': 3,
                 },
         }
-
         will return: ['a.b', 'a.c']
-
     These will be the column names for the eventual csv file.
-
     """
     column_names = []
-    for k, v in line_contents.iteritems():
+    for k, v in line_contents.items():
         column_name = "{0}.{1}".format(parent_key, k) if parent_key else k
-        if isinstance(v, collections.MutableMapping):
+        if isinstance(v, str):  # some nested values arrive as stringified Python literals
+            try:
+                v = ast.literal_eval(v)
+            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):  # not a literal: keep the raw string
+                pass
+        if isinstance(v, dict):
             column_names.extend(
                     get_column_names(v, column_name).items()
                     )
@@ -63,24 +62,27 @@ def get_nested_value(d, key):
     """Return a dictionary item given a dictionary `d` and a flattened key from `get_column_names`.
 
     Example:
-
         d = {
             'a': {
                 'b': 2,
                 'c': 3,
                 },
         }
         key = 'a.b'
-
         will return: 2
 
     """
+    if isinstance(d, str):  # nested dicts may arrive as stringified Python literals
+        try:
+            d = ast.literal_eval(d)
+        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):  # not a literal: leave d as the raw string
+            pass
     if '.' not in key:
-        if key not in d:
+        if not isinstance(d, dict) or key not in d:  # non-dict values (None, numbers, plain text) have no sub-keys
             return None
         return d[key]
     base_key, sub_key = key.split('.', 1)
-    if base_key not in d:
+    if not isinstance(d, dict) or base_key not in d:  # same guard for the intermediate path component
         return None
     sub_dict = d[base_key]
     return get_nested_value(sub_dict, sub_key)
@@ -93,8 +95,8 @@ def get_row(line_contents, column_names):
                         line_contents,
                         column_name,
                         )
-        if isinstance(line_value, unicode):
-            row.append('{0}'.format(line_value.encode('utf-8')))
+        if isinstance(line_value, str):
+            row.append('{0}'.format(line_value))
         elif line_value is not None:
             row.append('{0}'.format(line_value))
         else:
@@ -121,3 +123,5 @@ def get_row(line_contents, column_names):
 
     column_names = get_superset_of_column_names_from_file(json_file)
     read_and_write_file(json_file, csv_file, column_names)
+
+