[#65] Add utf-8 support to JSON input and CSV output

Bjwebb · Bjwebb · commit 61d8404b444f · 2015-08-03T16:40:47.000+01:00
diff --git a/flattentool/json_input.py b/flattentool/json_input.py
@@ -14,6 +14,7 @@
 from flattentool.input import path_search
 from flattentool.sheet import Sheet
 from warnings import warn
+import codecs
 
 BASIC_TYPES = [six.text_type, bool, int, Decimal, type(None)]
 
@@ -94,7 +95,7 @@ def __init__(self, json_filename=None, root_json_dict=None, main_sheet_name='mai
             raise ValueError('Only one of json_file or root_json_dict should be supplied')
  
         if json_filename:
-            with open(json_filename) as json_file:
+            with codecs.open(json_filename, encoding='utf-8') as json_file:
                 try:
                     self.root_json_dict = json.load(json_file, object_pairs_hook=OrderedDict, parse_float=Decimal)
                 except ValueError as err:
diff --git a/flattentool/output.py b/flattentool/output.py
@@ -4,6 +4,12 @@
 import openpyxl
 import csv
 import os
+import sys
+
+if sys.version > '3':
+    import csv
+else:
+    import unicodecsv as csv  # pylint: disable=F0401
 
 
 class SpreadsheetOutput(object):
@@ -59,11 +65,20 @@ def open(self):
 
     def write_sheet(self, sheet_name, sheet):
         sheet_header = list(sheet)
-        with open(os.path.join(self.output_name, sheet_name+'.csv'), 'w') as csv_file:
-            dictwriter = csv.DictWriter(csv_file, sheet_header)
-            dictwriter.writeheader()
-            for sheet_line in sheet.lines:
-                dictwriter.writerow(sheet_line)
+        if sys.version > '3':  # If Python 3 or greater
+            # Pass the encoding to the open function
+            with open(os.path.join(self.output_name, sheet_name+'.csv'), 'w', encoding='utf-8') as csv_file:
+                dictwriter = csv.DictWriter(csv_file, sheet_header)
+                dictwriter.writeheader()
+                for sheet_line in sheet.lines:
+                    dictwriter.writerow(sheet_line)
+        else:  # If Python 2
+            # Pass the encoding to DictWriter
+            with open(os.path.join(self.output_name, sheet_name+'.csv'), 'w') as csv_file:
+                dictwriter = csv.DictWriter(csv_file, sheet_header, encoding='utf-8')
+                dictwriter.writeheader()
+                for sheet_line in sheet.lines:
+                    dictwriter.writerow(sheet_line)
 
 
 FORMATS = {
diff --git a/flattentool/tests/test_json_input.py b/flattentool/tests/test_json_input.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 from flattentool.json_input import JSONParser, BadlyFormedJSONError
 from flattentool.schema import SchemaParser
@@ -40,6 +41,13 @@ def test_json_filename(tmpdir):
     assert parser.root_json_dict == {'a':'b'}
 
 
+def test_json_filename_utf8(tmpdir):
+    test_json = tmpdir.join('test.json')
+    test_json.write_text('{"a":"éαГ😼𝒞人"}', encoding='utf-8')
+    parser = JSONParser(json_filename=test_json.strpath)
+    assert parser.root_json_dict == {'a':'éαГ😼𝒞人'}
+
+
 def test_json_filename_ordered(tmpdir):
     test_json = tmpdir.join('test.json')
     test_json.write('{"a":"b", "c": "d"}')
diff --git a/flattentool/tests/test_output.py b/flattentool/tests/test_output.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
 import pytest
 import os
 from flattentool import output, schema
@@ -129,3 +131,29 @@ def test_populated_lines(tmpdir):
     ])
     assert tmpdir.join('release', 'release.csv').read().strip('\r\n').replace('\r', '') == 'a\ncell1\ncell2'
     assert tmpdir.join('release', 'b.csv').read().strip('\r\n').replace('\r', '') == 'ocid,c\n,cell3\n,cell4'
+
+
+def test_utf8(tmpdir):
+    parser = MockParser(['é'], {})
+    parser.main_sheet.lines = [{'é': 'éαГ😼𝒞人'}, {'é': 'cell2'}]
+    for format_name, spreadsheet_output_class in output.FORMATS.items():
+        spreadsheet_output = spreadsheet_output_class(
+            parser=parser,
+            main_sheet_name='release',
+            output_name=os.path.join(tmpdir.strpath, 'release'+output.FORMATS_SUFFIX[format_name]))
+        spreadsheet_output.write_sheets()
+
+    # Check XLSX
+    wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath)
+    assert wb.get_sheet_names() == ['release']
+    assert len(wb['release'].rows) == 3
+    assert [ x.value for x in wb['release'].rows[0] ] == [ 'é' ]
+    assert [ x.value for x in wb['release'].rows[1] ] == [ 'éαГ😼𝒞人' ]
+    assert [ x.value for x in wb['release'].rows[2] ] == [ 'cell2' ]
+
+    # Check CSV
+    assert set(tmpdir.join('release').listdir()) == set([
+        tmpdir.join('release').join('release.csv'),
+    ])
+    release_csv_text = tmpdir.join('release', 'release.csv').read_text(encoding='utf-8')
+    assert release_csv_text.strip('\r\n').replace('\r', '') == 'é\néαГ😼𝒞人\ncell2'