Skip to content

Commit 61d8404

Browse files
committed
[#65] Add utf-8 support to JSON input and CSV output
1 parent 765b088 commit 61d8404

File tree

4 files changed

+58
-6
lines changed

4 files changed

+58
-6
lines changed

flattentool/json_input.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from flattentool.input import path_search
1515
from flattentool.sheet import Sheet
1616
from warnings import warn
17+
import codecs
1718

1819
BASIC_TYPES = [six.text_type, bool, int, Decimal, type(None)]
1920

@@ -94,7 +95,7 @@ def __init__(self, json_filename=None, root_json_dict=None, main_sheet_name='mai
9495
raise ValueError('Only one of json_file or root_json_dict should be supplied')
9596

9697
if json_filename:
97-
with open(json_filename) as json_file:
98+
with codecs.open(json_filename, encoding='utf-8') as json_file:
9899
try:
99100
self.root_json_dict = json.load(json_file, object_pairs_hook=OrderedDict, parse_float=Decimal)
100101
except ValueError as err:

flattentool/output.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@
44
import openpyxl
55
import csv
66
import os
7+
import sys
8+
9+
if sys.version > '3':
10+
import csv
11+
else:
12+
import unicodecsv as csv # pylint: disable=F0401
713

814

915
class SpreadsheetOutput(object):
@@ -59,11 +65,20 @@ def open(self):
5965

6066
def write_sheet(self, sheet_name, sheet):
6167
sheet_header = list(sheet)
62-
with open(os.path.join(self.output_name, sheet_name+'.csv'), 'w') as csv_file:
63-
dictwriter = csv.DictWriter(csv_file, sheet_header)
64-
dictwriter.writeheader()
65-
for sheet_line in sheet.lines:
66-
dictwriter.writerow(sheet_line)
68+
if sys.version > '3': # If Python 3 or greater
69+
# Pass the encoding to the open function
70+
with open(os.path.join(self.output_name, sheet_name+'.csv'), 'w', encoding='utf-8') as csv_file:
71+
dictwriter = csv.DictWriter(csv_file, sheet_header)
72+
dictwriter.writeheader()
73+
for sheet_line in sheet.lines:
74+
dictwriter.writerow(sheet_line)
75+
else: # If Python 2
76+
# Pass the encoding to DictWriter
77+
with open(os.path.join(self.output_name, sheet_name+'.csv'), 'w') as csv_file:
78+
dictwriter = csv.DictWriter(csv_file, sheet_header, encoding='utf-8')
79+
dictwriter.writeheader()
80+
for sheet_line in sheet.lines:
81+
dictwriter.writerow(sheet_line)
6782

6883

6984
FORMATS = {

flattentool/tests/test_json_input.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
from __future__ import unicode_literals
23
from flattentool.json_input import JSONParser, BadlyFormedJSONError
34
from flattentool.schema import SchemaParser
@@ -40,6 +41,13 @@ def test_json_filename(tmpdir):
4041
assert parser.root_json_dict == {'a':'b'}
4142

4243

44+
def test_json_filename_utf8(tmpdir):
45+
test_json = tmpdir.join('test.json')
46+
test_json.write_text('{"a":"éαГ😼𝒞人"}', encoding='utf-8')
47+
parser = JSONParser(json_filename=test_json.strpath)
48+
assert parser.root_json_dict == {'a':'éαГ😼𝒞人'}
49+
50+
4351
def test_json_filename_ordered(tmpdir):
4452
test_json = tmpdir.join('test.json')
4553
test_json.write('{"a":"b", "c": "d"}')

flattentool/tests/test_output.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import unicode_literals
13
import pytest
24
import os
35
from flattentool import output, schema
@@ -129,3 +131,29 @@ def test_populated_lines(tmpdir):
129131
])
130132
assert tmpdir.join('release', 'release.csv').read().strip('\r\n').replace('\r', '') == 'a\ncell1\ncell2'
131133
assert tmpdir.join('release', 'b.csv').read().strip('\r\n').replace('\r', '') == 'ocid,c\n,cell3\n,cell4'
134+
135+
136+
def test_utf8(tmpdir):
137+
parser = MockParser(['é'], {})
138+
parser.main_sheet.lines = [{'é': 'éαГ😼𝒞人'}, {'é': 'cell2'}]
139+
for format_name, spreadsheet_output_class in output.FORMATS.items():
140+
spreadsheet_output = spreadsheet_output_class(
141+
parser=parser,
142+
main_sheet_name='release',
143+
output_name=os.path.join(tmpdir.strpath, 'release'+output.FORMATS_SUFFIX[format_name]))
144+
spreadsheet_output.write_sheets()
145+
146+
# Check XLSX
147+
wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath)
148+
assert wb.get_sheet_names() == ['release']
149+
assert len(wb['release'].rows) == 3
150+
assert [ x.value for x in wb['release'].rows[0] ] == [ 'é' ]
151+
assert [ x.value for x in wb['release'].rows[1] ] == [ 'éαГ😼𝒞人' ]
152+
assert [ x.value for x in wb['release'].rows[2] ] == [ 'cell2' ]
153+
154+
# Check CSV
155+
assert set(tmpdir.join('release').listdir()) == set([
156+
tmpdir.join('release').join('release.csv'),
157+
])
158+
release_csv_text = tmpdir.join('release', 'release.csv').read_text(encoding='utf-8')
159+
assert release_csv_text.strip('\r\n').replace('\r', '') == 'é\néαГ😼𝒞人\ncell2'

0 commit comments

Comments
 (0)