Skip to content

Commit 064fea4

Browse files
author
Steve Lamb
committed
Always utf8 encode column headers
1 parent edb00d3 commit 064fea4

File tree

2 files changed

+36
-12
lines changed

2 files changed

+36
-12
lines changed

djqscsv/djqscsv.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def write_csv(queryset, file_obj, **kwargs):
7070
csv_kwargs[key] = val
7171

7272
# add BOM to support CSVs in MS Excel (for Windows only)
73-
file_obj.write(_safe_utf8_encode(u'\ufeff'))
73+
file_obj.write(_safe_utf8_stringify(u'\ufeff'))
7474

7575
# the CSV must always be built from a values queryset
7676
# in order to introspect the necessary fields.
@@ -110,7 +110,7 @@ def write_csv(queryset, file_obj, **kwargs):
110110
name_map = dict((field, field) for field in field_names)
111111
if use_verbose_names:
112112
name_map.update(
113-
dict((field.name, field.verbose_name.encode('utf-8'))
113+
dict((field.name, field.verbose_name)
114114
for field in queryset.model._meta.fields
115115
if field.name in field_names))
116116

@@ -119,6 +119,9 @@ def write_csv(queryset, file_obj, **kwargs):
119119
merged_header_map.update(field_header_map)
120120
if extra_columns:
121121
merged_header_map.update(dict((k, k) for k in extra_columns))
122+
123+
merged_header_map = dict((k, _safe_utf8_stringify(v))
124+
for (k, v) in merged_header_map.items())
122125
writer.writerow(merged_header_map)
123126

124127
for record in values_qs:
@@ -155,6 +158,15 @@ def _validate_and_clean_filename(filename):
155158
return filename
156159

157160

161+
def _safe_utf8_stringify(value):
162+
if isinstance(value, str):
163+
return value
164+
elif isinstance(value, unicode):
165+
return value.encode('utf-8')
166+
else:
167+
return unicode(value).encode('utf-8')
168+
169+
158170
def _sanitize_unicode_record(field_serializer_map, record):
159171

160172
def _serialize_value(value):
@@ -165,21 +177,12 @@ def _serialize_value(value):
165177
else:
166178
return unicode(value)
167179

168-
def _sanitize_text(value):
169-
# make sure every text value is of type 'str', coercing unicode
170-
if isinstance(value, unicode):
171-
return value.encode("utf-8")
172-
elif isinstance(value, str):
173-
return value
174-
else:
175-
return str(value).encode("utf-8")
176-
177180
obj = {}
178181
for key, val in six.iteritems(record):
179182
if val is not None:
180183
serializer = field_serializer_map.get(key, _serialize_value)
181184
newval = serializer(val)
182-
obj[_sanitize_text(key)] = _sanitize_text(newval)
185+
obj[_safe_utf8_stringify(key)] = _safe_utf8_stringify(newval)
183186

184187
return obj
185188

test_app/djqscsv_tests/tests/test_utilities.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
import datetime
23

34
from django.test import TestCase
@@ -116,3 +117,23 @@ def test_generate_filename(self):
116117
r'person_export_[0-9]{8}.csv')
117118

118119

120+
class SafeUtf8EncodeTest(TestCase):
121+
def test_safe_utf8_encode(self):
122+
123+
class Foo(object):
124+
def __unicode__(self):
125+
return u'¯\_(ツ)_/¯'
126+
def __str_(self):
127+
return self.__unicode__().encode('utf-8')
128+
129+
for val in (u'¯\_(ツ)_/¯', 'plain', r'raw',
130+
b'123', 11312312312313L, False,
131+
datetime.datetime(2001, 01, 01),
132+
4, None, [], set(), Foo):
133+
134+
first_pass = djqscsv._safe_utf8_stringify(val)
135+
second_pass = djqscsv._safe_utf8_stringify(first_pass)
136+
third_pass = djqscsv._safe_utf8_stringify(second_pass)
137+
self.assertEqual(first_pass, second_pass)
138+
self.assertEqual(second_pass, third_pass)
139+
self.assertEqual(type(first_pass), type(third_pass))

0 commit comments

Comments
 (0)