11# -*- coding: utf-8 -*-
22from __future__ import absolute_import , print_function , division
33import unittest
4- import itertools
4+ import warnings
55
66
77import numpy as np
88
99
1010try :
1111 from numcodecs .msgpacks import LegacyMsgPack , MsgPack
12- codecs = [LegacyMsgPack (), MsgPack ()]
12+ default_codec = MsgPack ()
13+ # N.B., legacy codec is broken, see tests below. Also, the legacy codec emits a
14+ # PendingDeprecationWarning due to its use of the encoding argument; the tests
15+ # below suppress that warning as it is not relevant to them.
16+ legacy_codec = LegacyMsgPack ()
1317except ImportError : # pragma: no cover
1418 raise unittest .SkipTest ("msgpack not available" )
1519
1620
1721from numcodecs .tests .common import (check_config , check_repr , check_encode_decode_array ,
1822 check_backwards_compatibility , greetings )
23+ from numcodecs .compat import text_type , binary_type , PY2
1924
2025
2126# object array with strings
2227# object array with mix strings / nans
2328# object array with mix of string, int, float
2429# ...
# Fixtures that are exercised with both the legacy codec and the default codec.
legacy_arrays = [
    np.array([u'foo', u'bar', u'baz'] * 300, dtype=object),
    np.array([[u'foo', u'bar', np.nan]] * 300, dtype=object),
    np.array([u'foo', 1.0, 2] * 300, dtype=object),
    np.arange(1000, dtype='i4'),
    np.array([u'foo', u'bar', u'baz'] * 300),
    np.array([u'foo', [u'bar', 1.0, 2], {u'a': u'b', u'c': 42}] * 300, dtype=object),
    np.array(greetings * 100),
    np.array(greetings * 100, dtype=object),
]

# Object arrays holding bytes values; these are only exercised with the default codec.
_bytes_arrays = [
    np.array([b'foo', b'bar', b'baz'] * 300, dtype=object),
    np.array([g.encode('utf-8') for g in greetings] * 100, dtype=object),
]

# Full fixture list: the legacy-compatible arrays followed by the bytes arrays.
arrays = legacy_arrays + _bytes_arrays
45+
46+
def test_encode_decode():
    """Run the shared encode/decode check for both msgpack codecs.

    The default codec is checked against every entry of ``arrays``; the legacy
    codec is checked against ``legacy_arrays`` only, with
    PendingDeprecationWarning filtered out while those checks run.
    """
    for fixture in arrays:
        check_encode_decode_array(fixture, default_codec)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', PendingDeprecationWarning)
        for fixture in legacy_arrays:
            check_encode_decode_array(fixture, legacy_codec)
4056
4157
def test_config():
    """Run the shared configuration check on the default and legacy codecs."""
    for candidate in (default_codec, legacy_codec):
        check_config(candidate)
4561
4662
def test_repr():
    """Pass the expected repr strings of both codecs to the shared repr check.

    Covers the default ``MsgPack`` arguments, a fully non-default ``MsgPack``
    configuration, and two ``LegacyMsgPack`` encodings.
    """
    # The expected strings must not contain a space before the closing
    # parenthesis: a repr never has one, so the comparison would fail.
    check_repr("MsgPack(raw=False, use_bin_type=True, use_single_float=False)")
    check_repr("MsgPack(raw=True, use_bin_type=False, use_single_float=True)")
    check_repr("LegacyMsgPack(encoding='utf-8')")
    check_repr("LegacyMsgPack(encoding='ascii')")
5268
5369
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for both codecs.

    The default codec is checked with all of ``arrays``; the legacy codec with
    ``legacy_arrays`` only, with PendingDeprecationWarning filtered out.
    """
    default_id = default_codec.codec_id
    check_backwards_compatibility(default_id, arrays, [default_codec])

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', PendingDeprecationWarning)
        legacy_id = legacy_codec.codec_id
        check_backwards_compatibility(legacy_id, legacy_arrays, [legacy_codec])
5776
5877
5978def test_non_numpy_inputs ():
@@ -64,16 +83,21 @@ def test_non_numpy_inputs():
6483 [[0 , 1 ], [2 , 3 ]],
6584 [[0 ], [1 ], [2 , 3 ]],
6685 [[[0 , 0 ]], [[1 , 1 ]], [[2 , 3 ]]],
67- ["1" ],
68- ["11" , "11" ],
69- ["11" , "1" , "1" ],
86+ [u "1" ],
87+ [u "11" , u "11" ],
88+ [u "11" , u "1" , u "1" ],
7089 [{}],
71- [{"key" : "value" }, ["list" , "of" , "strings" ]],
90+ [{u"key" : u"value" }, [u"list" , u"of" , u"strings" ]],
91+ [b"1" ],
92+ [b"11" , b"11" ],
93+ [b"11" , b"1" , b"1" ],
94+ [{b"key" : b"value" }, [b"list" , b"of" , b"strings" ]],
7295 ]
7396 for input_data in data :
74- for codec in codecs :
75- output_data = codec .decode (codec .encode (input_data ))
76- assert np .array_equal (np .array (input_data ), output_data )
97+ actual = default_codec .decode (default_codec .encode (input_data ))
98+ expect = np .array (input_data )
99+ assert expect .shape == actual .shape
100+ assert np .array_equal (expect , actual )
77101
78102
79103def test_legacy_codec_broken ():
@@ -85,7 +109,9 @@ def test_legacy_codec_broken():
85109 a [0 ] = [0 , 1 ]
86110 a [1 ] = [2 , 3 ]
87111 codec = LegacyMsgPack ()
88- b = codec .decode (codec .encode (a ))
112+ with warnings .catch_warnings ():
113+ warnings .simplefilter ('ignore' , PendingDeprecationWarning )
114+ b = codec .decode (codec .encode (a ))
89115 assert a .shape == (2 ,)
90116 assert b .shape == (2 , 2 )
91117 assert not np .array_equal (a , b )
@@ -94,3 +120,66 @@ def test_legacy_codec_broken():
94120 codec = MsgPack ()
95121 b = codec .decode (codec .encode (a ))
96122 assert np .array_equal (a , b )
123+ assert a .shape == b .shape
124+
125+
def test_encode_decode_shape_dtype_preserved():
    """Check that a default-codec round trip keeps each array's shape and dtype."""
    for original in arrays:
        encoded = default_codec.encode(original)
        restored = default_codec.decode(encoded)
        assert original.shape == restored.shape
        assert original.dtype == restored.dtype
131+
132+
def _msgpack_roundtrip(codec, arr):
    """Encode *arr* with *codec* and decode the result straight away."""
    return codec.decode(codec.encode(arr))


def test_bytes():
    """Pin how each codec configuration round-trips bytes versus unicode items.

    Three configurations are covered: ``MsgPack()`` (raw=False),
    ``MsgPack(raw=True)`` and ``LegacyMsgPack()``.
    """
    bytes_arr = np.array([b'foo', b'bar', b'baz'], dtype=object)
    unicode_arr = np.array([u'foo', u'bar', u'baz'], dtype=object)

    # --- raw=False (default): both element types survive unchanged ---
    codec = MsgPack()

    # bytes in, bytes out
    out = _msgpack_roundtrip(codec, bytes_arr)
    assert np.array_equal(bytes_arr, out)
    assert isinstance(out[0], binary_type)
    assert out[0] == b'foo'

    # unicode in, unicode out
    out = _msgpack_roundtrip(codec, unicode_arr)
    assert np.array_equal(unicode_arr, out)
    assert isinstance(out[0], text_type)
    assert out[0] == u'foo'

    # --- raw=True: everything comes back as bytes ---
    codec = MsgPack(raw=True)

    # bytes in, bytes out
    out = _msgpack_roundtrip(codec, bytes_arr)
    assert np.array_equal(bytes_arr, out)
    assert isinstance(out[0], binary_type)
    assert out[0] == b'foo'

    # unicode in, bytes out (i.e. broken for unicode input)
    out = _msgpack_roundtrip(codec, unicode_arr)
    if PY2:
        # PY2 considers b'foo' and u'foo' to be equal
        assert np.array_equal(unicode_arr, out)
    else:
        assert not np.array_equal(unicode_arr, out)
    assert isinstance(out[0], binary_type)
    assert out[0] == b'foo'

    # --- legacy codec: everything comes back as unicode ---
    codec = LegacyMsgPack()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', PendingDeprecationWarning)

        # bytes in, unicode out (i.e. broken for bytes input)
        out = _msgpack_roundtrip(codec, bytes_arr)
        if PY2:
            # PY2 considers b'foo' and u'foo' to be equal
            assert np.array_equal(unicode_arr, out)
        else:
            assert not np.array_equal(bytes_arr, out)
        assert isinstance(out[0], text_type)
        assert out[0] == u'foo'

        # unicode in, unicode out
        out = _msgpack_roundtrip(codec, unicode_arr)
        assert np.array_equal(unicode_arr, out)
        assert isinstance(out[0], text_type)
        assert out[0] == u'foo'
0 commit comments