Skip to content

Commit 47a2811

Browse files
committed
fixup! COMPAT: For pandas 0.21 CategoricalDtype
1 parent: c5d4eee; commit: 47a2811

File tree

2 files changed: 9 additions and 16 deletions.

dask/array/percentile.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ def _percentile(a, q, interpolation='linear'):
1717
return None
1818
if isinstance(q, Iterator):
1919
q = list(q)
20-
if str(a.dtype) == 'category':
20+
if a.dtype.name == 'category':
2121
result = np.percentile(a.codes, q, interpolation=interpolation)
2222
import pandas as pd
2323
return pd.Categorical.from_codes(result, a.categories, a.ordered)
@@ -100,7 +100,7 @@ def merge_percentiles(finalq, qs, vals, Ns, interpolation='lower'):
100100

101101
# TODO: Perform this check above in percentile once dtype checking is easy
102102
# Here we silently change meaning
103-
if str(vals[0].dtype) == 'category':
103+
if vals[0].dtype.name == 'category':
104104
result = merge_percentiles(finalq, qs, [v.codes for v in vals], Ns, interpolation)
105105
import pandas as pd
106106
return pd.Categorical.from_codes(result, vals[0].categories, vals[0].ordered)

dask/dataframe/io/tests/test_io.py

Lines changed: 7 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,7 @@
44

55
import pytest
66
from threading import Lock
7-
8-
import threading
7+
from multiprocessing.pool import ThreadPool
98

109
import dask.array as da
1110
import dask.dataframe as dd
@@ -15,7 +14,7 @@
1514
from dask.utils import tmpfile
1615
from dask.local import get_sync
1716

18-
from dask.dataframe.utils import assert_eq
17+
from dask.dataframe.utils import assert_eq, is_categorical_dtype
1918

2019

2120
####################
@@ -119,13 +118,14 @@ def test_from_array_with_record_dtype():
119118

120119
def test_from_bcolz_multiple_threads():
121120
bcolz = pytest.importorskip('bcolz')
121+
pool = ThreadPool(processes=5)
122122

123-
def check():
123+
def check(i):
124124
t = bcolz.ctable([[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
125125
names=['x', 'y', 'a'])
126126
d = dd.from_bcolz(t, chunksize=2)
127127
assert d.npartitions == 2
128-
assert str(d.dtypes['a']) == 'category'
128+
assert is_categorical_dtype(d.dtypes['a'])
129129
assert list(d.x.compute(get=get_sync)) == [1, 2, 3]
130130
assert list(d.a.compute(get=get_sync)) == ['a', 'b', 'a']
131131

@@ -139,14 +139,7 @@ def check():
139139
assert (sorted(dd.from_bcolz(t, chunksize=2).dask) !=
140140
sorted(dd.from_bcolz(t, chunksize=3).dask))
141141

142-
threads = []
143-
for i in range(5):
144-
thread = threading.Thread(target=check)
145-
thread.start()
146-
threads.append(thread)
147-
148-
for thread in threads:
149-
thread.join()
142+
pool.map(check, range(5))
150143

151144

152145
def test_from_bcolz():
@@ -156,7 +149,7 @@ def test_from_bcolz():
156149
names=['x', 'y', 'a'])
157150
d = dd.from_bcolz(t, chunksize=2)
158151
assert d.npartitions == 2
159-
assert str(d.dtypes['a']) == 'category'
152+
assert is_categorical_dtype(d.dtypes['a'])
160153
assert list(d.x.compute(get=get_sync)) == [1, 2, 3]
161154
assert list(d.a.compute(get=get_sync)) == ['a', 'b', 'a']
162155
L = list(d.index.compute(get=get_sync))

0 commit comments

Comments (0)