6
6
import shutil
7
7
from multiprocessing .pool import ThreadPool , Pool as ProcessPool
8
8
from multiprocessing import cpu_count
9
- import os
10
9
import tempfile
11
10
12
11
13
12
import numpy as np
14
- from nose .tools import eq_ as eq
13
+ from nose .tools import eq_ as eq , assert_is_instance
14
+ from numpy .testing import assert_array_equal
15
15
16
16
17
17
from zarr .tests .test_attrs import TestAttributes
18
18
from zarr .tests .test_core import TestArray
19
+ from zarr .tests .test_hierarchy import TestGroup
19
20
from zarr .sync import ThreadSynchronizer , ProcessSynchronizer
20
21
from zarr .core import Array
21
22
from zarr .attrs import Attributes
22
- from zarr .storage import init_array , TempStore
23
+ from zarr .storage import init_array , TempStore , init_group , atexit_rmtree
23
24
from zarr .compat import PY2
24
25
from zarr .codecs import Zlib
26
+ from zarr .hierarchy import Group
25
27
26
28
27
if PY2:

    class TemporaryDirectory(object):
        """Minimal stand-in for :class:`tempfile.TemporaryDirectory` on
        Python 2, which lacks it.

        Creates a temporary directory and registers it for removal at
        interpreter exit via the project's ``atexit_rmtree`` helper, so the
        directory outlives the object (only its ``name`` is used) but is
        still cleaned up.
        """

        def __init__(self):
            # local import keeps this shim self-contained; the module
            # header in view does not import atexit
            import atexit
            self.name = tempfile.mkdtemp()
            atexit.register(atexit_rmtree, self.name)

else:
    from tempfile import TemporaryDirectory
40
+ class TestAttributesWithThreadSynchronizer (TestAttributes ):
28
41
29
42
def init_attributes (self , store , read_only = False ):
30
43
key = 'attrs'
@@ -34,7 +47,7 @@ def init_attributes(self, store, read_only=False):
34
47
read_only = read_only )
35
48
36
49
37
- class TestProcessSynchronizedAttributes (TestAttributes ):
50
+ class TestAttributesProcessSynchronizer (TestAttributes ):
38
51
39
52
def init_attributes (self , store , read_only = False ):
40
53
key = 'attrs'
@@ -46,7 +59,7 @@ def init_attributes(self, store, read_only=False):
46
59
read_only = read_only )
47
60
48
61
49
- def _append_data (arg ):
62
+ def _append (arg ):
50
63
z , i = arg
51
64
import numpy as np
52
65
x = np .empty (1000 , dtype = 'i4' )
@@ -55,22 +68,46 @@ def _append_data(arg):
55
68
return z .shape
56
69
57
70
71
+ def _set_arange (arg ):
72
+ z , i = arg
73
+ import numpy as np
74
+ x = np .arange (i * 1000 , (i * 1000 )+ 1000 , 1 )
75
+ z [i * 1000 :(i * 1000 )+ 1000 ] = x
76
+ return i
77
+
78
+
58
79
class MixinArraySyncTests(object):
    # Mixin for TestArray subclasses; relies on the host class providing
    # create_array() and create_pool().  Exercises concurrent writes that
    # go through the array's synchronizer.

    def test_parallel_setitem(self):
        n = 99

        # setup
        # NOTE(review): chunks=999 is deliberately mis-aligned with the
        # 1000-element slice each worker writes, so adjacent workers touch
        # shared chunks and the synchronizer is actually contended —
        # presumably; confirm against zarr chunking semantics
        arr = self.create_array(shape=n * 1000, chunks=999, dtype='i4')
        arr[:] = 0
        pool = self.create_pool()

        # parallel setitem
        results = pool.map_async(_set_arange, zip([arr] * n, range(n)))
        print(results.get())

        # every element must hold its arange value despite the races
        assert_array_equal(np.arange(n * 1000), arr[:])

    def test_parallel_append(self):
        n = 99

        # setup
        arr = self.create_array(shape=1000, chunks=999, dtype='i4')
        arr[:] = 0
        pool = self.create_pool()

        # parallel append
        results = pool.map_async(_append, zip([arr] * n, range(n)))
        print(results.get())

        # initial 1000 elements plus 1000 appended by each of n workers
        eq(((n + 1) * 1000,), arr.shape)
71
108
72
109
73
- class TestThreadSynchronizedArray (TestArray , MixinArraySyncTests ):
110
+ class TestArrayWithThreadSynchronizer (TestArray , MixinArraySyncTests ):
74
111
75
112
def create_array (self , read_only = False , ** kwargs ):
76
113
store = dict ()
@@ -93,17 +130,17 @@ def test_repr(self):
93
130
for l1 , l2 in zip (expect .split ('\n ' ), actual .split ('\n ' )):
94
131
eq (l1 , l2 )
95
132
96
- def create_pool (self , size ):
97
- pool = ThreadPool (size )
133
+ def create_pool (self ):
134
+ pool = ThreadPool (cpu_count () )
98
135
return pool
99
136
100
137
101
- class TestProcessSynchronizedArray (TestArray , MixinArraySyncTests ):
138
+ class TestArrayWithProcessSynchronizer (TestArray , MixinArraySyncTests ):
102
139
103
140
def create_array (self , read_only = False , ** kwargs ):
104
141
store = TempStore ()
105
142
init_array (store , ** kwargs )
106
- synchronizer = ProcessSynchronizer (tempfile . TemporaryDirectory ().name )
143
+ synchronizer = ProcessSynchronizer (TemporaryDirectory ().name )
107
144
return Array (store , synchronizer = synchronizer ,
108
145
read_only = read_only , cache_metadata = False )
109
146
@@ -122,6 +159,113 @@ def test_repr(self):
122
159
for l1 , l2 in zip (expect .split ('\n ' ), actual .split ('\n ' )):
123
160
eq (l1 , l2 )
124
161
125
- def create_pool (self , size ):
126
- pool = ProcessPool (size )
162
+ def create_pool (self ):
163
+ pool = ProcessPool (cpu_count () )
127
164
return pool
165
+
166
+
167
+ def _create_group (arg ):
168
+ g , name = arg
169
+ h = g .create_group (name )
170
+ return h .name
171
+
172
+
173
+ def _require_group (arg ):
174
+ g , name = arg
175
+ h = g .require_group (name )
176
+ return h .name
177
+
178
+
179
class MixinGroupSyncTests(object):
    """Mixin for TestGroup subclasses; relies on the host class providing
    create_group() and create_pool().  Hammers group creation from many
    workers to exercise the synchronizer."""

    def test_parallel_create_group(self):

        # setup
        g = self.create_group()
        pool = self.create_pool()

        # create n distinct child groups concurrently
        n = 1000
        names = [str(i) for i in range(n)]
        results = pool.map_async(_create_group, zip([g] * n, names))
        print(results.get())

        # all n children must exist exactly once
        eq(n, len(g))

    def test_parallel_require_group(self):

        # setup
        g = self.create_group()
        pool = self.create_pool()

        # require each name 10 times concurrently; require_group must be
        # idempotent, so only n // 10 distinct children should result
        n = 1000
        names = [str(i // 10) for i in range(n)]
        results = pool.map_async(_require_group, zip([g] * n, names))
        print(results.get())

        eq(n // 10, len(g))
208
+
209
+
210
class TestGroupWithThreadSynchronizer(TestGroup, MixinGroupSyncTests):
    """Run the Group test suite with a ThreadSynchronizer attached."""

    def create_group(self, store=None, path=None, read_only=False,
                     chunk_store=None, synchronizer=None):
        if store is None:
            store, chunk_store = self.create_store()
        init_group(store, path=path, chunk_store=chunk_store)
        # always attach a thread synchronizer, ignoring the argument
        synchronizer = ThreadSynchronizer()
        return Group(store, path=path, read_only=read_only,
                     chunk_store=chunk_store, synchronizer=synchronizer)

    def create_pool(self):
        return ThreadPool(cpu_count())

    def test_group_repr(self):
        # repr differs on PY2, so only checked on Python 3
        if not PY2:
            g = self.create_group()
            expect = 'Group(/, 0)\n' \
                     '  store: dict; synchronizer: ThreadSynchronizer'
            actual = repr(g)
            for l1, l2 in zip(expect.split('\n'), actual.split('\n')):
                eq(l1, l2)

    def test_synchronizer_property(self):
        g = self.create_group()
        assert_is_instance(g.synchronizer, ThreadSynchronizer)
238
+
239
+
240
class TestGroupWithProcessSynchronizer(TestGroup, MixinGroupSyncTests):
    """Run the Group test suite with a ProcessSynchronizer attached."""

    def create_store(self):
        # directory-backed store so separate processes share the data
        return TempStore(), None

    def create_group(self, store=None, path=None, read_only=False,
                     chunk_store=None, synchronizer=None):
        if store is None:
            store = TempStore()
            chunk_store = None
        init_group(store, path=path, chunk_store=chunk_store)
        # always attach a process synchronizer with its own lock directory
        sync_path = TemporaryDirectory().name
        return Group(store, path=path, read_only=read_only,
                     synchronizer=ProcessSynchronizer(sync_path),
                     chunk_store=chunk_store)

    def create_pool(self):
        return ProcessPool(cpu_count())

    def test_group_repr(self):
        # repr differs on PY2, so only checked on Python 3
        if not PY2:
            g = self.create_group()
            expect = 'Group(/, 0)\n' \
                     '  store: TempStore; synchronizer: ProcessSynchronizer'
            actual = repr(g)
            for l1, l2 in zip(expect.split('\n'), actual.split('\n')):
                eq(l1, l2)

    def test_synchronizer_property(self):
        g = self.create_group()
        assert_is_instance(g.synchronizer, ProcessSynchronizer)