@@ -1,11 +1,10 @@
 """Views of remote engines."""
 # Copyright (c) IPython Development Team.
 # Distributed under the terms of the Modified BSD License.
-from __future__ import absolute_import
-from __future__ import print_function
-
+import concurrent.futures
 import inspect
 import warnings
+from collections import deque
 from contextlib import contextmanager
 
 from decorator import decorator
@@ -1213,9 +1212,8 @@ def _really_apply(
 
     @sync_results
     @save_ids
-    def map(self, f, *sequences, **kwargs):
-        """``view.map(f, *sequences, block=self.block, chunksize=1, ordered=True)`` => list|AsyncMapResult
-        Parallel version of builtin `map`, load-balanced by this View.
+    def map(self, f, *sequences, block=None, chunksize=1, ordered=True):
+        """Parallel version of builtin `map`, load-balanced by this View.
 
         `block`, and `chunksize` can be specified by keyword only.
 
@@ -1231,10 +1229,6 @@ def map(self, f, *sequences, **kwargs):
             the sequences to be distributed and passed to `f`
         block : bool [default self.block]
            whether to wait for the result or not
-        track : bool
-            whether to create a MessageTracker to allow the user to
-            safely edit after arrays and buffers during non-copying
-            sends.
         chunksize : int [default 1]
            how many elements should be in each task.
         ordered : bool [default True]
@@ -1256,14 +1250,8 @@ def map(self, f, *sequences, **kwargs):
         """
 
         # default
-        block = kwargs.get('block', self.block)
-        chunksize = kwargs.get('chunksize', 1)
-        ordered = kwargs.get('ordered', True)
-
-        keyset = set(kwargs.keys())
-        extra_keys = keyset.difference_update(set(['block', 'chunksize']))
-        if extra_keys:
-            raise TypeError("Invalid kwargs: %s" % list(extra_keys))
+        if block is None:
+            block = self.block
 
         assert len(sequences) > 0, "must have some sequences to map onto!"
 
@@ -1272,6 +1260,139 @@ def map(self, f, *sequences, **kwargs):
         )
         return pf.map(*sequences)
 
+    def imap(
+        self,
+        f,
+        *sequences,
+        ordered=True,
+        max_outstanding='auto',
+    ):
+ """Parallel version of lazily-evaluated `imap`, load-balanced by this View.
1271
+
1272
+ `ordered`, and `max_outstanding` can be specified by keyword only.
1273
+
1274
+ Unlike other map functions in IPython Parallel,
1275
+ this one does not consume the full iterable before submitting work,
1276
+ returning a single 'AsyncMapResult' representing the full computation.
1277
+
1278
+ Instead, it consumes iterables as they come, submitting up to `max_outstanding`
1279
+ tasks to the cluster before waiting on results (default: one task per engine).
1280
+ This allows it to work with infinite generators,
1281
+ and avoid potentially expensive read-ahead for large streams of inputs
1282
+ that may not fit in memory all at once.
1283
+
1284
+ .. versionadded: 7.0
1285
+
1286
+ Parameters
1287
+ ----------
1288
+ f : callable
1289
+ function to be mapped
1290
+ *sequences : one or more sequences of matching length
1291
+ the sequences to be distributed and passed to `f`
1292
+ ordered : bool [default True]
1293
+ Whether the results should be yielded on a first-come-first-yield basis,
1294
+ or preserve the order of submission.
1295
+
1296
+ max_outstanding : int [default len(engines)]
1297
+ The maximum number of tasks to be outstanding.
1298
+
1299
+ max_outstanding=0 will greedily consume the whole generator
1300
+ (map_async may be more efficient).
1301
+
1302
+ A limit of 1 should be strictly worse than running a local map,
1303
+ as there will be no parallelism.
1304
+
1305
+ Use this to tune how greedily input generator should be consumed.
1306
+
1307
+ Returns
1308
+ -------
1309
+
1310
+ lazily-evaluated generator, yielding results of `f` on each item of sequences.
1311
+ Yield-order depends on `ordered` argument.
1312
+ """
1313
+
+        assert len(sequences) > 0, "must have some sequences to map onto!"
+
+        if max_outstanding == 'auto':
+            max_outstanding = len(self)
+
+        pf = PrePickled(f)
+
+        if ordered:
+            outstanding = deque()
+
+            def wait_for_ready():
+                ar = outstanding.popleft()
+                return [ar]
+
+            def should_yield():
+                # ordered: yield first result if it's ready
+                if outstanding[0].ready():
+                    return True
+
+                if max_outstanding == 0:
+                    # no limit
+                    return False
+
+                # or if we've reached capacity (only counting still-outstanding computations)
+                # not counting locally available, but not yet yielded results
+                # TODO: should we limit the local?
+                # if consumers are much slower than producers,
+                # this can fill up local memory
+                return sum(not ar.ready() for ar in outstanding) >= max_outstanding
+
+        else:
+            outstanding = []
+
+            def wait_for_ready():
+                # unordered, yield whatever finishes first, as soon as it's ready
+                done, outstanding[:] = concurrent.futures.wait(
+                    outstanding, return_when=concurrent.futures.FIRST_COMPLETED
+                )
+                return done
+
+            def should_yield():
+                # unordered, we are ready to yield if any result is ready
+                if any(ar.ready() for ar in outstanding):
+                    return True
+
+                if max_outstanding == 0:
+                    # no limit
+                    return False
+
+                # or wait if we are full
+                if len(outstanding) >= max_outstanding:
+                    return True
+                return False
+
+        # zip is a lazy iterator
+        for args in zip(*sequences):
+            # submit one work item
+            ar = self.apply_async(pf, *args)
+            outstanding.append(ar)
+            # yield the first result if it's ready,
+            # *or* once the number of outstanding ('pending') tasks
+            # has reached our limit;
+            # yielding immediately keeps results flowing as soon as they are ready
+            if should_yield():
+                for ready_ar in wait_for_ready():
+                    yield ready_ar.get()
+
+            # we've filled the buffer, wait for at least one result before continuing
+            if len(outstanding) == max_outstanding:
+                for ready_ar in wait_for_ready():
+                    yield ready_ar.get()
+
+        # yield any remaining results
+        if ordered:
+            for ar in outstanding:
+                yield ar.get()
+        else:
+            while outstanding:
+                done, outstanding = concurrent.futures.wait(outstanding)
+                for ar in done:
+                    yield ar.get()
+
     def register_joblib_backend(self, name='ipyparallel', make_default=False):
         """Register this View as a joblib parallel backend
 
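For context, a minimal usage sketch of the `imap` generator added above (an editorial illustration, not part of the commit). It assumes a cluster is already running and reachable through the standard `ipyparallel.Client`; the `slow_square` helper and the input sizes are placeholders.

import ipyparallel as ipp

def slow_square(x):
    # placeholder task: simulate a second of remote work per item
    import time
    time.sleep(1)
    return x * x

rc = ipp.Client()                  # connect to the running cluster
view = rc.load_balanced_view()

# Ordered (default): results come back in submission order, with at most
# one outstanding task per engine (max_outstanding='auto').
for result in view.imap(slow_square, range(10)):
    print(result)

# Unordered, bounded buffer: yield whichever task finishes first,
# keeping at most 4 tasks in flight at any time.
for result in view.imap(slow_square, range(10), ordered=False, max_outstanding=4):
    print(result)

Because the input is consumed lazily, the same calls work with an unbounded generator in place of `range(10)`.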
@@ -1319,7 +1440,7 @@ def map(self, func, *iterables, **kwargs):
         if 'timeout' in kwargs:
             warnings.warn("timeout unsupported in ViewExecutor.map")
             kwargs.pop('timeout')
-        for r in self.view.map_async(func, *iterables, **kwargs):
+        for r in self.view.imap(func, *iterables, **kwargs):
            yield r
 
     def shutdown(self, wait=True):
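The final hunk switches `ViewExecutor.map` from `map_async` to the new `imap`, so the concurrent.futures-style executor surface now streams results lazily as well. A rough sketch of that surface, assuming the executor is obtained via `Client.executor()` (an accessor not shown in this diff):

import ipyparallel as ipp

rc = ipp.Client()
executor = rc.executor()           # assumed accessor returning a ViewExecutor

# Executor.map yields plain results (not AsyncResults); after this change
# they are produced lazily via View.imap rather than map_async.
for result in executor.map(pow, range(8), range(8)):
    print(result)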