@@ -276,58 +276,6 @@ macro ismain()
 end
 
 
-@init @require MPIClusterManagers="e7922434-ae4b-11e9-05c5-9780451d2c66" begin
-
-using .MPIClusterManagers: MPI, start_main_loop, TCP_TRANSPORT_ALL, MPI_TRANSPORT_ALL
-
-"""
-    init_MPI_workers()
-
-Initialize MPI processes as Julia workers. Should be called from all MPI
-processes, and will only return on the master process.
-
-`transport` should be `"MPI"` or `"TCP"`, which is by default read from the
-environment variable `JULIA_MPI_TRANSPORT`, and otherwise defaults to `"TCP"`.
-
-If CUDA is loaded and functional in the Main module, additionally calls
-[`assign_GPU_workers()`](@ref)
-"""
-function init_MPI_workers(;
-    stdout_to_master = false,
-    stderr_to_master = false,
-    transport = get(ENV, "JULIA_MPI_TRANSPORT", "TCP")
-)
-
-    if !MPI.Initialized()
-        MPI.Init()
-    end
-    size = MPI.Comm_size(MPI.COMM_WORLD)
-    rank = MPI.Comm_rank(MPI.COMM_WORLD)
-
-    if size > 1
-        # workers don't return from this call:
-        start_main_loop(
-            Dict("TCP" => TCP_TRANSPORT_ALL, "MPI" => MPI_TRANSPORT_ALL)[transport],
-            stdout_to_master = stdout_to_master,
-            stderr_to_master = stderr_to_master
-        )
-
-        if @isdefined(CUDA) && CUDA.functional()
-            assign_GPU_workers()
-        end
-        @everywhere begin
-            typ = (myid() == 1) ? "(master)" : "(worker)"
-            dev = (@isdefined(CUDA) && CUDA.functional()) ? device() : "CPU"
-            @info "MPI process $(myid()) $typ is running on $(gethostname())::$dev"
-        end
-    end
-
-end
-
-
-end
-
-
 firsthalf(x) = x[1:end÷2]
 lasthalf(x) = x[end÷2:end]
 
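Note on the hunk above: the removed `init_MPI_workers()` was documented to be called from every MPI rank, with only the master returning (the other ranks stay inside `start_main_loop` and become Julia workers). The sketch below shows how a driver script would plausibly have used it according to that docstring; the `mpiexec` invocation, the explicit `using MPIClusterManagers` (assumed necessary for the `@require` block to activate), the module-qualified call, and the `@everywhere` payload are illustrative assumptions, not part of the removed code.

```julia
# run_mpi.jl -- launched with e.g. `mpiexec -n 4 julia run_mpi.jl` (illustrative)
using Distributed, CMBLensing
using MPIClusterManagers   # assumed: loading this triggers the @require block defining the helper

# Every MPI rank calls this; only the master rank returns here, with the
# remaining ranks attached as Julia workers (transport via JULIA_MPI_TRANSPORT).
CMBLensing.init_MPI_workers()

# Only the master reaches this point; workers are driven through Distributed.
@everywhere @info "hello from process $(myid())"   # placeholder work
```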
@@ -463,59 +411,6 @@ macro auto_adjoint(funcdef)
     esc(Expr(:block, defs...))
 end
 
-
-
-"""
-    assign_GPU_workers()
-
-Assign each Julia worker process a unique GPU using `CUDA.device!`.
-Workers may be distributed across different hosts, and each host can have
-multiple GPUs.
-"""
-function assign_GPU_workers()
-    @everywhere @eval Main using Distributed, CMBLensing
-    master_uuid = @isdefined(CUDA) ? CUDA.uuid(device()) : nothing
-    accessible_gpus = Dict(map(workers()) do id
-        @eval Main @fetchfrom $id begin
-            ds = CUDA.devices()
-            # put master's GPU last so we don't double up on it unless we need to
-            $id => sort((CUDA.deviceid.(ds) .=> CUDA.uuid.(ds)), by=(((k,v),) -> v==$master_uuid ? Inf : k))
-        end
-    end)
-    claimed = Set()
-    assignments = Dict(map(workers()) do myid
-        for (gpu_id, gpu_uuid) in accessible_gpus[myid]
-            if !(gpu_uuid in claimed)
-                push!(claimed, gpu_uuid)
-                return myid => gpu_id
-            end
-        end
-        error("Can't assign a unique GPU to every worker, process $myid has no free GPUs left.")
-    end)
-    @everywhere workers() device!($assignments[myid()])
-    println(GPU_worker_info())
-end
-
-"""
-    GPU_worker_info()
-
-Returns a string showing info about the assigned GPU workers.
-"""
-function GPU_worker_info()
-    lines = @eval Main map(procs()) do id
-        @fetchfrom id begin
-            if @isdefined(CUDA)
-                device = "device = $(sprint(io->show(io, MIME("text/plain"), CUDA.device()))) $(split(string(CUDA.uuid(CUDA.device())), '-')[1]))"
-            else
-                device = ""
-            end
-            join(["($(id==1 ? "master" : "worker") = $id", "host = $(gethostname())", device], ", ")
-        end
-    end
-    join(["GPU_worker_info:"; lines], "\n")
-end
-
-
 string_trunc(x) = Base._truncate_at_width_or_chars(string(x), displaysize(stdout)[2]-14)
 
 import NamedTupleTools
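For reference on the hunk above: the removed docstrings state that `assign_GPU_workers()` gives each Julia worker process a unique GPU via `CUDA.device!` (erroring if there are not enough free GPUs across the hosts) and that `GPU_worker_info()` returns a summary string. Below is a minimal usage sketch consistent with those docstrings; the worker count and the module-qualified names are assumptions, since the diff does not show whether these functions were exported.

```julia
using Distributed
addprocs(2)                        # illustrative worker count

@everywhere using CUDA, CMBLensing

# Give every worker its own GPU; per the removed code, this also prints
# GPU_worker_info() once the assignments are made.
CMBLensing.assign_GPU_workers()

# The summary string can also be requested again later on the master:
println(CMBLensing.GPU_worker_info())
```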