Skip to content

Commit 73d11f8

Browse files
authored
Merge pull request #983 from ProjectTorreyPines/parallel_environment_return_pids
Return pid list in parallel_environment for cleanup
2 parents c2f4747 + c00f878 commit 73d11f8

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

src/utils_begin.jl

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ function parallel_environment(
190190
current_nworkers = Distributed.nprocs() - 1
191191

192192
if nworkers == 0
193+
pid_list = Int[]
193194
#pass
194195

195196
elseif cluster == "omega"
@@ -203,7 +204,7 @@ function parallel_environment(
203204
gigamem_per_cpu = Int(ceil(memory_usage_fraction * gigamem_per_node / cpus_per_node * cpus_per_task))
204205
ENV["JULIA_WORKER_TIMEOUT"] = "360"
205206
if current_nworkers < nworkers
206-
Distributed.addprocs(
207+
pid_list = Distributed.addprocs(
207208
ClusterManagers.SlurmManager(nworkers - current_nworkers);
208209
partition="ga-ird",
209210
exclusive="",
@@ -229,7 +230,7 @@ function parallel_environment(
229230
gigamem_per_cpu = Int(ceil(memory_usage_fraction * gigamem_per_node / cpus_per_node * cpus_per_task))
230231
ENV["JULIA_WORKER_TIMEOUT"] = "360"
231232
if current_nworkers < nworkers
232-
Distributed.addprocs(
233+
pid_list = Distributed.addprocs(
233234
ClusterManagers.SlurmManager(nworkers - current_nworkers);
234235
partition="pppl-medium",
235236
exclusive="",
@@ -255,7 +256,7 @@ function parallel_environment(
255256
gigamem_per_cpu = Int(ceil(memory_usage_fraction * gigamem_per_node / cpus_per_node * cpus_per_task))
256257
ENV["JULIA_WORKER_TIMEOUT"] = "180"
257258
if current_nworkers < nworkers
258-
Distributed.addprocs(
259+
pid_list = Distributed.addprocs(
259260
ClusterManagers.SlurmManager(nworkers - current_nworkers);
260261
exclusive="",
261262
topology=:master_worker,
@@ -279,7 +280,7 @@ function parallel_environment(
279280
gigamem_per_cpu = Int(ceil(memory_usage_fraction * gigamem_per_node / cpus_per_node * cpus_per_task))
280281
ENV["JULIA_WORKER_TIMEOUT"] = "360"
281282
if current_nworkers < nworkers
282-
Distributed.addprocs(
283+
pid_list = Distributed.addprocs(
283284
ClusterManagers.SlurmManager(nworkers - current_nworkers);
284285
partition="LocalQ",
285286
topology=:master_worker,
@@ -304,7 +305,7 @@ function parallel_environment(
304305
gigamem_per_cpu = Int(ceil(memory_usage_fraction * gigamem_per_node / cpus_per_node * cpus_per_task))
305306
ENV["JULIA_WORKER_TIMEOUT"] = "360"
306307
if current_nworkers < nworkers
307-
Distributed.addprocs(
308+
pid_list = Distributed.addprocs(
308309
ClusterManagers.SlurmManager(nworkers - current_nworkers);
309310
partition="sched_mit_psfc_r8",
310311
exclusive="",
@@ -325,13 +326,12 @@ function parallel_environment(
325326
nworkers = length(Sys.cpu_info())
326327
end
327328
if current_nworkers < nworkers
328-
Distributed.addprocs(nworkers - current_nworkers; topology=:master_worker, exeflags=["--heap-size-hint=$(mem_size)G"])
329+
pid_list = Distributed.addprocs(nworkers - current_nworkers; topology=:master_worker, exeflags=["--heap-size-hint=$(mem_size)G"])
329330
end
330331

331332
else
332333
error("Cluster `$cluster` is unknown. Use `localhost` or add `$cluster` to the FUSE.parallel_environment")
333334
end
334-
335335
# import FUSE and IJulia on workers
336336
if workers_import_fuse
337337
if isdefined(Main, :IJulia)
@@ -349,7 +349,8 @@ function parallel_environment(
349349
Base.include_string(Main, code)
350350
end
351351

352-
return println("Using $(Distributed.nprocs() - 1) workers on $(gethostname())")
352+
println("Using $(Distributed.nprocs() - 1) workers on $(gethostname())")
353+
return pid_list
353354
end
354355

355356
"""

0 commit comments

Comments (0)