
Commit 080b7cd

bugfix: leaked semaphore error (#309)
* use config for n_cpu
* remove the unused cpu_count import
* fix the process start/join loop
* stop using mp.spawn (ref. https://discuss.pytorch.org/t/how-to-fix-a-sigsegv-in-pytorch-when-using-distributed-training-e-g-ddp/113518/10)
* un-comment the n_gpus line
1 parent 563c64d commit 080b7cd
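
The "leaked semaphore objects" warning reported in #309 typically comes from multiprocessing children that are not all tracked and joined. The process loops in extract_f0_print.py, trainset_preprocess_pipeline_print.py, and train_nsf_sim_cache_sid_load_pretrain.py are rewritten to the same shape: append each child to a list, start it, then join every child by index. A minimal sketch of that pattern, with a placeholder worker and chunking (names not from the repo):

from multiprocessing import Process

def worker(chunk):
    # stand-in for the real per-process job (f0 extraction, preprocessing, ...)
    pass

def run_workers(items, n_p=4):
    ps = []
    for i in range(n_p):
        p = Process(target=worker, args=(items[i::n_p],))
        ps.append(p)      # track the child before it starts
        p.start()
    for i in range(n_p):
        ps[i].join()      # wait for every tracked child

if __name__ == "__main__":
    run_workers(list(range(100)))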

File tree

4 files changed: +19 −17 lines

extract_f0_print.py
infer-web.py
train_nsf_sim_cache_sid_load_pretrain.py
trainset_preprocess_pipeline_print.py


extract_f0_print.py

Lines changed: 3 additions & 3 deletions
@@ -154,7 +154,7 @@ def go(self, paths, f0_method):
             f0method,
         ),
     )
-    p.start()
     ps.append(p)
-for p in ps:
-    p.join()
+    p.start()
+for i in range(n_p):
+    ps[i].join()

infer-web.py

Lines changed: 2 additions & 4 deletions
@@ -1,7 +1,6 @@
 import torch, os, traceback, sys, warnings, shutil, numpy as np
 
 os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
-from multiprocessing import cpu_count
 import threading
 from time import sleep
 from subprocess import Popen
@@ -25,7 +24,6 @@
 
 i18n = I18nAuto()
 # 判断是否有能用来训练和加速推理的N卡
-ncpu = cpu_count()
 ngpu = torch.cuda.device_count()
 gpu_infos = []
 mem = []
@@ -1436,10 +1434,10 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
                     )
                     np7 = gr.Slider(
                         minimum=0,
-                        maximum=ncpu,
+                        maximum=config.n_cpu,
                         step=1,
                         label=i18n("提取音高和处理数据使用的CPU进程数"),
-                        value=ncpu,
+                        value=config.n_cpu,
                         interactive=True,
                     )
                     with gr.Group():  # 暂时单人的, 后面支持最多4人的#数据处理
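
The slider limits are now taken from config.n_cpu instead of a module-level cpu_count(). The config side is not part of this diff; a minimal sketch of what it might look like, assuming n_cpu simply defaults to the machine's CPU count:

from multiprocessing import cpu_count

class Config:
    # hypothetical excerpt: only the n_cpu field used by the np7 slider is shown
    def __init__(self, n_cpu: int = 0):
        # fall back to the machine's CPU count when not set explicitly
        self.n_cpu = n_cpu if n_cpu > 0 else cpu_count()

config = Config()
print(config.n_cpu)  # feeds both maximum= and value= of the slider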

train_nsf_sim_cache_sid_load_pretrain.py

Lines changed: 11 additions & 7 deletions
@@ -66,18 +66,22 @@ def record(self):
 
 
 def main():
-    # n_gpus = torch.cuda.device_count()
+    n_gpus = torch.cuda.device_count()
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "51545"
 
-    mp.spawn(
-        run,
-        nprocs=n_gpus,
-        args=(
+    children = []
+    for i in range(n_gpus):
+        subproc = mp.Process(target=run, args=(
+            i,
             n_gpus,
             hps,
-        ),
-    )
+        ))
+        children.append(subproc)
+        subproc.start()
+
+    for i in range(n_gpus):
+        children[i].join()
 
 
 def run(rank, n_gpus, hps):
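
Note that mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hps)) injects the process index as the first positional argument on its own, so the explicit mp.Process loop has to pass i itself, which the diff does. A minimal sketch of the two launch styles side by side (the worker body is a placeholder and hps is a dummy value):

import torch.multiprocessing as mp

def run(rank, n_gpus, hps):
    # placeholder worker; the real run() sets up DDP training for this rank
    print(f"rank {rank}/{n_gpus}, hps={hps}")

def launch_with_spawn(n_gpus, hps):
    # spawn passes the rank as the first argument and joins the children itself
    mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hps))

def launch_with_process(n_gpus, hps):
    # explicit Process loop, as used in this commit: the rank is passed by hand
    children = []
    for i in range(n_gpus):
        p = mp.Process(target=run, args=(i, n_gpus, hps))
        children.append(p)
        p.start()
    for i in range(n_gpus):
        children[i].join()

if __name__ == "__main__":
    launch_with_process(2, {"dummy": True})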

trainset_preprocess_pipeline_print.py

Lines changed: 3 additions & 3 deletions
@@ -115,10 +115,10 @@ def pipeline_mp_inp_dir(self, inp_root, n_p):
                    p = multiprocessing.Process(
                        target=self.pipeline_mp, args=(infos[i::n_p],)
                    )
-                    p.start()
                     ps.append(p)
-                for p in ps:
-                    p.join()
+                    p.start()
+                for i in range(n_p):
+                    ps[i].join()
         except:
             println("Fail. %s" % traceback.format_exc())
 
