Skip to content

Commit 29090cc

Browse files
committed
优化服务停止逻辑
1 parent 5fd7fd5 commit 29090cc

File tree

2 files changed

+50
-11
lines changed

2 files changed

+50
-11
lines changed

gpt_server/serving/main.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import yaml
22
import os
33
import sys
4-
import signal
54
import ray
65

76
os.environ["OPENBLAS_NUM_THREADS"] = (
@@ -20,27 +19,25 @@
2019
from gpt_server.utils import (
2120
start_api_server,
2221
start_model_worker,
23-
stop_server,
2422
delete_log,
2523
)
2624

2725
# 删除日志
2826
delete_log()
2927

3028

31-
def signal_handler(signum, frame):
32-
stop_server()
33-
raise KeyboardInterrupt
34-
35-
36-
signal.signal(signal.SIGINT, signal_handler)
37-
3829
config_path = os.path.join(root_dir, "gpt_server/script/config.yaml")
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# locale encoding (non-ASCII text in the YAML would otherwise break on
# systems whose default codec is not UTF-8).
with open(config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)
32+
33+
4134
# print(config)
42-
if __name__ == "__main__":
35+
def main():
    """Start the services described by the module-level ``config``.

    The controller / OpenAI API server is started first, then the model
    worker service.
    """
    # Controller and OpenAI API service.
    start_api_server(config=config)

    # Model worker service.
    start_model_worker(config=config)
40+
41+
42+
if __name__ == "__main__":
43+
main()

gpt_server/utils.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,55 @@
77
import subprocess
88
from loguru import logger
99
import torch
10+
import psutil
11+
from rich import print
12+
import signal
1013

1114
logger.add("logs/gpt_server.log", rotation="100 MB", level="INFO")
1215

1316

17+
def kill_child_processes(parent_pid, including_parent=False):
    """Terminate all descendants of a process, escalating to a hard kill.

    Every descendant of ``parent_pid`` is first asked to terminate. The
    descendants are then given up to 5 seconds *in total* to exit; any that
    are still alive after that are force-killed.

    Args:
        parent_pid: PID of the process whose descendants should be stopped.
        including_parent: When true, the parent process itself is also asked
            to terminate after its descendants have been handled.
    """
    try:
        parent = psutil.Process(parent_pid)
        children = parent.children(recursive=True)
    except psutil.NoSuchProcess:
        print(f"父进程 {parent_pid} 不存在!")
        return

    # Phase 1: request a graceful shutdown of every descendant.
    signalled = []
    for child in children:
        try:
            print(f"终止子进程 {child.pid}...")
            child.terminate()
            signalled.append(child)
        except psutil.NoSuchProcess:
            # The child exited on its own before we got to it.
            pass

    # Phase 2: wait once for the whole group instead of 5 seconds per child,
    # so shutdown time stays bounded regardless of how many children exist.
    _, still_alive = psutil.wait_procs(signalled, timeout=5)

    # Phase 3: force-kill whatever ignored the termination request.
    for child in still_alive:
        print(f"终止子进程 {child.pid} 超时!强制终止...")
        try:
            child.kill()
        except psutil.NoSuchProcess:
            # It exited between the wait and the kill.
            pass

    if including_parent:
        print(f"终止父进程 {parent_pid}...")
        try:
            parent.terminate()
        except psutil.NoSuchProcess:
            print(f"父进程 {parent_pid} 不存在!")
37+
38+
39+
# 记录父进程 PID
40+
parent_pid = os.getpid()
41+
42+
43+
def signal_handler(signum, frame):
    """Handle SIGINT: stop all child processes, then exit with status 0.

    Args:
        signum: Signal number passed in by the ``signal`` module (unused).
        frame: Current stack frame passed in by the ``signal`` module (unused).

    Raises:
        SystemExit: Always, with exit code 0, once cleanup has run.
    """
    print("\nCtrl-C detected! Cleaning up...")
    kill_child_processes(parent_pid, including_parent=False)
    # Raise SystemExit directly rather than calling the ``exit()`` helper:
    # that helper is injected by the ``site`` module for interactive use and
    # is not guaranteed to exist (e.g. ``python -S`` or frozen executables).
    raise SystemExit(0)
47+
48+
49+
signal.signal(signal.SIGINT, signal_handler)
50+
51+
1452
def run_cmd(cmd: str, *args, **kwargs):
    """Run ``cmd`` through the shell, block until it exits, and return its PID.

    Args:
        cmd: Command line to execute; it is logged before being run.
        *args: Accepted but not used.
        **kwargs: Accepted but not used.

    Returns:
        The PID of the shell process that executed the command. The process
        has already finished by the time this function returns.
    """
    logger.info(f"执行命令如下:\n{cmd}\n")
    child = subprocess.Popen(cmd, shell=True)
    child.wait()  # block until the command has completed
    return child.pid
1759

1860

1961
def start_controller(controller_host, controller_port, dispatch_method):

0 commit comments

Comments
 (0)