|
7 | 7 | import subprocess |
8 | 8 | from loguru import logger |
9 | 9 | import torch |
| 10 | +import psutil |
| 11 | +from rich import print |
| 12 | +import signal |
10 | 13 |
|
11 | 14 | logger.add("logs/gpt_server.log", rotation="100 MB", level="INFO") |
12 | 15 |
|
13 | 16 |
|
def kill_child_processes(parent_pid, including_parent=False):
    """Terminate every descendant process of ``parent_pid``.

    Each descendant (children are collected recursively) is sent SIGTERM
    and given up to 5 seconds to exit; if it is still alive after that it
    is sent SIGKILL.

    Args:
        parent_pid: PID of the process whose descendants are terminated.
        including_parent: When true, SIGTERM is also sent to ``parent_pid``
            itself after its descendants have been handled.

    If ``parent_pid`` does not exist, a message is printed and nothing
    else is done.
    """
    try:
        parent = psutil.Process(parent_pid)
        children = parent.children(recursive=True)
    except psutil.NoSuchProcess:
        print(f"父进程 {parent_pid} 不存在!")
        return

    for child in children:
        try:
            print(f"终止子进程 {child.pid}...")
            os.kill(child.pid, signal.SIGTERM)  # graceful termination
            child.wait(5)  # give the child at most 5 seconds to exit
        except (psutil.NoSuchProcess, ProcessLookupError):
            # The child already exited; os.kill reports that as
            # ProcessLookupError, psutil as NoSuchProcess.
            continue
        except psutil.TimeoutExpired:
            # NOTE: the except clause must name the exception *class*;
            # calling it (``TimeoutExpired()``) raises TypeError instead of
            # catching the timeout.
            print(f"终止子进程 {child.pid} 超时!强制终止...")
            try:
                os.kill(child.pid, signal.SIGKILL)  # forced termination
            except ProcessLookupError:
                # The child exited between the timeout and the SIGKILL.
                pass

    if including_parent:
        print(f"终止父进程 {parent_pid}...")
        try:
            os.kill(parent_pid, signal.SIGTERM)
        except ProcessLookupError:
            # The parent disappeared while its children were being handled.
            print(f"父进程 {parent_pid} 不存在!")
| 37 | + |
| 38 | + |
# Record the PID of the current process when this module is loaded;
# signal_handler passes it to kill_child_processes to find the descendants
# that must be terminated.
parent_pid = os.getpid()
| 41 | + |
| 42 | + |
def signal_handler(signum, frame):
    """Handle SIGINT: terminate all descendant processes, then exit with 0.

    Args:
        signum: Number of the received signal (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        SystemExit: Always, with exit status 0, after cleanup.
    """
    # Local import keeps this block self-contained. ``sys.exit`` is used
    # because the bare ``exit`` helper is injected by the ``site`` module
    # and is missing under ``python -S`` or in frozen applications.
    import sys

    print("\nCtrl-C detected! Cleaning up...")
    kill_child_processes(parent_pid, including_parent=False)
    sys.exit(0)  # normal exit
| 47 | + |
| 48 | + |
# Install signal_handler as the handler for SIGINT (Ctrl-C).
signal.signal(signal.SIGINT, signal_handler)
| 50 | + |
| 51 | + |
def run_cmd(cmd: str, *args, **kwargs):
    """Run ``cmd`` through the shell, block until it exits, return its PID.

    Args:
        cmd: Command line handed to the shell.
        *args: Accepted but not used.
        **kwargs: Accepted but not used.

    Returns:
        The PID of the shell process that was spawned for ``cmd``.
    """
    logger.info(f"执行命令如下:\n{cmd}\n")
    proc = subprocess.Popen(cmd, shell=True)
    proc.wait()  # block until the command has finished
    return proc.pid
17 | 59 |
|
18 | 60 |
|
19 | 61 | def start_controller(controller_host, controller_port, dispatch_method): |
|
0 commit comments