16
16
17
17
18
18
def get_base_port ():
19
- nv_visible_devices = os .environ .get ("NVIDIA_VISIBLE_DEVICES" , "" )
19
+ """获取base port"""
20
+ nv_visible_devices = os .environ .get ("CUDA_VISIBLE_DEVICES" , "" )
20
21
if not nv_visible_devices or nv_visible_devices .lower () == "all" :
21
22
return 8000
22
23
# 提取第一个数字
@@ -26,14 +27,37 @@ def get_base_port():
26
27
return 8000
27
28
28
29
30
def is_port_in_use(port):
    """Check whether something accepts TCP connections on ``localhost:port``.

    Only a listening socket is detected: the check opens an IPv4 TCP
    connection to ``localhost`` and reports whether it succeeded.

    Args:
        port: TCP port number to probe.

    Returns:
        True if the connection attempt succeeds, False otherwise
        (including when the attempt times out).
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Bound the probe so a connect that never completes (e.g. packets
        # silently dropped) cannot stall the caller indefinitely.
        s.settimeout(1)
        # connect_ex returns 0 on success and an errno value otherwise,
        # so no exception handling is needed for a refused connection.
        return s.connect_ex(("localhost", port)) == 0
34
+
35
+
36
def get_available_port(env_key: str, default_start: int):
    """Pick a TCP port, preferring the one named by an environment variable.

    The value of ``env_key`` is returned when it is a decimal port number in
    the range 1..65535 and ``is_port_in_use`` reports it as free. Otherwise
    ports are probed upward from ``default_start`` and the first free one is
    returned.

    Args:
        env_key: Name of the environment variable holding the preferred port.
        default_start: First port to probe when the preferred port is unset,
            invalid, or already in use.

    Returns:
        A port number that was reported free at the time of the check.

    Raises:
        RuntimeError: If no free port is found between ``default_start`` and
            65535.
    """
    max_port = 65535

    port_str = os.environ.get(env_key)
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
    if port_str and port_str.isdecimal():
        port = int(port_str)
        if not 0 < port <= max_port:
            # Probing an out-of-range port would raise OverflowError.
            print(f"Warning: Port {port} from {env_key} is out of range, searching for a free port...")
        elif not is_port_in_use(port):
            return port
        else:
            print(f"Warning: Port {port} from {env_key} is in use, searching for a free port...")

    # Scan upward from default_start, stopping at the highest valid port
    # instead of incrementing without bound.
    for port in range(max(default_start, 1), max_port + 1):
        if not is_port_in_use(port):
            # NOTE: the port is only known to be free right now; another
            # process may still claim it before the caller binds to it.
            return port

    raise RuntimeError(f"No free port found for {env_key} in range {default_start}-{max_port}")
51
+
52
+
29
53
# 默认参数值
30
54
PID_FILE = "pid_port"
31
55
LOG_FILE = "server.log"
32
56
base_port = get_base_port ()
33
- FLASK_PORT = int ( os . environ . get ( "FLASK_PORT" , base_port + 1 ) )
34
- FD_API_PORT = int ( os . environ . get ( "FD_API_PORT" , base_port + 2 ) )
35
- FD_ENGINE_QUEUE_PORT = int ( os . environ . get ( "FD_ENGINE_QUEUE_PORT" , base_port + 3 ) )
36
- FD_METRICS_PORT = int ( os . environ . get ( "FD_METRICS_PORT" , base_port + 4 ) )
57
+ FLASK_PORT = get_available_port ( "FLASK_PORT" , base_port + 1 )
58
+ FD_API_PORT = get_available_port ( "FD_API_PORT" , FLASK_PORT + 1 )
59
+ FD_ENGINE_QUEUE_PORT = get_available_port ( "FD_ENGINE_QUEUE_PORT" , FD_API_PORT + 1 )
60
+ FD_METRICS_PORT = get_available_port ( "FD_METRICS_PORT" , FD_ENGINE_QUEUE_PORT + 1 )
37
61
DEFAULT_PARAMS = {
38
62
"--port" : FD_API_PORT ,
39
63
"--engine-worker-queue-port" : FD_ENGINE_QUEUE_PORT ,
@@ -73,12 +97,6 @@ def merge_configs(base_config, override_config):
73
97
return merged
74
98
75
99
76
- def is_port_in_use (port ):
77
- """检查端口是否被占用"""
78
- with socket .socket (socket .AF_INET , socket .SOCK_STREAM ) as s :
79
- return s .connect_ex (("localhost" , port )) == 0
80
-
81
-
82
100
def get_server_pid ():
83
101
"""获取服务进程ID PORT"""
84
102
if os .path .exists (PID_FILE ):
@@ -105,7 +123,8 @@ def is_server_running():
105
123
try :
106
124
response = requests .get (health_check_endpoint , timeout = 2 )
107
125
return response .status_code == 200 , result
108
- except requests .exceptions .RequestException :
126
+ except requests .exceptions .RequestException as e :
127
+ print (f"Failed to check server health: { e } " )
109
128
return False , result
110
129
111
130
@@ -158,14 +177,14 @@ def stop_server(signum=None, frame=None):
158
177
except Exception as e :
159
178
print (f"Failed to stop server: { e } " )
160
179
161
- for port in [FD_API_PORT , FD_ENGINE_QUEUE_PORT , FD_METRICS_PORT ]:
162
- try :
163
- output = subprocess .check_output (f"lsof -i:{ port } -t" , shell = True ).decode ().strip ()
164
- for pid in output .splitlines ():
165
- os .kill (int (pid ), signal .SIGKILL )
166
- print (f"Killed process on port { port } , pid={ pid } " )
167
- except Exception as e :
168
- print (f"Failed to killed process on port: { e } " )
180
+ for port in [FD_API_PORT , FD_ENGINE_QUEUE_PORT , FD_METRICS_PORT ]:
181
+ try :
182
+ output = subprocess .check_output (f"lsof -i:{ port } -t" , shell = True ).decode ().strip ()
183
+ for pid in output .splitlines ():
184
+ os .kill (int (pid ), signal .SIGKILL )
185
+ print (f"Killed process on port { port } , pid={ pid } " )
186
+ except Exception as e :
187
+ print (f"Failed to killed process on port: { e } " )
169
188
# 若log目录存在,则重命名为log_timestamp
170
189
if os .path .isdir ("./log" ):
171
190
os .rename ("./log" , "./log_{}" .format (time .strftime ("%Y%m%d%H%M%S" )))
@@ -196,6 +215,7 @@ def start_service():
196
215
base_config = DEFAULT_PARAMS
197
216
198
217
override_config = request .get_json () or {}
218
+ print ("override_config" , override_config )
199
219
200
220
final_config = merge_configs (base_config , override_config )
201
221
0 commit comments