
Commit f12af67

Fix critical server communication error handling
- Add proper exception handling for server_api -> server_pytc communication
- Add timeout protection (30s) to prevent hanging requests
- Provide clear error messages for ConnectionError and Timeout cases
- Return structured error responses with both message and error details
- Fixes potential hanging when server_pytc is not running or overloaded

All endpoints now handle:
- ConnectionError: server_pytc not running
- Timeout: server_pytc overloaded/unresponsive
- Other exceptions: generic error handling
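In practice, the structured responses mean a caller distinguishes success from failure by checking for an "error" key in the JSON body rather than parsing message text. A minimal client-side sketch, assuming a placeholder server_api address, port, and payload that are not taken from this repo:

    # Hypothetical client of server_api; host, port, and payload are placeholders.
    import requests

    resp = requests.post(
        "http://localhost:4242/start_model_training",  # assumed server_api address
        json={"config": "trainer.yaml"},                # hypothetical payload
        timeout=35,  # slightly above server_api's 30s forward timeout
    )
    body = resp.json()

    if "error" in body:
        # server_pytc was unreachable, timed out, or returned a non-200 status
        print("Training did not start:", body["message"], "|", body["error"])
    else:
        print(body["message"], "| pytc response:", body.get("data"))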
1 parent 646127f commit f12af67

File tree: 1 file changed (+74, -42 lines)


server_api/main.py

Lines changed: 74 additions & 42 deletions
@@ -71,65 +71,97 @@ def ngLayer(data, res, oo=[0, 0, 0], tt="segmentation"):
 @app.post("/start_model_training")
 async def start_model_training(req: Request):
     req = await req.json()
-    response = requests.post(
-        REACT_APP_SERVER_PROTOCOL
-        + "://"
-        + REACT_APP_SERVER_URL
-        + "/start_model_training",
-        json=req,
-    )
+    try:
+        response = requests.post(
+            REACT_APP_SERVER_PROTOCOL
+            + "://"
+            + REACT_APP_SERVER_URL
+            + "/start_model_training",
+            json=req,
+            timeout=30  # prevent hanging if server_pytc is unresponsive
+        )

-    if response.status_code == 200:
-        return {"message": "Model training started successfully"}
-    else:
-        return {"message": "Failed to start model training"}
+        if response.status_code == 200:
+            return {"message": "Model training started successfully", "data": response.json()}
+        else:
+            return {"message": f"Failed to start model training: {response.status_code}", "error": response.text}
+    except requests.exceptions.ConnectionError:
+        return {"message": "Failed to connect to PyTC server. Is server_pytc running?", "error": "ConnectionError"}
+    except requests.exceptions.Timeout:
+        return {"message": "Request timed out. PyTC server may be overloaded.", "error": "Timeout"}
+    except Exception as e:
+        return {"message": f"Failed to start model training: {str(e)}", "error": str(e)}


 @app.post("/stop_model_training")
 async def stop_model_training():
-    response = requests.post(
-        REACT_APP_SERVER_PROTOCOL
-        + "://"
-        + REACT_APP_SERVER_URL
-        + "/stop_model_training"
-    )
+    try:
+        response = requests.post(
+            REACT_APP_SERVER_PROTOCOL
+            + "://"
+            + REACT_APP_SERVER_URL
+            + "/stop_model_training",
+            timeout=30
+        )

-    if response.status_code == 200:
-        return {"message": "Model training stopped successfully"}
-    else:
-        return {"message": "Failed to stop model training"}
+        if response.status_code == 200:
+            return {"message": "Model training stopped successfully", "data": response.json()}
+        else:
+            return {"message": f"Failed to stop model training: {response.status_code}", "error": response.text}
+    except requests.exceptions.ConnectionError:
+        return {"message": "Failed to connect to PyTC server. Is server_pytc running?", "error": "ConnectionError"}
+    except requests.exceptions.Timeout:
+        return {"message": "Request timed out.", "error": "Timeout"}
+    except Exception as e:
+        return {"message": f"Failed to stop model training: {str(e)}", "error": str(e)}


 @app.post("/start_model_inference")
 async def start_model_inference(req: Request):
     req = await req.json()
-    response = requests.post(
-        REACT_APP_SERVER_PROTOCOL
-        + "://"
-        + REACT_APP_SERVER_URL
-        + "/start_model_inference",
-        json=req,
-    )
+    try:
+        response = requests.post(
+            REACT_APP_SERVER_PROTOCOL
+            + "://"
+            + REACT_APP_SERVER_URL
+            + "/start_model_inference",
+            json=req,
+            timeout=30
+        )

-    if response.status_code == 200:
-        return {"message": "Model inference started successfully"}
-    else:
-        return {"message": "Failed to start model inference"}
+        if response.status_code == 200:
+            return {"message": "Model inference started successfully", "data": response.json()}
+        else:
+            return {"message": f"Failed to start model inference: {response.status_code}", "error": response.text}
+    except requests.exceptions.ConnectionError:
+        return {"message": "Failed to connect to PyTC server. Is server_pytc running?", "error": "ConnectionError"}
+    except requests.exceptions.Timeout:
+        return {"message": "Request timed out. PyTC server may be overloaded.", "error": "Timeout"}
+    except Exception as e:
+        return {"message": f"Failed to start model inference: {str(e)}", "error": str(e)}


 @app.post("/stop_model_inference")
 async def stop_model_inference():
-    response = requests.post(
-        REACT_APP_SERVER_PROTOCOL
-        + "://"
-        + REACT_APP_SERVER_URL
-        + "/stop_model_inference"
-    )
+    try:
+        response = requests.post(
+            REACT_APP_SERVER_PROTOCOL
+            + "://"
+            + REACT_APP_SERVER_URL
+            + "/stop_model_inference",
+            timeout=30
+        )

-    if response.status_code == 200:
-        return {"message": "Model inference stopped successfully"}
-    else:
-        return {"message": "Failed to stop model inference"}
+        if response.status_code == 200:
+            return {"message": "Model inference stopped successfully", "data": response.json()}
+        else:
+            return {"message": f"Failed to stop model inference: {response.status_code}", "error": response.text}
+    except requests.exceptions.ConnectionError:
+        return {"message": "Failed to connect to PyTC server. Is server_pytc running?", "error": "ConnectionError"}
+    except requests.exceptions.Timeout:
+        return {"message": "Request timed out.", "error": "Timeout"}
+    except Exception as e:
+        return {"message": f"Failed to stop model inference: {str(e)}", "error": str(e)}


 @app.get("/get_tensorboard_url")
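The same try/except/timeout pattern is repeated verbatim in all four endpoints above. As a sketch only (the helper name _forward_to_pytc, its signature, and the default messages are assumptions, not code from this commit), the pattern could be factored into one function that builds the URL from the same REACT_APP_SERVER_PROTOCOL / REACT_APP_SERVER_URL settings and returns the structured message/error dict:

    # Sketch of a shared helper for the repeated pattern; assumes it lives in
    # server_api/main.py where REACT_APP_SERVER_PROTOCOL and REACT_APP_SERVER_URL
    # are already defined. Names and messages here are illustrative assumptions.
    import requests

    def _forward_to_pytc(path, ok_msg, fail_msg, payload=None, timeout=30):
        url = REACT_APP_SERVER_PROTOCOL + "://" + REACT_APP_SERVER_URL + path
        try:
            response = requests.post(url, json=payload, timeout=timeout)
            if response.status_code == 200:
                return {"message": ok_msg, "data": response.json()}
            return {"message": f"{fail_msg}: {response.status_code}", "error": response.text}
        except requests.exceptions.ConnectionError:
            return {"message": "Failed to connect to PyTC server. Is server_pytc running?",
                    "error": "ConnectionError"}
        except requests.exceptions.Timeout:
            return {"message": "Request timed out. PyTC server may be overloaded.",
                    "error": "Timeout"}
        except Exception as e:
            return {"message": f"{fail_msg}: {e}", "error": str(e)}

    # Each endpoint would then reduce to a single call, e.g.:
    # return _forward_to_pytc("/start_model_training",
    #                         "Model training started successfully",
    #                         "Failed to start model training", payload=req)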
