@@ -296,7 +296,14 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool:
296296 health_checks = [
297297 (
298298 "postgres" ,
299- lambda h : h .run ("sudo -u postgres /usr/bin/pg_isready -U postgres" ),
299+ lambda h : (
300+ # First check if PostgreSQL is running
301+ h .run ("sudo systemctl is-active postgresql" ),
302+ # Then check if the socket directory exists and has correct permissions
303+ h .run ("sudo ls -la /run/postgresql" ),
304+ # Then try pg_isready
305+ h .run ("sudo -u postgres /usr/bin/pg_isready -U postgres" )
306+ ),
300307 ),
301308 (
302309 "adminapi" ,
@@ -322,23 +329,56 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool:
322329
323330 for service , check in health_checks :
324331 try :
325- cmd = check (host )
326- if cmd .failed is True :
327- logger .warning (f"{ service } not ready" )
328- logger .error (f"{ service } command failed with rc={ cmd .rc } " )
329- logger .error (f"{ service } stdout: { cmd .stdout } " )
330- logger .error (f"{ service } stderr: { cmd .stderr } " )
332+ if service == "postgres" :
333+ # For PostgreSQL, we need to check multiple things
334+ systemd_status , socket_check , pg_isready = check (host )
335+
336+ if systemd_status .failed :
337+ logger .error ("PostgreSQL systemd service is not active" )
338+ logger .error (f"systemd status: { systemd_status .stdout } " )
339+ logger .error (f"systemd error: { systemd_status .stderr } " )
340+
341+ # Check init script logs
342+ logger .error ("Init script logs:" )
343+ host .run ("sudo journalctl -u cloud-init --no-pager" )
344+
345+ # Check cloud-init logs
346+ logger .error ("Cloud-init logs:" )
347+ host .run ("sudo cat /var/log/cloud-init-output.log" )
348+
349+ # Check if init script exists and its contents
350+ logger .error ("Init script status:" )
351+ host .run ("ls -la /tmp/init.sh" )
352+ host .run ("cat /tmp/init.sh" )
353+
354+ if socket_check .failed :
355+ logger .error ("PostgreSQL socket directory check failed" )
356+ logger .error (f"socket check: { socket_check .stdout } " )
357+ logger .error (f"socket error: { socket_check .stderr } " )
358+
359+ if pg_isready .failed :
360+ logger .error ("pg_isready check failed" )
361+ logger .error (f"pg_isready output: { pg_isready .stdout } " )
362+ logger .error (f"pg_isready error: { pg_isready .stderr } " )
331363
332- # For PostgreSQL, also check the logs and systemd status
333- if service == "postgres" :
334- logger .error ("PostgreSQL logs:" )
335- host .run ("sudo cat /var/log/postgresql/postgresql-*.log" )
336- logger .error ("PostgreSQL systemd status:" )
337- host .run ("sudo systemctl status postgresql" )
338- logger .error ("PostgreSQL journal logs:" )
339- host .run ("sudo journalctl -u postgresql --no-pager" )
364+ # Check PostgreSQL logs for startup issues
365+ logger .error ("PostgreSQL logs:" )
366+ host .run ("sudo cat /var/log/postgresql/postgresql-*.log" )
367+ logger .error ("PostgreSQL systemd status:" )
368+ host .run ("sudo systemctl status postgresql" )
369+ logger .error ("PostgreSQL journal logs:" )
370+ host .run ("sudo journalctl -u postgresql --no-pager" )
340371
341- return False
372+ if any (cmd .failed for cmd in [systemd_status , socket_check , pg_isready ]):
373+ return False
374+ else :
375+ cmd = check (host )
376+ if cmd .failed is True :
377+ logger .warning (f"{ service } not ready" )
378+ logger .error (f"{ service } command failed with rc={ cmd .rc } " )
379+ logger .error (f"{ service } stdout: { cmd .stdout } " )
380+ logger .error (f"{ service } stderr: { cmd .stderr } " )
381+ return False
342382 except Exception as e :
343383 logger .warning (
344384 f"Connection failed during { service } check, attempting reconnect..."
0 commit comments