@@ -338,89 +338,95 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool:
338338 logger .error (f"systemd status: { systemd_status .stdout } " )
339339 logger .error (f"systemd error: { systemd_status .stderr } " )
340340
341- # Get detailed systemd status
342- logger .error ("Detailed systemd status:" )
343- host .run ("sudo systemctl status postgresql -l --no-pager" )
341+ # Check systemd service unit file
342+ logger .error ("PostgreSQL systemd service unit file:" )
343+ result = host .run ("sudo systemctl cat postgresql" )
344+ logger .error (f"service unit file:\n { result .stdout } \n { result .stderr } " )
344345
345- # Check init script logs
346- logger .error ("Init script logs:" )
347- host .run ("sudo journalctl -u cloud-init --no-pager" )
346+ # Check systemd service environment
347+ logger .error ("PostgreSQL systemd service environment:" )
348+ result = host .run ("sudo systemctl show postgresql" )
349+ logger .error (f"service environment:\n { result .stdout } \n { result .stderr } " )
348350
349- # Check cloud-init logs
350- logger .error ("Cloud-init logs:" )
351- host .run ("sudo cat /var/log/cloud-init-output.log" )
351+ # Check systemd service dependencies
352+ logger .error ("PostgreSQL systemd service dependencies:" )
353+ result = host .run ("sudo systemctl list-dependencies postgresql" )
354+ logger .error (f"service dependencies:\n { result .stdout } \n { result .stderr } " )
352355
353- # Check if init script exists and its contents
354- logger .error ("Init script status:" )
355- host .run ("ls -la /tmp/init.sh " )
356- host . run ( "cat /tmp/init.sh " )
356+ # Check if service is enabled
357+ logger .error ("PostgreSQL service enabled status:" )
358+ result = host .run ("sudo systemctl is-enabled postgresql " )
359+ logger . error ( f"service enabled status: \n { result . stdout } \n { result . stderr } " )
357360
358- # Check PostgreSQL configuration
359- logger .error ("PostgreSQL configuration :" )
360- host .run ("sudo cat /etc/ postgresql/*/main/postgresql.conf " )
361- host . run ( "sudo cat /etc/postgresql/*/main/pg_hba.conf " )
361+ # Check systemd journal for service execution logs
362+ logger .error ("Systemd journal entries for PostgreSQL service execution :" )
363+ result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager " )
364+ logger . error ( f"systemd journal: \n { result . stdout } \n { result . stderr } " )
362365
363- # Check PostgreSQL data directory permissions
364- logger .error ("PostgreSQL data directory permissions:" )
365- host .run ("sudo ls -la /var/lib/postgresql/*/main/" )
366+ # Check systemd journal specifically for ExecStartPre and ExecStart
367+ logger .error ("Systemd journal entries for ExecStartPre and ExecStart:" )
368+ result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager | grep -E 'ExecStartPre|ExecStart'" )
369+ logger .error (f"execution logs:\n { result .stdout } \n { result .stderr } " )
366370
367- # Check PostgreSQL startup logs
368- logger .error ("PostgreSQL startup logs:" )
369- host .run ("sudo cat /var/log/postgresql/postgresql-*.log" )
371+ # Check systemd journal for any errors
372+ logger .error ("Systemd journal entries with error level:" )
373+ result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager -p err" )
374+ logger .error (f"error logs:\n { result .stdout } \n { result .stderr } " )
370375
371- # Check systemd journal for PostgreSQL
372- logger .error ("Systemd journal for PostgreSQL:" )
373- host .run ("sudo journalctl -u postgresql -n 100 --no-pager" )
376+ # Check pre-start script output
377+ logger .error ("Checking pre-start script output:" )
378+ result = host .run ("sudo -u postgres /usr/local/bin/postgres_prestart.sh" )
379+ logger .error (f"pre-start script output:\n { result .stdout } \n { result .stderr } " )
374380
375- # Check for any PostgreSQL-related errors in system logs
376- logger .error ("System logs with PostgreSQL errors:" )
377- host .run ("sudo journalctl | grep -i postgres | tail -n 100" )
381+ # Check PostgreSQL logs directory
382+ logger .error ("Checking PostgreSQL logs directory:" )
383+ result = host .run ("sudo ls -la /var/log/postgresql/" )
384+ logger .error (f"log directory contents:\n { result .stdout } \n { result .stderr } " )
378385
379- # Check for any disk space issues
380- logger .error ("Disk space information :" )
381- host .run ("df -h " )
382- host . run ( "sudo du -sh /var/lib/ postgresql/* " )
386+ # Check any existing PostgreSQL logs
387+ logger .error ("Checking existing PostgreSQL logs :" )
388+ result = host .run ("sudo cat /var/log/postgresql/*.log " )
389+ logger . error ( f" postgresql logs: \n { result . stdout } \n { result . stderr } " )
383390
384- # Check for any memory issues
385- logger .error ("Memory information:" )
386- host .run ("free -h" )
391+ # Try starting PostgreSQL directly with pg_ctl and capture output
392+ logger .error ("Attempting to start PostgreSQL directly with pg_ctl:" )
393+ startup_log = "/tmp/postgres-start.log"
394+ result = host .run (f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l { startup_log } " )
395+ logger .error (f"pg_ctl start attempt:\n { result .stdout } \n { result .stderr } " )
387396
388- # Check for any process conflicts
389- logger .error ("Running processes:" )
390- host .run ("ps aux | grep postgres" )
397+ # Check the startup log
398+ logger .error ("PostgreSQL startup log:" )
399+ result = host .run (f"sudo cat { startup_log } " )
400+ logger .error (f"startup log contents:\n { result .stdout } \n { result .stderr } " )
391401
392- # Check for any port conflicts
393- logger .error ("Port usage:" )
394- host .run ("sudo netstat -tulpn | grep 5432" )
395-
396- if socket_check .failed :
397- logger .error ("PostgreSQL socket directory check failed" )
398- logger .error (f"socket check: { socket_check .stdout } " )
399- logger .error (f"socket error: { socket_check .stderr } " )
400-
401- if pg_isready .failed :
402- logger .error ("pg_isready check failed" )
403- logger .error (f"pg_isready output: { pg_isready .stdout } " )
404- logger .error (f"pg_isready error: { pg_isready .stderr } " )
405-
406- # Check PostgreSQL logs for startup issues
407- logger .error ("PostgreSQL logs:" )
408- host .run ("sudo cat /var/log/postgresql/postgresql-*.log" )
409- logger .error ("PostgreSQL systemd status:" )
410- host .run ("sudo systemctl status postgresql" )
411- logger .error ("PostgreSQL journal logs:" )
412- host .run ("sudo journalctl -u postgresql --no-pager" )
413-
414- if any (cmd .failed for cmd in [systemd_status , socket_check , pg_isready ]):
415- return False
416- else :
417- cmd = check (host )
418- if cmd .failed is True :
419- logger .warning (f"{ service } not ready" )
420- logger .error (f"{ service } command failed with rc={ cmd .rc } " )
421- logger .error (f"{ service } stdout: { cmd .stdout } " )
422- logger .error (f"{ service } stderr: { cmd .stderr } " )
423- return False
402+ # Clean up the startup log
403+ result = host .run (f"sudo rm -f { startup_log } " )
404+
405+ # Check PostgreSQL configuration
406+ logger .error ("PostgreSQL configuration:" )
407+ result = host .run ("sudo cat /etc/postgresql/postgresql.conf" )
408+ logger .error (f"postgresql.conf:\n { result .stdout } \n { result .stderr } " )
409+
410+ # Check PostgreSQL authentication configuration
411+ logger .error ("PostgreSQL authentication configuration:" )
412+ result = host .run ("sudo cat /etc/postgresql/pg_hba.conf" )
413+ logger .error (f"pg_hba.conf:\n { result .stdout } \n { result .stderr } " )
414+
415+ # Check PostgreSQL environment
416+ logger .error ("PostgreSQL environment:" )
417+ result = host .run ("sudo -u postgres env | grep POSTGRES" )
418+ logger .error (f"postgres environment:\n { result .stdout } \n { result .stderr } " )
419+
420+ if any (cmd .failed for cmd in [systemd_status , socket_check , pg_isready ]):
421+ return False
422+ else :
423+ cmd = check (host )
424+ if cmd .failed is True :
425+ logger .warning (f"{ service } not ready" )
426+ logger .error (f"{ service } command failed with rc={ cmd .rc } " )
427+ logger .error (f"{ service } stdout: { cmd .stdout } " )
428+ logger .error (f"{ service } stderr: { cmd .stderr } " )
429+ return False
424430 except Exception as e :
425431 logger .warning (
426432 f"Connection failed during { service } check, attempting reconnect..."
0 commit comments