@@ -348,171 +348,41 @@ def gzip_then_base64_encode(s: str) -> str:
348348
349349 def is_healthy (ssh ) -> bool :
350350 health_checks = [
351- < << << << HEAD
352351 ("postgres" , "sudo -u postgres /usr/bin/pg_isready -U postgres" ),
353352 ("adminapi" , f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: { supabase_admin_key } '" ),
354353 ("postgrest" , "curl -sf --connect-timeout 30 --max-time 60 http://localhost:3001/ready" ),
355354 ("gotrue" , "curl -sf --connect-timeout 30 --max-time 60 http://localhost:8081/health" ),
356355 ("kong" , "sudo kong health" ),
357356 ("fail2ban" , "sudo fail2ban-client status" ),
358- == == == =
359- (
360- "postgres" ,
361- lambda h : (
362- # First check if PostgreSQL is running
363- h .run ("sudo systemctl is-active postgresql" ),
364- # Then check if the socket directory exists and has correct permissions
365- h .run ("sudo ls -la /run/postgresql" ),
366- # Then try pg_isready
367- h .run ("sudo -u postgres /usr/bin/pg_isready -U postgres" )
368- ),
369- ),
370- (
371- "adminapi" ,
372- lambda h : h .run (
373- f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: { supabase_admin_key } '"
374- ),
375- ),
376- (
377- "postgrest" ,
378- lambda h : h .run (
379- "curl -sf --connect-timeout 30 --max-time 60 http://localhost:3001/ready"
380- ),
381- ),
382- (
383- "gotrue" ,
384- lambda h : h .run (
385- "curl -sf --connect-timeout 30 --max-time 60 http://localhost:8081/health"
386- ),
387- ),
388- ("kong" , lambda h : h .run ("sudo kong health" )),
389- ("fail2ban" , lambda h : h .run ("sudo fail2ban-client status" )),
390- > >> >> >> 2 bd7b6d9 (test : more logging for healthcheck )
391357 ]
392358
393359 for service , command in health_checks :
394360 try :
395- << < << < < HEAD
396361 result = run_ssh_command (ssh , command )
397362 if not result ['succeeded' ]:
398363 logger .warning (f"{ service } not ready" )
399364 logger .error (f"{ service } command failed with rc={ cmd .rc } " )
400365 logger .error (f"{ service } stdout: { cmd .stdout } " )
401366 logger .error (f"{ service } stderr: { cmd .stderr } " )
402- == == == =
403- if service == "postgres" :
404- # For PostgreSQL, we need to check multiple things
405- systemd_status , socket_check , pg_isready = check (host )
406- >> >> >> > 2 bd7b6d9 (test : more logging for healthcheck )
407367
408- # Log Nix profile setup checks
409- logger .info ("Checking Nix profile setup:" )
410- nix_profile_result = host .run ("ls -la /home/postgres/.nix-profile" )
411- logger .info (f"Nix profile directory:\n { nix_profile_result .stdout } \n { nix_profile_result .stderr } " )
412-
413- nix_bin_result = host .run ("ls -la /home/postgres/.nix-profile/bin" )
414- logger .info (f"Nix profile bin directory:\n { nix_bin_result .stdout } \n { nix_bin_result .stderr } " )
415-
416- nix_script_result = host .run ("test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version" )
417- logger .info (f"Switch script executable check: { 'success' if not nix_script_result .failed else 'failed' } " )
418-
419- nix_script_output = host .run ("/home/postgres/.nix-profile/bin/switch_pg_cron_version" )
420- logger .info (f"Switch script output:\n { nix_script_output .stdout } \n { nix_script_output .stderr } " )
421-
422368 if systemd_status .failed :
423369 logger .error ("PostgreSQL systemd service is not active" )
424370 logger .error (f"systemd status: { systemd_status .stdout } " )
425371 logger .error (f"systemd error: { systemd_status .stderr } " )
426-
427- # Check systemd service unit file
428- logger .error ("PostgreSQL systemd service unit file:" )
429- result = host .run ("sudo systemctl cat postgresql" )
430- logger .error (f"service unit file:\n { result .stdout } \n { result .stderr } " )
431-
432- # Check systemd service environment
433- logger .error ("PostgreSQL systemd service environment:" )
434- result = host .run ("sudo systemctl show postgresql" )
435- logger .error (f"service environment:\n { result .stdout } \n { result .stderr } " )
436-
437- # Check systemd service dependencies
438- logger .error ("PostgreSQL systemd service dependencies:" )
439- result = host .run ("sudo systemctl list-dependencies postgresql" )
440- logger .error (f"service dependencies:\n { result .stdout } \n { result .stderr } " )
441-
442- # Check if service is enabled
443- logger .error ("PostgreSQL service enabled status:" )
444- result = host .run ("sudo systemctl is-enabled postgresql" )
445- logger .error (f"service enabled status:\n { result .stdout } \n { result .stderr } " )
446-
447- # Check systemd journal for service execution logs
448- logger .error ("Systemd journal entries for PostgreSQL service execution:" )
449- result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager" )
450- logger .error (f"systemd journal:\n { result .stdout } \n { result .stderr } " )
451-
452- # Check systemd journal specifically for ExecStartPre and ExecStart
453- logger .error ("Systemd journal entries for ExecStartPre and ExecStart:" )
454- result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager | grep -E 'ExecStartPre|ExecStart'" )
455- logger .error (f"execution logs:\n { result .stdout } \n { result .stderr } " )
456-
457- # Check systemd journal for any errors
458- logger .error ("Systemd journal entries with error level:" )
459- result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager -p err" )
460- logger .error (f"error logs:\n { result .stdout } \n { result .stderr } " )
461-
462- # Check pre-start script output
463- logger .error ("Checking pre-start script output:" )
464- result = host .run ("sudo -u postgres /usr/local/bin/postgres_prestart.sh" )
465- logger .error (f"pre-start script output:\n { result .stdout } \n { result .stderr } " )
466372
467- # Check PostgreSQL logs directory
468- logger .error ("Checking PostgreSQL logs directory:" )
469- result = host .run ("sudo ls -la /var/log/postgresql/" )
470- logger .error (f"log directory contents:\n { result .stdout } \n { result .stderr } " )
471-
472- # Check any existing PostgreSQL logs
473- logger .error ("Checking existing PostgreSQL logs:" )
474- result = host .run ("sudo cat /var/log/postgresql/*.log" )
475- logger .error (f"postgresql logs:\n { result .stdout } \n { result .stderr } " )
476-
477- # Try starting PostgreSQL directly with pg_ctl and capture output
478- logger .error ("Attempting to start PostgreSQL directly with pg_ctl:" )
479- startup_log = "/tmp/postgres-start.log"
480- result = host .run (f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l { startup_log } " )
481- logger .error (f"pg_ctl start attempt:\n { result .stdout } \n { result .stderr } " )
482-
483- # Check the startup log
484- logger .error ("PostgreSQL startup log:" )
485- result = host .run (f"sudo cat { startup_log } " )
486- logger .error (f"startup log contents:\n { result .stdout } \n { result .stderr } " )
487-
488- # Clean up the startup log
489- result = host .run (f"sudo rm -f { startup_log } " )
490-
491- # Check PostgreSQL configuration
492- logger .error ("PostgreSQL configuration:" )
493- result = host .run ("sudo cat /etc/postgresql/postgresql.conf" )
494- logger .error (f"postgresql.conf:\n { result .stdout } \n { result .stderr } " )
495-
496- # Check PostgreSQL authentication configuration
497- logger .error ("PostgreSQL authentication configuration:" )
498- result = host .run ("sudo cat /etc/postgresql/pg_hba.conf" )
499- logger .error (f"pg_hba.conf:\n { result .stdout } \n { result .stderr } " )
500-
501- # Check PostgreSQL environment
502- logger .error ("PostgreSQL environment:" )
503- result = host .run ("sudo -u postgres env | grep POSTGRES" )
504- logger .error (f"postgres environment:\n { result .stdout } \n { result .stderr } " )
373+ # Run detailed checks since we know we have a working connection
374+ run_detailed_checks (host )
505375
506376 if any (cmd .failed for cmd in [systemd_status , socket_check , pg_isready ]):
507377 return False
508- else :
509- cmd = check (host )
510- if cmd .failed is True :
511- logger .warning (f"{ service } not ready" )
512- logger .error (f"{ service } command failed with rc={ cmd .rc } " )
513- logger .error (f"{ service } stdout: { cmd .stdout } " )
514- logger .error (f"{ service } stderr: { cmd .stderr } " )
515- return False
378+ else :
379+ cmd = check (host )
380+ if cmd .failed is True :
381+ logger .warning (f"{ service } not ready" )
382+ logger .error (f"{ service } command failed with rc={ cmd .rc } " )
383+ logger .error (f"{ service } stdout: { cmd .stdout } " )
384+ logger .error (f"{ service } stderr: { cmd .stderr } " )
385+ return False
516386 except Exception as e :
517387 logger .warning (
518388 f"Connection failed during { service } check, attempting reconnect..."
0 commit comments