@@ -348,33 +348,109 @@ def gzip_then_base64_encode(s: str) -> str:
348
348
349
349
def is_healthy (ssh ) -> bool :
350
350
health_checks = [
351
+ < << << << HEAD
351
352
("postgres" , "sudo -u postgres /usr/bin/pg_isready -U postgres" ),
352
353
("adminapi" , f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: { supabase_admin_key } '" ),
353
354
("postgrest" , "curl -sf --connect-timeout 30 --max-time 60 http://localhost:3001/ready" ),
354
355
("gotrue" , "curl -sf --connect-timeout 30 --max-time 60 http://localhost:8081/health" ),
355
356
("kong" , "sudo kong health" ),
356
357
("fail2ban" , "sudo fail2ban-client status" ),
358
+ == == == =
359
+ (
360
+ "postgres" ,
361
+ lambda h : (
362
+ # First check if PostgreSQL is running
363
+ h .run ("sudo systemctl is-active postgresql" ),
364
+ # Then check if the socket directory exists and has correct permissions
365
+ h .run ("sudo ls -la /run/postgresql" ),
366
+ # Then try pg_isready
367
+ h .run ("sudo -u postgres /usr/bin/pg_isready -U postgres" )
368
+ ),
369
+ ),
370
+ (
371
+ "adminapi" ,
372
+ lambda h : h .run (
373
+ f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: { supabase_admin_key } '"
374
+ ),
375
+ ),
376
+ (
377
+ "postgrest" ,
378
+ lambda h : h .run (
379
+ "curl -sf --connect-timeout 30 --max-time 60 http://localhost:3001/ready"
380
+ ),
381
+ ),
382
+ (
383
+ "gotrue" ,
384
+ lambda h : h .run (
385
+ "curl -sf --connect-timeout 30 --max-time 60 http://localhost:8081/health"
386
+ ),
387
+ ),
388
+ ("kong" , lambda h : h .run ("sudo kong health" )),
389
+ ("fail2ban" , lambda h : h .run ("sudo fail2ban-client status" )),
390
+ > >> >> >> 2 bd7b6d9 (test : more logging for healthcheck )
357
391
]
358
392
359
393
for service , command in health_checks :
360
394
try :
395
+ << < << < < HEAD
361
396
result = run_ssh_command (ssh , command )
362
397
if not result ['succeeded' ]:
363
398
logger .warning (f"{ service } not ready" )
364
399
logger .error (f"{ service } command failed with rc={ cmd .rc } " )
365
400
logger .error (f"{ service } stdout: { cmd .stdout } " )
366
401
logger .error (f"{ service } stderr: { cmd .stderr } " )
402
+ == == == =
403
+ if service == "postgres" :
404
+ # For PostgreSQL, we need to check multiple things
405
+ systemd_status , socket_check , pg_isready = check (host )
406
+ >> >> >> > 2 bd7b6d9 (test : more logging for healthcheck )
367
407
368
- # For PostgreSQL, also check the logs and systemd status
369
- if service == "postgres" :
370
- logger .error ("PostgreSQL logs:" )
371
- host .run ("sudo cat /var/log/postgresql/postgresql-*.log" )
372
- logger .error ("PostgreSQL systemd status:" )
373
- host .run ("sudo systemctl status postgresql" )
374
- logger .error ("PostgreSQL journal logs:" )
375
- host .run ("sudo journalctl -u postgresql --no-pager" )
408
+ if systemd_status .failed :
409
+ logger .error ("PostgreSQL systemd service is not active" )
410
+ logger .error (f"systemd status: { systemd_status .stdout } " )
411
+ logger .error (f"systemd error: { systemd_status .stderr } " )
412
+
413
+ # Check init script logs
414
+ logger .error ("Init script logs:" )
415
+ host .run ("sudo journalctl -u cloud-init --no-pager" )
416
+
417
+ # Check cloud-init logs
418
+ logger .error ("Cloud-init logs:" )
419
+ host .run ("sudo cat /var/log/cloud-init-output.log" )
420
+
421
+ # Check if init script exists and its contents
422
+ logger .error ("Init script status:" )
423
+ host .run ("ls -la /tmp/init.sh" )
424
+ host .run ("cat /tmp/init.sh" )
376
425
377
- return False
426
+ if socket_check .failed :
427
+ logger .error ("PostgreSQL socket directory check failed" )
428
+ logger .error (f"socket check: { socket_check .stdout } " )
429
+ logger .error (f"socket error: { socket_check .stderr } " )
430
+
431
+ if pg_isready .failed :
432
+ logger .error ("pg_isready check failed" )
433
+ logger .error (f"pg_isready output: { pg_isready .stdout } " )
434
+ logger .error (f"pg_isready error: { pg_isready .stderr } " )
435
+
436
+ # Check PostgreSQL logs for startup issues
437
+ logger .error ("PostgreSQL logs:" )
438
+ host .run ("sudo cat /var/log/postgresql/postgresql-*.log" )
439
+ logger .error ("PostgreSQL systemd status:" )
440
+ host .run ("sudo systemctl status postgresql" )
441
+ logger .error ("PostgreSQL journal logs:" )
442
+ host .run ("sudo journalctl -u postgresql --no-pager" )
443
+
444
+ if any (cmd .failed for cmd in [systemd_status , socket_check , pg_isready ]):
445
+ return False
446
+ else :
447
+ cmd = check (host )
448
+ if cmd .failed is True :
449
+ logger .warning (f"{ service } not ready" )
450
+ logger .error (f"{ service } command failed with rc={ cmd .rc } " )
451
+ logger .error (f"{ service } stdout: { cmd .stdout } " )
452
+ logger .error (f"{ service } stderr: { cmd .stderr } " )
453
+ return False
378
454
except Exception as e :
379
455
logger .warning (
380
456
f"Connection failed during { service } check, attempting reconnect..."
0 commit comments