Skip to content

Commit 164752b

Browse files
committed
test: restructure checks to avoid race
1 parent c2631e8 commit 164752b

File tree

1 file changed

+30
-18
lines changed

1 file changed

+30
-18
lines changed

testinfra/test_ami_nix.py

Lines changed: 30 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -323,7 +323,9 @@ def run_detailed_checks(host):
323323
result = host.run("sudo -u postgres env | grep POSTGRES")
324324
logger.info(f"postgres environment:\n{result.stdout}\n{result.stderr}")
325325

326-
def is_healthy(host, instance_ip, ssh_identity_file) -> bool:
326+
def is_healthy(host, instance_ip, ssh_identity_file) -> tuple[bool, dict]:
327+
service_status = {} # Track status of each service
328+
327329
health_checks = [
328330
(
329331
"postgres",
@@ -358,10 +360,9 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool:
358360
if service == "postgres":
359361
pg_isready = check(host)
360362

361-
# Always read and log the PostgreSQL logs first
363+
# Always read and log the PostgreSQL logs
362364
logger.warning("PostgreSQL status check:")
363365
try:
364-
# Read both .log and .csv files
365366
log_files = [
366367
"/var/log/postgresql/*.log",
367368
"/var/log/postgresql/*.csv"
@@ -379,37 +380,48 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool:
379380
except Exception as e:
380381
logger.error(f"Error reading PostgreSQL logs: {str(e)}")
381382

382-
# Then check the status and return
383-
if not pg_isready.failed:
384-
continue
385-
# Wait before next attempt
386-
sleep(5)
387-
return False
383+
service_status[service] = not pg_isready.failed
384+
388385
else:
389386
cmd = check(host)
390-
if cmd.failed is True:
387+
service_status[service] = not cmd.failed
388+
if cmd.failed:
391389
logger.warning(f"{service} not ready")
392390
logger.error(f"{service} command failed with rc={cmd.rc}")
393391
logger.error(f"{service} stdout: {cmd.stdout}")
394392
logger.error(f"{service} stderr: {cmd.stderr}")
395-
return False
393+
396394
except Exception as e:
397-
logger.warning(
398-
f"Connection failed during {service} check, attempting reconnect..."
399-
)
395+
logger.warning(f"Connection failed during {service} check, attempting reconnect...")
400396
logger.error(f"Error details: {str(e)}")
401397
host = get_ssh_connection(instance_ip, ssh_identity_file)
402-
return False
398+
service_status[service] = False
399+
400+
# Log overall status of all services
401+
logger.info("Service health status:")
402+
for service, healthy in service_status.items():
403+
logger.info(f"{service}: {'healthy' if healthy else 'unhealthy'}")
403404

404-
return True
405+
# If any service is unhealthy, wait and return False with status
406+
if not all(service_status.values()):
407+
if service_status.get("postgres", False): # If postgres is healthy but others aren't
408+
sleep(5) # Only wait if postgres is up but other services aren't
409+
logger.warning("Some services are not healthy, will retry...")
410+
return False, service_status
411+
412+
logger.info("All services are healthy, proceeding to tests...")
413+
return True, service_status
405414

406415
while True:
407-
if is_healthy(
416+
healthy, status = is_healthy(
408417
host=host,
409418
instance_ip=instance.public_ip_address,
410419
ssh_identity_file=temp_key.get_priv_key_file(),
411-
):
420+
)
421+
if healthy:
422+
logger.info("Health check passed, starting tests...")
412423
break
424+
logger.warning(f"Health check failed, service status: {status}")
413425
sleep(1)
414426

415427
# return a testinfra connection to the instance

0 commit comments

Comments
 (0)