diff --git a/ansible/files/gotrue.service.j2 b/ansible/files/gotrue.service.j2 index 144448cc6..2478e99e6 100644 --- a/ansible/files/gotrue.service.j2 +++ b/ansible/files/gotrue.service.j2 @@ -1,56 +1,14 @@ [Unit] Description=Gotrue -# Avoid starting gotrue while cloud-init is running. It makes a lot of changes -# and I would like to rule out side effects of it running concurrently along -# side services. -After=cloud-init.service -Wants=cloud-init.target - -# Given the fact that auth uses SO_REUSEADDR, I want to rule out capabilities -# being modified between restarts early in boot. This plugs up the scenario that -# EADDRINUSE errors originate from a previous gotrue process starting without -# the SO_REUSEADDR flag (due to lacking capability at that point in boot proc) -# so when the next gotrue starts it can't re-use a slow releasing socket. -After=apparmor.service - -# We want sysctl's to be applied -After=systemd-sysctl.service - -# UFW Is modified by cloud init, but started non-blocking, so configuration -# could be in-flight while gotrue is starting. I want to ensure future rules -# that are relied on for security posture are applied before gotrue runs. -After=ufw.service - -# We need networking & resolution, auth uses the Go DNS resolver (not libc) -# so it's possible `localhost` resolution could be unstable early in startup. We -# care about this because SO_REUSEADDR eligibility checks the tuple -# (proto, family, addr, port) meaning the AF_INET (ipv4, ipv6) could affect the -# binding resulting in a second way for EADDRINUSE errors to surface. -# -# Note: We should consider removing localhost usage given `localhost` resolution -# can often be racey early in boot, can be difficult to debug and offers no real -# advantage in our infra. At the very least avoiding DNS resolved binding would -# be a good idea. -Wants=network-online.target systemd-resolved.service -After=network-online.target systemd-resolved.service - -# Auth server can't start unless postgres is online, lets remove a lot of auth -# server noise during slow starts by requiring it. -Wants=postgresql.service -After=postgresql.service - -# Lower start limit ival and burst to prevent the noisy flapping -StartLimitIntervalSec=10 -StartLimitBurst=5 - [Service] -Type=exec +Type=simple WorkingDirectory=/opt/gotrue - -# Both v2 & v3 need a config-dir for reloading support. +{% if qemu_mode is defined and qemu_mode %} +ExecStart=/opt/gotrue/gotrue +{% else %} ExecStart=/opt/gotrue/gotrue --config-dir /etc/auth.d -ExecReload=/bin/kill -10 $MAINPID +{% endif %} User=gotrue Restart=always @@ -59,36 +17,11 @@ RestartSec=3 MemoryAccounting=true MemoryMax=50% -# These are the historical location of env files. The /etc/auth.d dir will -# override them when present. EnvironmentFile=-/etc/gotrue.generated.env EnvironmentFile=/etc/gotrue.env EnvironmentFile=-/etc/gotrue.overrides.env -# Both v2 & v3 support reloading via signals, on linux this is SIGUSR1. -Environment=GOTRUE_RELOADING_SIGNAL_ENABLED=true -Environment=GOTRUE_RELOADING_SIGNAL_NUMBER=10 - -# Both v2 & v3 disable the poller. While gotrue sets it to off by default we -# defensively set it to false here. -Environment=GOTRUE_RELOADING_POLLER_ENABLED=false - -# Determines how much idle time must pass before triggering a reload. This -# ensures only 1 reload operation occurs during a burst of config updates. -Environment=GOTRUE_RELOADING_GRACE_PERIOD_INTERVAL=2s - -{% if qemu_mode is defined and qemu_mode %} -# v3 does not use filesystem notifications for config reloads. -Environment=GOTRUE_RELOADING_NOTIFY_ENABLED=false -{% else %} -# v2 currently relies on notify support, so we will enable it until both v2 / v3 -# have migrated to strictly use signals across all projects. The default is true -# in gotrue but we will set it defensively here. -Environment=GOTRUE_RELOADING_NOTIFY_ENABLED=true -{% endif %} - Slice=services.slice [Install] WantedBy=multi-user.target - diff --git a/testinfra/test_ami_nix.py b/testinfra/test_ami_nix.py index 42442de18..2325ff3d7 100644 --- a/testinfra/test_ami_nix.py +++ b/testinfra/test_ami_nix.py @@ -374,18 +374,12 @@ def is_healthy(ssh) -> bool: try: result = run_ssh_command(ssh, command) if not result["succeeded"]: - info_text = "" - info_command = f"sudo journalctl -b -u {service} -n 20 --no-pager" - info_result = run_ssh_command(ssh, info_command) - if info_result["succeeded"]: - info_text = "\n" + info_result["stdout"].strip() - - logger.warning(f"{service} not ready{info_text}") + logger.warning(f"{service} not ready") return False - except Exception: logger.warning(f"Connection failed during {service} check") return False + return True while True: