Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 72 additions & 5 deletions ansible/files/gotrue.service.j2
Original file line number Diff line number Diff line change
@@ -1,14 +1,56 @@
[Unit]
Description=Gotrue

# Avoid starting gotrue while cloud-init is running. It makes a lot of changes,
# and I would like to rule out side effects of it running concurrently
# alongside services.
# NOTE(review): ordering is only against cloud-init.service, while Wants= pulls
# in cloud-init.target -- confirm this asymmetry is intentional.
After=cloud-init.service
Wants=cloud-init.target

# Given that auth uses SO_REUSEADDR, I want to rule out capabilities being
# modified between restarts early in boot. This closes off the scenario where
# EADDRINUSE errors originate from a previous gotrue process that started
# without the SO_REUSEADDR flag (because it lacked the capability at that
# point in the boot process), leaving the next gotrue unable to re-use a
# slow-releasing socket.
After=apparmor.service

# We want sysctls to be applied before gotrue starts.
After=systemd-sysctl.service

# UFW is modified by cloud-init but started non-blocking, so configuration
# could be in flight while gotrue is starting. I want to ensure future rules
# that are relied on for security posture are applied before gotrue runs.
After=ufw.service

# We need networking & name resolution. Auth uses the Go DNS resolver (not
# libc), so it's possible `localhost` resolution could be unstable early in
# startup. We care about this because SO_REUSEADDR eligibility checks the tuple
# (proto, family, addr, port), meaning the address family (AF_INET for IPv4,
# AF_INET6 for IPv6) could affect the binding, resulting in a second way for
# EADDRINUSE errors to surface.
#
# Note: We should consider removing localhost usage, given that `localhost`
# resolution can often be racy early in boot, can be difficult to debug, and
# offers no real advantage in our infra. At the very least, avoiding
# DNS-resolved binding would be a good idea.
Wants=network-online.target systemd-resolved.service
After=network-online.target systemd-resolved.service

# The auth server can't start unless postgres is online, so let's remove a lot
# of auth server noise during slow starts by pulling postgres in (Wants=) and
# ordering gotrue after it (After=).
Wants=postgresql.service
After=postgresql.service

# Lower the start limit interval and burst to prevent noisy flapping.
StartLimitIntervalSec=10
StartLimitBurst=5

[Service]
Type=simple
Type=exec
WorkingDirectory=/opt/gotrue
{% if qemu_mode is defined and qemu_mode %}
ExecStart=/opt/gotrue/gotrue
{% else %}

# Both v2 & v3 need a config-dir for reloading support.
ExecStart=/opt/gotrue/gotrue --config-dir /etc/auth.d
{% endif %}
ExecReload=/bin/kill -10 $MAINPID

User=gotrue
Restart=always
Expand All @@ -17,11 +59,36 @@ RestartSec=3
MemoryAccounting=true
MemoryMax=50%

# These are the historical locations of env files. The /etc/auth.d dir will
# override them when present.
EnvironmentFile=-/etc/gotrue.generated.env
EnvironmentFile=/etc/gotrue.env
EnvironmentFile=-/etc/gotrue.overrides.env

# Both v2 & v3 support reloading via signals; on Linux this is SIGUSR1.
Environment=GOTRUE_RELOADING_SIGNAL_ENABLED=true
Environment=GOTRUE_RELOADING_SIGNAL_NUMBER=10

# Both v2 & v3 disable the poller. While gotrue sets it to off by default, we
# defensively set it to false here.
Environment=GOTRUE_RELOADING_POLLER_ENABLED=false

# Determines how much idle time must pass before triggering a reload. This
# ensures only 1 reload operation occurs during a burst of config updates.
Environment=GOTRUE_RELOADING_GRACE_PERIOD_INTERVAL=2s

{% if qemu_mode is defined and qemu_mode %}
# v3 does not use filesystem notifications for config reloads.
Environment=GOTRUE_RELOADING_NOTIFY_ENABLED=false
{% else %}
# v2 currently relies on notify support, so we will enable it until both v2 and
# v3 have migrated to strictly use signals across all projects. The default is
# true in gotrue, but we set it defensively here.
Environment=GOTRUE_RELOADING_NOTIFY_ENABLED=true
{% endif %}

Slice=services.slice

[Install]
# Enabling this unit adds it to the units wanted by multi-user.target.
WantedBy=multi-user.target

10 changes: 8 additions & 2 deletions testinfra/test_ami_nix.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,12 +374,18 @@ def is_healthy(ssh) -> bool:
try:
result = run_ssh_command(ssh, command)
if not result["succeeded"]:
logger.warning(f"{service} not ready")
info_text = ""
info_command = f"sudo journalctl -b -u {service} -n 20 --no-pager"
info_result = run_ssh_command(ssh, info_command)
if info_result["succeeded"]:
info_text = "\n" + info_result["stdout"].strip()

logger.warning(f"{service} not ready{info_text}")
return False

except Exception:
logger.warning(f"Connection failed during {service} check")
return False

return True

while True:
Expand Down