Skip to content

Commit b8c9e31

Browse files
committed
test: use the right alias on machine
1 parent 4ab837e commit b8c9e31

File tree

3 files changed

+51
-109
lines changed

3 files changed

+51
-109
lines changed

.github/workflows/testinfra-nix.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,13 +108,13 @@ jobs:
108108
df -h / # Display available space
109109
110110
- name: Run tests
111-
timeout-minutes: 10
111+
timeout-minutes: 30
112112
env:
113113
AMI_NAME: "supabase-postgres-${{ steps.random.outputs.random_string }}"
114114
run: |
115115
# TODO: use poetry for pkg mgmt
116116
pip3 install boto3 boto3-stubs[essential] docker ec2instanceconnectcli pytest pytest-testinfra[paramiko,docker] requests
117-
pytest -vv -s testinfra/test_ami_nix.py
117+
pytest -vvvv -s testinfra/test_ami_nix.py
118118
119119
- name: Cleanup resources on build cancellation
120120
if: ${{ cancelled() }}

ansible/files/postgres_prestart.sh.j2

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ get_pg_cron_version() {
4141
fi
4242

4343
local version
44-
version=$(sudo -u postgres /home/postgres/.nix-profile/bin/jq -r '.pg_cron // empty' "/root/pg_extensions.json")
44+
version=$(sudo -u postgres /var/lib/postgresql/.nix-profile/bin/jq -r '.pg_cron // empty' "/root/pg_extensions.json")
4545
if [ -z "$version" ]; then
4646
echo "pg_cron: Not specified in extensions file"
4747
return
@@ -57,7 +57,7 @@ get_pg_cron_version() {
5757

5858
switch_pg_cron_version() {
5959
local version="$1"
60-
local switch_script="/home/postgres/.nix-profile/bin/switch_pg_cron_version"
60+
local switch_script="/var/lib/postgresql/.nix-profile/bin/switch_pg_cron_version"
6161

6262
if [ ! -x "$switch_script" ]; then
6363
echo "pg_cron: No version switch script available"

testinfra/test_ami_nix.py

Lines changed: 47 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -292,16 +292,48 @@ def get_ssh_connection(instance_ip, ssh_identity_file, max_retries=10):
292292
temp_key.get_priv_key_file(),
293293
)
294294

295+
def run_detailed_checks(host):
    """Run a fixed series of diagnostic commands on the host and log their output.

    Every command is executed through ``host.run`` and its stdout and stderr
    are written to the module logger at INFO level. Results are only logged:
    nothing is returned and the outcome of each command is not inspected here.

    Note that this is not a read-only inspection: one of the commands
    invokes ``pg_ctl ... start`` and writes a startup log under ``/tmp``,
    which is left in place afterwards.

    Args:
        host: Object exposing ``run(command)`` whose result carries ``stdout``
            and ``stderr`` attributes (presumably a testinfra host -- confirm
            against the caller).
    """
    startup_log = "/tmp/postgres-start.log"

    # Each entry: (heading logged before the command, or None to log no
    # heading; shell command to run; label that prefixes the logged output).
    probes = (
        (
            "Checking Nix profile setup:",
            "ls -la /var/lib/postgresql/.nix-profile",
            "Nix profile directory",
        ),
        (
            None,
            "ls -la /var/lib/postgresql/.nix-profile/bin",
            "Nix profile bin directory",
        ),
        (
            "Checking PostgreSQL logs directory:",
            "sudo ls -la /var/log/postgresql/",
            "log directory contents",
        ),
        (
            "Checking existing PostgreSQL logs:",
            "sudo cat /var/log/postgresql/*.log",
            "postgresql logs",
        ),
        (
            "Attempting to start PostgreSQL directly with pg_ctl:",
            f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l {startup_log}",
            "pg_ctl start attempt",
        ),
        (
            # Must stay after the pg_ctl entry: it reads the file that
            # pg_ctl was told to write with ``-l``.
            "PostgreSQL startup log:",
            f"sudo cat {startup_log}",
            "startup log contents",
        ),
        (
            "PostgreSQL environment:",
            "sudo -u postgres env | grep POSTGRES",
            "postgres environment",
        ),
    )

    logger.info("Running detailed system checks...")
    for heading, command, label in probes:
        if heading is not None:
            logger.info(heading)
        outcome = host.run(command)
        logger.info(f"{label}:\n{outcome.stdout}\n{outcome.stderr}")
331+
295332
def is_healthy(host, instance_ip, ssh_identity_file) -> bool:
296333
health_checks = [
297334
(
298335
"postgres",
299336
lambda h: (
300-
# First check if PostgreSQL is running
301-
h.run("sudo systemctl is-active postgresql"),
302-
# Then check if the socket directory exists and has correct permissions
303-
h.run("sudo ls -la /run/postgresql"),
304-
# Then try pg_isready
305337
h.run("sudo -u postgres /usr/bin/pg_isready -U postgres")
306338
),
307339
),
@@ -333,114 +365,24 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool:
333365
# For PostgreSQL, we need to check multiple things
334366
systemd_status, socket_check, pg_isready = check(host)
335367

336-
# Log Nix profile setup checks
337-
logger.info("Checking Nix profile setup:")
338-
nix_profile_result = host.run("ls -la /home/postgres/.nix-profile")
339-
logger.info(f"Nix profile directory:\n{nix_profile_result.stdout}\n{nix_profile_result.stderr}")
340-
341-
nix_bin_result = host.run("ls -la /home/postgres/.nix-profile/bin")
342-
logger.info(f"Nix profile bin directory:\n{nix_bin_result.stdout}\n{nix_bin_result.stderr}")
343-
344-
nix_script_result = host.run("test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version")
345-
logger.info(f"Switch script executable check: {'success' if not nix_script_result.failed else 'failed'}")
346-
347-
nix_script_output = host.run("/home/postgres/.nix-profile/bin/switch_pg_cron_version")
348-
logger.info(f"Switch script output:\n{nix_script_output.stdout}\n{nix_script_output.stderr}")
349-
350368
if systemd_status.failed:
351369
logger.error("PostgreSQL systemd service is not active")
352370
logger.error(f"systemd status: {systemd_status.stdout}")
353371
logger.error(f"systemd error: {systemd_status.stderr}")
354-
355-
# Check systemd service unit file
356-
logger.error("PostgreSQL systemd service unit file:")
357-
result = host.run("sudo systemctl cat postgresql")
358-
logger.error(f"service unit file:\n{result.stdout}\n{result.stderr}")
359-
360-
# Check systemd service environment
361-
logger.error("PostgreSQL systemd service environment:")
362-
result = host.run("sudo systemctl show postgresql")
363-
logger.error(f"service environment:\n{result.stdout}\n{result.stderr}")
364-
365-
# Check systemd service dependencies
366-
logger.error("PostgreSQL systemd service dependencies:")
367-
result = host.run("sudo systemctl list-dependencies postgresql")
368-
logger.error(f"service dependencies:\n{result.stdout}\n{result.stderr}")
369-
370-
# Check if service is enabled
371-
logger.error("PostgreSQL service enabled status:")
372-
result = host.run("sudo systemctl is-enabled postgresql")
373-
logger.error(f"service enabled status:\n{result.stdout}\n{result.stderr}")
374-
375-
# Check systemd journal for service execution logs
376-
logger.error("Systemd journal entries for PostgreSQL service execution:")
377-
result = host.run("sudo journalctl -u postgresql -n 100 --no-pager")
378-
logger.error(f"systemd journal:\n{result.stdout}\n{result.stderr}")
379-
380-
# Check systemd journal specifically for ExecStartPre and ExecStart
381-
logger.error("Systemd journal entries for ExecStartPre and ExecStart:")
382-
result = host.run("sudo journalctl -u postgresql -n 100 --no-pager | grep -E 'ExecStartPre|ExecStart'")
383-
logger.error(f"execution logs:\n{result.stdout}\n{result.stderr}")
384-
385-
# Check systemd journal for any errors
386-
logger.error("Systemd journal entries with error level:")
387-
result = host.run("sudo journalctl -u postgresql -n 100 --no-pager -p err")
388-
logger.error(f"error logs:\n{result.stdout}\n{result.stderr}")
389372

390-
# Check pre-start script output
391-
logger.error("Checking pre-start script output:")
392-
result = host.run("sudo -u postgres /usr/local/bin/postgres_prestart.sh")
393-
logger.error(f"pre-start script output:\n{result.stdout}\n{result.stderr}")
394-
395-
# Check PostgreSQL logs directory
396-
logger.error("Checking PostgreSQL logs directory:")
397-
result = host.run("sudo ls -la /var/log/postgresql/")
398-
logger.error(f"log directory contents:\n{result.stdout}\n{result.stderr}")
399-
400-
# Check any existing PostgreSQL logs
401-
logger.error("Checking existing PostgreSQL logs:")
402-
result = host.run("sudo cat /var/log/postgresql/*.log")
403-
logger.error(f"postgresql logs:\n{result.stdout}\n{result.stderr}")
404-
405-
# Try starting PostgreSQL directly with pg_ctl and capture output
406-
logger.error("Attempting to start PostgreSQL directly with pg_ctl:")
407-
startup_log = "/tmp/postgres-start.log"
408-
result = host.run(f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l {startup_log}")
409-
logger.error(f"pg_ctl start attempt:\n{result.stdout}\n{result.stderr}")
410-
411-
# Check the startup log
412-
logger.error("PostgreSQL startup log:")
413-
result = host.run(f"sudo cat {startup_log}")
414-
logger.error(f"startup log contents:\n{result.stdout}\n{result.stderr}")
415-
416-
# Clean up the startup log
417-
result = host.run(f"sudo rm -f {startup_log}")
418-
419-
# Check PostgreSQL configuration
420-
logger.error("PostgreSQL configuration:")
421-
result = host.run("sudo cat /etc/postgresql/postgresql.conf")
422-
logger.error(f"postgresql.conf:\n{result.stdout}\n{result.stderr}")
423-
424-
# Check PostgreSQL authentication configuration
425-
logger.error("PostgreSQL authentication configuration:")
426-
result = host.run("sudo cat /etc/postgresql/pg_hba.conf")
427-
logger.error(f"pg_hba.conf:\n{result.stdout}\n{result.stderr}")
428-
429-
# Check PostgreSQL environment
430-
logger.error("PostgreSQL environment:")
431-
result = host.run("sudo -u postgres env | grep POSTGRES")
432-
logger.error(f"postgres environment:\n{result.stdout}\n{result.stderr}")
373+
# Run detailed checks since we know we have a working connection
374+
run_detailed_checks(host)
433375

434376
if any(cmd.failed for cmd in [systemd_status, socket_check, pg_isready]):
435377
return False
436-
else:
437-
cmd = check(host)
438-
if cmd.failed is True:
439-
logger.warning(f"{service} not ready")
440-
logger.error(f"{service} command failed with rc={cmd.rc}")
441-
logger.error(f"{service} stdout: {cmd.stdout}")
442-
logger.error(f"{service} stderr: {cmd.stderr}")
443-
return False
378+
else:
379+
cmd = check(host)
380+
if cmd.failed is True:
381+
logger.warning(f"{service} not ready")
382+
logger.error(f"{service} command failed with rc={cmd.rc}")
383+
logger.error(f"{service} stdout: {cmd.stdout}")
384+
logger.error(f"{service} stderr: {cmd.stderr}")
385+
return False
444386
except Exception as e:
445387
logger.warning(
446388
f"Connection failed during {service} check, attempting reconnect..."

0 commit comments

Comments
 (0)