Skip to content
230 changes: 230 additions & 0 deletions ansible/tasks/setup-coredumps.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
---
# Setup coredump collection for debugging PostgreSQL crashes
# This configuration is temporary and should be cleaned up after debugging

# Directory that receives kernel core dumps (see kernel.core_pattern below).
# It must be writable by whichever user owns the crashing process, hence the
# world-writable mode; the sticky bit (leading 1) keeps users from deleting or
# renaming core files that belong to someone else, as on /tmp.
- name: Create coredump directory
  become: true
  file:
    path: /pg_coredump_debug
    state: directory
    owner: root
    group: root
    mode: '1777'
  when: stage2_nix

# Point the kernel at /pg_coredump_debug for core files and persist the
# setting in a dedicated sysctl.d drop-in so it is easy to remove later.
- name: Configure kernel core pattern
  become: true
  when: stage2_nix
  ansible.posix.sysctl:
    sysctl_file: /etc/sysctl.d/99-coredump.conf
    name: kernel.core_pattern
    value: "/pg_coredump_debug/core.%e.%p.%t"
    state: present
    reload: true

# Persist kernel.core_uses_pid=1 in the same drop-in as the core pattern.
- name: Enable core dumps with PID
  become: true
  when: stage2_nix
  ansible.posix.sysctl:
    sysctl_file: /etc/sysctl.d/99-coredump.conf
    name: kernel.core_uses_pid
    value: "1"
    state: present
    reload: true

# Persist fs.suid_dumpable=1 in the coredump sysctl drop-in.
# NOTE(review): value 1 is the kernel's "debug" mode for privileged processes;
# confirm it is reverted together with the rest of this temporary setup.
- name: Enable SUID dumpable
  become: true
  when: stage2_nix
  ansible.posix.sysctl:
    sysctl_file: /etc/sysctl.d/99-coredump.conf
    name: fs.suid_dumpable
    value: "1"
    state: present
    reload: true

# Make sure the drop-in directory exists before the coredump drop-in is
# written into it by the next task.
- name: Create systemd system.conf.d directory
  become: true
  when: stage2_nix
  file:
    state: directory
    path: /etc/systemd/system.conf.d
    owner: root
    group: root
    mode: "0755"

# Write a systemd manager drop-in that sets DefaultLimitCORE=infinity and
# DumpCore=yes.
- name: Configure systemd for coredumps
  become: true
  when: stage2_nix
  copy:
    dest: /etc/systemd/system.conf.d/50-coredump.conf
    owner: root
    group: root
    mode: "0644"
    content: |
      # Temporary coredump configuration - remove after debugging
      [Manager]
      DefaultLimitCORE=infinity
      DumpCore=yes

# Add a marker-delimited block to limits.conf that sets the hard and soft
# core limits for all users; the markers let the block be found and removed.
# NOTE(review): 50000 is finite while the systemd drop-in uses infinity —
# confirm large core files are not truncated for sessions governed by this file.
- name: Configure security limits for coredumps
  become: true
  when: stage2_nix
  blockinfile:
    path: /etc/security/limits.conf
    state: present
    marker: '# {mark} ANSIBLE MANAGED BLOCK - COREDUMP'
    block: |
      # Temporary coredump limits - remove after debugging
      * hard core 50000
      * soft core 50000

# Read-only probe: capture the current value of GRUB_CMDLINE_LINUX_DEFAULT
# from /etc/default/grub (surrounding double quotes stripped), or an empty
# line when the variable is not set there. Never reports a change.
- name: Check current GRUB_CMDLINE_LINUX_DEFAULT
  become: true
  when: stage2_nix
  register: current_grub_cmdline
  changed_when: false
  shell: |
    if grep -q '^GRUB_CMDLINE_LINUX_DEFAULT=' /etc/default/grub; then
      grep '^GRUB_CMDLINE_LINUX_DEFAULT=' /etc/default/grub | sed 's/^GRUB_CMDLINE_LINUX_DEFAULT="\(.*\)"$/\1/'
    else
      echo ""
    fi

# Read-only probe: list the grub.d drop-ins that set
# GRUB_CMDLINE_LINUX_DEFAULT. Errors are silenced and the command always
# succeeds, so an empty stdout means "no such drop-in".
- name: Check for GRUB_CMDLINE_LINUX_DEFAULT in grub.d files
  become: true
  when: stage2_nix
  register: grub_d_files
  changed_when: false
  shell: "grep -l '^GRUB_CMDLINE_LINUX_DEFAULT=' /etc/default/grub.d/* 2>/dev/null || true"

# Build the new kernel command line: remove any existing
# coredump_filter=<digits> token from the captured value, then append
# coredump_filter=49 and trim surrounding whitespace.
- name: Prepare GRUB cmdline with coredump_filter
  when: stage2_nix
  set_fact:
    new_grub_cmdline: >-
      {{
      ((current_grub_cmdline.stdout | regex_replace('coredump_filter=[0-9]+', '') | trim)
      + ' coredump_filter=49') | trim
      }}

# Only used when no grub.d drop-in defines the variable: replace (or add) the
# GRUB_CMDLINE_LINUX_DEFAULT line in /etc/default/grub with the prepared value.
- name: Update GRUB configuration for coredump_filter in main grub file
  become: true
  register: grub_main_updated
  when:
    - stage2_nix
    - grub_d_files.stdout | length == 0
  lineinfile:
    path: /etc/default/grub
    create: true
    regexp: "^GRUB_CMDLINE_LINUX_DEFAULT="
    line: 'GRUB_CMDLINE_LINUX_DEFAULT="{{ new_grub_cmdline }}"'

# Rewrites GRUB_CMDLINE_LINUX_DEFAULT in every grub.d drop-in that defines it:
# any existing coredump_filter=<digits> token is removed and
# coredump_filter=49 is appended. Runs only when at least one drop-in matched.
- name: Update GRUB configuration for coredump_filter in grub.d files
  become: true
  shell: |
    # The file list is built from stdout_lines, shell-quoted and joined with
    # spaces: interpolating the raw newline-separated stdout would terminate
    # the word list after the first path and make the script a syntax error
    # whenever more than one drop-in matches.
    for file in {{ grub_d_files.stdout_lines | map('quote') | join(' ') }}; do
      current=$(grep '^GRUB_CMDLINE_LINUX_DEFAULT=' "$file" | sed 's/^GRUB_CMDLINE_LINUX_DEFAULT="\(.*\)"$/\1/')
      new=$(printf '%s\n' "$current" | sed 's/coredump_filter=[0-9]*//g')
      new="${new} coredump_filter=49"
      # Escape backslash, slash and ampersand so the value is inserted
      # literally by the substitution below (e.g. root=/dev/... arguments).
      escaped=$(printf '%s\n' "$new" | sed 's|[\\/&]|\\&|g')
      sed -i "s/^GRUB_CMDLINE_LINUX_DEFAULT=.*/GRUB_CMDLINE_LINUX_DEFAULT=\"${escaped}\"/" "$file"
    done
  when:
    - stage2_nix
    - grub_d_files.stdout != ""
  register: grub_d_updated

# Regenerate the GRUB configuration, but only if one of the two preceding
# update tasks reported a change.
- name: Update GRUB after configuration changes
  become: true
  when:
    - stage2_nix
    - (grub_main_updated is defined and grub_main_updated.changed) or (grub_d_updated is defined and grub_d_updated.changed)
  command: update-grub

# Install gdb from apt; the package cache is refreshed only when it is older
# than one hour (3600 seconds).
- name: Install gdb for debugging
  become: true
  when: stage2_nix
  apt:
    pkg: [gdb]
    state: present
    update_cache: true
    cache_valid_time: 3600

# Drop a .gdbinit into root's home directory. The only active setting is the
# auto-load safe-path; the debug-symbol and source paths are left as
# commented-out examples to be filled in by whoever debugs.
- name: Create root .gdbinit configuration
  become: true
  when: stage2_nix
  copy:
    dest: /root/.gdbinit
    owner: root
    group: root
    mode: "0644"
    content: |
      # GDB configuration for PostgreSQL debugging
      # Note: debug-file-directory and substitute-path need to be set manually
      # after installing debug symbols and source files from nix
      set auto-load safe-path /
      # Example paths - will be set dynamically when debugging:
      # set debug-file-directory /nix/store/<hash>-postgresql-<version>-debug/lib/debug/
      # set substitute-path ./ /nix/store/<hash>-postgresql-<version>-src-<version>/

# Same .gdbinit as for root, written as the postgres user into
# /var/lib/postgresql and owned by postgres.
- name: Create postgres user .gdbinit configuration
  become: true
  become_user: postgres
  when: stage2_nix
  copy:
    dest: /var/lib/postgresql/.gdbinit
    owner: postgres
    group: postgres
    mode: "0644"
    content: |
      # GDB configuration for PostgreSQL debugging
      # Note: debug-file-directory and substitute-path need to be set manually
      # after installing debug symbols and source files from nix
      set auto-load safe-path /
      # Example paths - will be set dynamically when debugging:
      # set debug-file-directory /nix/store/<hash>-postgresql-<version>-debug/lib/debug/
      # set substitute-path ./ /nix/store/<hash>-postgresql-<version>-src-<version>/

# Writes a README next to the core files describing what was configured, how
# to open a core in gdb, and how to undo the setup.
# Review feedback folded into the README text: `gdb postmaster` did not work
# on the AMI; the full /nix/store path of the `.postgres-wrapped` executable
# had to be passed, and that file is not present in
# /var/lib/postgresql/.nix-profile/bin.
- name: Create coredump setup documentation
  become: true
  copy:
    content: |
      # PostgreSQL Coredump Configuration

      This AMI has been configured to collect PostgreSQL coredumps for debugging.

      ## Configuration Files Modified:
      - /etc/sysctl.d/99-coredump.conf - Kernel coredump settings
      - /etc/systemd/system.conf.d/50-coredump.conf - Systemd coredump settings
      - /etc/security/limits.conf - User limits for coredump size
      - /etc/default/grub (or a file in /etc/default/grub.d/) - GRUB configuration for coredump_filter

      ## Coredump Location:
      Coredumps are saved to: /pg_coredump_debug/

      ## Debug Symbols:
      Debug symbols and source files are already installed via nix during the build.
      To find the paths:
      ```
      sudo -u postgres nix profile list | grep 'postgresql_.*_debug'
      sudo -u postgres nix profile list | grep 'postgresql_.*_src'
      ```

      ## Using GDB:
      gdb needs the full path of the executable that produced the core. The
      server runs as `.postgres-wrapped` inside the nix store (this file is not
      in /var/lib/postgresql/.nix-profile/bin). To locate it:
      ```
      ls -lh /nix/store/*/bin/.postgres-wrapped
      ```

      To analyze a coredump:
      ```
      sudo gdb /nix/store/<hash>-postgresql-and-plugins-<version>/bin/.postgres-wrapped -c /pg_coredump_debug/<core_file>
      ```

      Then in gdb, set the debug symbols path:
      ```
      symbol-file /nix/store/<hash>-postgresql-<version>-debug/lib/debug/postgres
      ```

      ## Cleanup:
      To disable coredump collection after debugging:
      1. Remove /etc/sysctl.d/99-coredump.conf
      2. Remove /etc/systemd/system.conf.d/50-coredump.conf
      3. Remove coredump block from /etc/security/limits.conf
      4. Remove coredump_filter=49 from GRUB_CMDLINE_LINUX_DEFAULT in /etc/default/grub
         (or the file in /etc/default/grub.d/ that sets it) and run: sudo update-grub
      5. Delete coredumps from /pg_coredump_debug/
      6. Run: sudo sysctl --system && sudo systemctl daemon-reload
      7. Reboot so that the removed kernel and boot settings return to their defaults
    dest: /pg_coredump_debug/README.md
    mode: '0644'
    owner: root
    group: root
  when: stage2_nix
5 changes: 5 additions & 0 deletions ansible/tasks/setup-postgres.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Pull in the coredump setup tasks ahead of the Postgres package steps;
# only applies to stage2_nix builds.
- name: Setup coredump collection
  when: stage2_nix
  import_tasks: tasks/setup-coredumps.yml

- name: Postgres - copy package
copy:
src: files/postgres/
Expand Down Expand Up @@ -170,6 +174,7 @@
import_tasks: tasks/setup-docker.yml
when: debpkg_mode or stage2_nix


#stage 2 postgres tasks
- name: stage2 postgres tasks
import_tasks: tasks/stage2-setup-postgres.yml
Expand Down
6 changes: 3 additions & 3 deletions ansible/vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ postgres_major:

# Full version strings for each major version
# Each key must appear exactly once: duplicate mapping keys are invalid YAML
# and most parsers silently keep only the last value. The superseded version
# strings are therefore dropped, leaving the effective "-coredump-1" releases.
postgres_release:
  postgresorioledb-17: "17.5.1.017-orioledb-coredump-1"
  postgres17: "17.4.1.074-coredump-1"
  postgres15: "15.8.1.131-coredump-1"

# Non Postgres Extensions
pgbouncer_release: "1.19.0"
Expand Down