Skip to content

Commit 7893c72

Browse files
authored
Merge pull request #1330 from WildMeOrg/add-other-container-healthchecks
Add supporting docker container healthchecks for restart
2 parents 0ec38a3 + 87cfd4a commit 7893c72

File tree

1 file changed

+31
-13
lines changed

1 file changed

+31
-13
lines changed

devops/deploy/docker-compose.yml

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@ services:
22
db:
33
image: postgres:13.4
44
healthcheck:
5-
test: [ "CMD-SHELL", "pg_isready -U postgres || exit 1" ]
6-
interval: 10s
7-
timeout: 5s
8-
retries: 60
5+
# More robust check: verify PostgreSQL can actually execute a query
6+
# pg_isready only checks that the server accepts connections, not that it can run queries
7+
test: [ "CMD-SHELL", "pg_isready -U ${WILDBOOK_DB_USER:-wildbook} -d ${WILDBOOK_DB_NAME:-wildbook} && psql -U ${WILDBOOK_DB_USER:-wildbook} -d ${WILDBOOK_DB_NAME:-wildbook} -c 'SELECT 1' || exit 1" ]
8+
interval: 30s
9+
timeout: 10s
10+
retries: 3
11+
start_period: 60s
912
labels:
1013
- autoheal=true
1114
user: postgres
@@ -74,10 +77,13 @@ services:
7477
opensearch:
7578
image: opensearchproject/opensearch:2.15.0
7679
healthcheck:
77-
test: [ "CMD-SHELL", "curl --silent --fail 127.0.0.1:9200/_cluster/health || exit 1" ]
78-
interval: 10s
79-
timeout: 5s
80-
retries: 60
80+
# Check cluster health and verify status is not "red"
81+
# Green = all shards allocated, Yellow = all primary shards allocated but some replicas are not, Red = at least one primary shard is unallocated
82+
test: [ "CMD-SHELL", "curl --silent --fail 127.0.0.1:9200/_cluster/health | grep -qE '\"status\":\"(green|yellow)\"' || exit 1" ]
83+
interval: 30s
84+
timeout: 10s
85+
retries: 3
86+
start_period: 120s
8187
labels:
8288
- autoheal=true
8389
volumes:
@@ -108,6 +114,15 @@ services:
108114
# TODO dkim and spf needs to be added/supported
109115
smtp:
110116
image: boky/postfix
117+
healthcheck:
118+
# Check if postfix master process is running
119+
test: [ "CMD-SHELL", "pgrep -x master || exit 1" ]
120+
interval: 60s
121+
timeout: 10s
122+
retries: 3
123+
start_period: 30s
124+
labels:
125+
- autoheal=true
111126
networks:
112127
- intranet
113128
ports:
@@ -125,11 +140,14 @@ services:
125140
image: nginx:1.23.4
126141
depends_on:
127142
- wildbook
128-
#healthcheck:
129-
#test: [ "CMD", "curl", "-f", "http://localhost:84/"]
130-
#interval: 10s
131-
#timeout: 5s
132-
#retries: 60
143+
healthcheck:
144+
# Check that the nginx configuration is valid and the master process is alive (this does not probe the listening port)
145+
# nginx:1.23.4 doesn't include curl, so we check the pid file and use nginx -t
146+
test: [ "CMD-SHELL", "nginx -t 2>/dev/null && kill -0 $(cat /var/run/nginx.pid 2>/dev/null) || exit 1" ]
147+
interval: 30s
148+
timeout: 10s
149+
retries: 3
150+
start_period: 30s
133151
labels:
134152
- autoheal=true
135153
volumes:

0 commit comments

Comments
 (0)