Skip to content

Commit 7ec5ce0

Browse files
fix: fix a race condition preventing Docker metrics from being collected correctly (#116)
* fix: start monitor in `init` cgroup to avoid race condition
* refactor: move cgroup manipulation to the top
* fix: disable `containerd` image store on cgroup v1
1 parent eb49b36 commit 7ec5ce0

File tree

2 files changed

+37
-28
lines changed

2 files changed

+37
-28
lines changed

run.sh

Lines changed: 36 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,38 @@
11
#!/bin/bash
22

3+
echo "Entering $0 at $(date) "
4+
5+
# It is required to keep this block at the top of the script!
6+
# All processes started by this script must be moved to the `./init` group on cgroup v2.
7+
if [ ! -f /sys/fs/cgroup/cgroup.controllers ]; then
8+
echo "Using cgroup v1"
9+
else
10+
echo "Using cgroup v2"
11+
CURRENT_CGROUP=$(cat /proc/self/cgroup | sed 's/0:://')
12+
CURRENT_CGROUP_PATH="/sys/fs/cgroup/${CURRENT_CGROUP}"
13+
echo "Current cgroup: ${CURRENT_CGROUP}"
14+
15+
# Move the processes from the current group to the `./init` group,
16+
# otherwise the current group will become of type "domain threaded",
17+
# and it will not be possible to enable required controllers for DinD group.
18+
# Ref: https://github.com/moby/moby/blob/38805f20f9bcc5e87869d6c79d432b166e1c88b4/hack/dind#L28-L38
19+
echo "Creating init cgroup ${CURRENT_CGROUP_PATH}/init"
20+
mkdir -p ${CURRENT_CGROUP_PATH}/init
21+
echo "Moving existing processes from ${CURRENT_CGROUP_PATH} to ${CURRENT_CGROUP_PATH}/init"
22+
xargs -rn1 < ${CURRENT_CGROUP_PATH}/cgroup.procs > ${CURRENT_CGROUP_PATH}/init/cgroup.procs || :
23+
echo "Done moving existing processes from ${CURRENT_CGROUP_PATH} to ${CURRENT_CGROUP_PATH}/init"
24+
25+
# Set `memory.oom.group=0` to disable killing all processes in cgroup at once on OOM.
26+
# If all processes are killed at once, the system will not be able to detect this event;
27+
# instead, we expect separate pipeline steps to be killed if total consumption exceeds the limits.
28+
MEMORY_OOM_GROUP="${CURRENT_CGROUP_PATH}/memory.oom.group"
29+
echo "Ensuring memory.oom.group is set to 0 to disable killing all processes in cgroup at once on OOM"
30+
echo "0" > "${MEMORY_OOM_GROUP}"
31+
echo "Current memory.oom.group value: $(cat "${MEMORY_OOM_GROUP}")"
32+
fi
33+
334
DIR=$(dirname $0)
435

5-
echo "Entering $0 at $(date) "
636
DOCKERD_DATA_ROOT=${DOCKERD_DATA_ROOT:-/var/lib/docker}
737
DIND_VOLUME_STAT_DIR=${DIND_VOLUME_STAT_DIR:-${DOCKERD_DATA_ROOT}/dind-volume}
838
DIND_VOLUME_CREATED_TS_FILE=${DIND_VOLUME_STAT_DIR}/created
@@ -133,10 +163,6 @@ if [[ -n "${USE_DIND_IMAGES_LIB}" && "${USE_DIND_IMAGES_LIB}" != "false" ]]; the
133163
fi
134164
echo "DOCKERD_PARAMS = ${DOCKERD_PARAMS}"
135165

136-
# Starting monitor
137-
${DIR}/monitor/start.sh <&- &
138-
MONITOR_PID=$!
139-
140166
### start docker with retry
141167
DOCKERD_PID_FILE=/var/run/docker.pid
142168
DOCKERD_PID_MAXWAIT=${DOCKERD_PID_MAXWAIT:-20}
@@ -188,29 +214,8 @@ do
188214

189215
echo "Starting dockerd"
190216
if [ ! -f /sys/fs/cgroup/cgroup.controllers ]; then
191-
echo "Using cgroup v1"
192-
dockerd ${DOCKERD_PARAMS} <&- &
217+
dockerd --feature containerd-snapshotter=false ${DOCKERD_PARAMS} <&- &
193218
else
194-
echo "Using cgroup v2"
195-
CURRENT_CGROUP=$(cat /proc/self/cgroup | sed 's/0:://')
196-
CURRENT_CGROUP_PATH="/sys/fs/cgroup/${CURRENT_CGROUP}"
197-
echo "Current cgroup: ${CURRENT_CGROUP}"
198-
199-
# Move the processes from the current group to the `./init` group,
200-
# otherwise the current group will become of type "domain threaded",
201-
# and it will not be possible to enable required controllers for DinD group.
202-
# Ref: https://github.com/moby/moby/blob/38805f20f9bcc5e87869d6c79d432b166e1c88b4/hack/dind#L28-L38
203-
mkdir -p ${CURRENT_CGROUP_PATH}/init
204-
xargs -rn1 < ${CURRENT_CGROUP_PATH}/cgroup.procs > ${CURRENT_CGROUP_PATH}/init/cgroup.procs || :
205-
206-
# Set `memory.oom.group=0` to disable killing all processes in cgroup at once on OOM.
207-
# if all processes are killed at once, the system will not be able to detect this event;
208-
# instead, we expect separate pipeline steps to be killed if total consumptions exceed limits.
209-
MEMORY_OOM_GROUP="${CURRENT_CGROUP_PATH}/memory.oom.group"
210-
echo "Ensuring memory.oom.group is set to 0 to disable killing all processes in cgroup at once on OOM"
211-
echo "0" > "${MEMORY_OOM_GROUP}"
212-
echo "Current memory.oom.group value: $(cat "${MEMORY_OOM_GROUP}")"
213-
214219
# Explicitly set --cgroup-parent to prevent DinD containers escaping the pod cgroup on cgroup v2.
215220
dockerd --feature containerd-snapshotter=false --cgroup-parent "${CURRENT_CGROUP}/codefresh-dind" ${DOCKERD_PARAMS} <&- &
216221
fi
@@ -246,6 +251,10 @@ do
246251
break
247252
done
248253

254+
# Starting monitor
255+
${DIR}/monitor/start.sh <&- &
256+
MONITOR_PID=$!
257+
249258
# Starting cleaner agent
250259
if [[ -z "${DISABLE_CLEANER_AGENT}" && -z "${SIGTERM}" ]]; then
251260
${DIR}/cleaner/cleaner-agent.sh <&- &

service.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version: 3.0.10
1+
version: 3.0.11

0 commit comments

Comments
 (0)