Skip to content

Commit d1d13d9

Browse files
committed
tests/int/checkpoint: don't hide stderr
For test cases where we used pipes for container's stdin/stdout/stderr, stderr was redirected to the same pipe as stdout, which practically means it was lost. These redirects to fds are needed not only because we check that the container is working by writing to its stdin and reading from its stdout (see check_pipes), but also because bats redirects test stdout/stderr to a file, which makes c/r impossible (as the file is outside of the container). This is why we can't just do something like `2>stderr.log`, and have to do what is done in this commit. Introduce and use another pipe for stderr, to be used for both runc run and runc restore, so that stderr is shown in case of errors. Since its handling is somewhat complicated and is used from 4 places (2 for run, 2 for restore), separate it into helper functions. NOTE the code assumes that runc exits with a non-zero exit code in case there is anything that needs to be shown to a user from runc's stderr. While at it, add error checking to runc run calls. Hopefully, this will help debug those rare checkpoint failures in CI. Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent 627074d commit d1d13d9

File tree

1 file changed

+59
-40
lines changed

1 file changed

+59
-40
lines changed

tests/integration/checkpoint.bats

Lines changed: 59 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,18 @@ function setup_pipes() {
2020
update_config ' (.. | select(.terminal? != null)) .terminal |= false
2121
| (.. | select(.[]? == "sh")) += ["-c", "for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"]'
2222

23-
# Create two sets of pipes
24-
# for stdout/stderr
23+
# Create three sets of pipes for __runc run.
24+
# for stderr
25+
exec {pipe}<> <(:)
26+
exec {err_r}</proc/self/fd/$pipe
27+
exec {err_w}>/proc/self/fd/$pipe
28+
exec {pipe}>&-
29+
# for stdout
2530
exec {pipe}<> <(:)
2631
exec {out_r}</proc/self/fd/$pipe
2732
exec {out_w}>/proc/self/fd/$pipe
2833
exec {pipe}>&-
29-
# ... and stdin
34+
# for stdin
3035
exec {pipe}<> <(:)
3136
exec {in_r}</proc/self/fd/$pipe
3237
exec {in_w}>/proc/self/fd/$pipe
@@ -42,6 +47,50 @@ function check_pipes() {
4247
[[ "${output}" == *"ponG Ping"* ]]
4348
}
4449

50+
# Usage: runc_run_with_pipes container-name
#
# Starts a detached container with "__runc run -d", with its stdin, stdout
# and stderr attached to the file descriptors stored in in_r, out_w and
# err_w (which the caller must have opened beforehand), then checks via
# testcontainer that the container is running.
#
# stdin/stdout are redirected so that check_pipes can be used to check the
# container is working fine. stderr has to be redirected as well because
# otherwise it goes to a bats log file, which is not accessible to CRIU
# (i.e. it is outside of the container), so checkpointing will fail.
#
# If __runc exits with a non-zero status, the status and everything that
# was written to the stderr pipe are printed, and fail is called.
function runc_run_with_pipes() {
	# Keep the status local so it does not leak into the calling test.
	local ret=0

	__runc run -d "$1" <&"${in_r}" >&"${out_w}" 2>&"${err_w}" || ret=$?
	if [ "$ret" -ne 0 ]; then
		echo "runc run -d $1 (status: $ret):"
		# Close our copy of the write end first; while it is open,
		# cat can never see EOF on the pipe.
		exec {err_w}>&-
		cat <&"${err_r}"
		fail "runc run failed"
	fi

	testcontainer "$1" running
}
68+
69+
# Usage: runc_restore_with_pipes work-dir container-name [optional-arguments ...]
#
# Restores a container from ./image-dir with "__runc restore -d", using
# work-dir as the --work-path and passing any optional arguments through
# to "restore". The container's stdin, stdout and stderr are attached to
# the file descriptors stored in in_r, out_w and err_w (which the caller
# must have opened beforehand).
#
# If __runc exits with a non-zero status, the status, everything that was
# written to the stderr pipe, and any "Error" lines (with context) from
# work-dir/restore.log are printed, and fail is called.
#
# On success, checks that the container is running and that
# "runc exec ... echo ok" works inside it.
function runc_restore_with_pipes() {
	# Keep helper variables local so they do not leak into (or clobber
	# anything in) the calling test.
	local workdir="$1"
	shift
	local name="$1"
	shift
	local ret=0

	__runc --criu "$CRIU" restore -d --work-path "$workdir" --image-path ./image-dir "$@" "$name" <&"${in_r}" >&"${out_w}" 2>&"${err_w}" || ret=$?
	if [ "$ret" -ne 0 ]; then
		echo "__runc restore $name failed (status: $ret)"
		# Close our copy of the write end first; while it is open,
		# cat can never see EOF on the pipe.
		exec {err_w}>&-
		cat <&"${err_r}"
		echo "CRIU restore log errors (if any):"
		# The log may be missing or contain no errors; that is fine.
		grep -B 5 Error "$workdir"/restore.log || true
		fail "runc restore failed"
	fi

	testcontainer "$name" running

	runc exec --cwd /bin "$name" echo ok
	[ "$status" -eq 0 ]
	[[ ${output} == "ok" ]]
}
93+
4594
function simple_cr() {
4695
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
4796
[ "$status" -eq 0 ]
@@ -83,11 +132,7 @@ function simple_cr() {
83132

84133
@test "checkpoint --pre-dump and restore" {
85134
setup_pipes
86-
87-
# run busybox
88-
__runc run -d test_busybox <&${in_r} >&${out_w} 2>&${out_w}
89-
90-
testcontainer test_busybox running
135+
runc_run_with_pipes test_busybox
91136

92137
#test checkpoint pre-dump
93138
mkdir parent-dir
@@ -107,19 +152,7 @@ function simple_cr() {
107152
# after checkpoint busybox is no longer running
108153
testcontainer test_busybox checkpointed
109154

110-
# restore from checkpoint
111-
ret=0
112-
__runc --criu "$CRIU" restore -d --work-path ./work-dir --image-path ./image-dir test_busybox <&${in_r} >&${out_w} 2>&${out_w} || ret=$?
113-
grep -B 5 Error ./work-dir/restore.log || true
114-
[ $ret -eq 0 ]
115-
116-
# busybox should be back up and running
117-
testcontainer test_busybox running
118-
119-
runc exec --cwd /bin test_busybox echo ok
120-
[ "$status" -eq 0 ]
121-
[[ ${output} == "ok" ]]
122-
155+
runc_restore_with_pipes ./work-dir test_busybox
123156
check_pipes
124157
}
125158

@@ -131,14 +164,7 @@ function simple_cr() {
131164
fi
132165

133166
setup_pipes
134-
135-
# TCP port for lazy migration
136-
port=27277
137-
138-
# run busybox
139-
__runc run -d test_busybox <&${in_r} >&${out_w} 2>&${out_w}
140-
141-
testcontainer test_busybox running
167+
runc_run_with_pipes test_busybox
142168

143169
# checkpoint the running container
144170
mkdir image-dir
@@ -151,6 +177,9 @@ function simple_cr() {
151177
exec {lazy_r}</proc/self/fd/$pipe {lazy_w}>/proc/self/fd/$pipe
152178
exec {pipe}>&-
153179

180+
# TCP port for lazy migration
181+
port=27277
182+
154183
__runc --criu "$CRIU" checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd ${lazy_w} --work-path ./work-dir --image-path ./image-dir test_busybox &
155184
cpt_pid=$!
156185

@@ -177,17 +206,7 @@ function simple_cr() {
177206
# in time when the last page is lazily transferred to the destination.
178207
# Killing the CRIU on the checkpoint side will let the container
179208
# continue to run if the migration failed at some point.
180-
ret=0
181-
__runc --criu "$CRIU" restore -d --work-path ./image-dir --image-path ./image-dir --lazy-pages test_busybox_restore <&${in_r} >&${out_w} 2>&${out_w} || ret=$?
182-
grep -B 5 Error ./image-dir/restore.log || true
183-
[ $ret -eq 0 ]
184-
185-
# busybox should be back up and running
186-
testcontainer test_busybox_restore running
187-
188-
runc exec --cwd /bin test_busybox_restore echo ok
189-
[ "$status" -eq 0 ]
190-
[[ ${output} == "ok" ]]
209+
runc_restore_with_pipes ./image-dir test_busybox_restore --lazy-pages
191210

192211
wait $cpt_pid
193212

0 commit comments

Comments
 (0)