Skip to content

Commit 172b137

Browse files
authored
Merge pull request containerd#10683 from samuelkarp/shim-exec-fp-test-1.6
[release/1.6] integration: regression test for issue 10589
2 parents 5f43e4d + ab9fedd commit 172b137

File tree

5 files changed

+490
-0
lines changed

5 files changed

+490
-0
lines changed

integration/client/container_linux_test.go

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,17 @@ import (
3535
. "github.com/containerd/containerd"
3636
"github.com/containerd/containerd/cio"
3737
"github.com/containerd/containerd/containers"
38+
"github.com/containerd/containerd/integration/failpoint"
3839
"github.com/containerd/containerd/oci"
40+
"github.com/containerd/containerd/pkg/fifosync"
3941
"github.com/containerd/containerd/plugin"
4042
"github.com/containerd/containerd/runtime/linux/runctypes"
4143
"github.com/containerd/containerd/runtime/v2/runc/options"
4244
"github.com/containerd/containerd/sys"
4345
"github.com/containerd/errdefs"
4446

4547
"github.com/opencontainers/runtime-spec/specs-go"
48+
"github.com/stretchr/testify/assert"
4649
"github.com/stretchr/testify/require"
4750
exec "golang.org/x/sys/execabs"
4851
"golang.org/x/sys/unix"
@@ -1591,3 +1594,210 @@ func TestIssue9103(t *testing.T) {
15911594
})
15921595
}
15931596
}
1597+
1598+
// TestIssue10589 is used as regression case for issue 10589.
1599+
//
1600+
// This issue was caused by a race between init exits and new exec process tracking inside the shim. The test operates
1601+
// by controlling the time between when the shim invokes "runc exec" and when the actual "runc exec" is triggered. This
1602+
// allows validating that races for shim state tracking between pre- and post-start of the exec process do not exist.
1603+
//
1604+
// The workflow is as follows:
1605+
// 1. Create a container as normal
1606+
// 2. Make an exec1 using runc-fp with delayexec
1607+
// 3. Wait until the exec is waiting to start (triggered by delayexec)
1608+
// 4. Kill the container init process (signalling it is easiest)
1609+
// 5. Make an exec2 using runc-fp with delayexec
1610+
// 6. Wait until the exec is waiting to start
1611+
// 7. Allow exec1 to proceed
1612+
// 8. Allow exec2 to proceed
1613+
// 9. See that the container has exited and all execs have exited too
1614+
//
1615+
// https://github.com/containerd/containerd/issues/10589
1616+
func TestIssue10589(t *testing.T) {
1617+
if f := os.Getenv("RUNC_FLAVOR"); f != "" && f != "runc" {
1618+
t.Skip("test requires runc")
1619+
}
1620+
if rt := os.Getenv("TEST_RUNTIME"); rt != "" && rt != plugin.RuntimeRuncV2 {
1621+
t.Skip("test requires io.containerd.runc.v2")
1622+
}
1623+
1624+
client, err := newClient(t, address)
1625+
require.NoError(t, err)
1626+
t.Cleanup(func() {
1627+
client.Close()
1628+
})
1629+
1630+
var (
1631+
image Image
1632+
ctx, cancel = testContext(t)
1633+
id = t.Name()
1634+
)
1635+
t.Cleanup(cancel)
1636+
1637+
image, err = client.GetImage(ctx, testImage)
1638+
require.NoError(t, err)
1639+
1640+
// 1. Create a sleeping container
1641+
t.Log("1. Create a sleeping container")
1642+
container, err := client.NewContainer(ctx, id,
1643+
WithNewSnapshot(id, image),
1644+
WithNewSpec(oci.WithImageConfig(image),
1645+
withProcessArgs("sleep", "inf"),
1646+
oci.WithAnnotations(map[string]string{
1647+
"oci.runc.failpoint.profile": "delayExec",
1648+
}),
1649+
),
1650+
WithRuntime(client.Runtime(), &options.Options{
1651+
BinaryName: "runc-fp",
1652+
}),
1653+
)
1654+
require.NoError(t, err, "create container")
1655+
t.Cleanup(func() {
1656+
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
1657+
err := container.Delete(ctx, WithSnapshotCleanup)
1658+
if err != nil {
1659+
t.Log("delete err", err)
1660+
}
1661+
cancel()
1662+
})
1663+
1664+
task, err := container.NewTask(ctx, empty())
1665+
require.NoError(t, err, "create task")
1666+
t.Cleanup(func() {
1667+
ctx, cancel := context.WithTimeout(ctx, 2*time.Second)
1668+
st, err := task.Delete(ctx, WithProcessKill)
1669+
t.Log("exit status", st)
1670+
if err != nil {
1671+
t.Log("kill err", err)
1672+
}
1673+
cancel()
1674+
})
1675+
1676+
err = task.Start(ctx)
1677+
require.NoError(t, err, "start container")
1678+
1679+
status, err := task.Status(ctx)
1680+
require.NoError(t, err, "container status")
1681+
require.Equal(t, Running, status.Status)
1682+
1683+
// 2. Create an exec
1684+
t.Log("2. Create exec1")
1685+
exec1ReadyFifo, err := fifosync.NewWaiter(filepath.Join(t.TempDir(), "exec1-ready.fifo"), 0600)
1686+
require.NoError(t, err, "create exec1 ready fifo")
1687+
exec1DelayFifo, err := fifosync.NewTrigger(filepath.Join(t.TempDir(), "exec1-delay.fifo"), 0600)
1688+
require.NoError(t, err, "create exec1 delay fifo")
1689+
exec1, err := task.Exec(ctx, "exec1", &specs.Process{
1690+
Args: []string{"/bin/sleep", "301"},
1691+
Cwd: "/",
1692+
Env: []string{
1693+
failpoint.DelayExecReadyEnv + "=" + exec1ReadyFifo.Name(),
1694+
failpoint.DelayExecDelayEnv + "=" + exec1DelayFifo.Name(),
1695+
},
1696+
}, cio.NullIO)
1697+
require.NoError(t, err, "create exec1")
1698+
1699+
exec1done := make(chan struct{})
1700+
go func() {
1701+
defer close(exec1done)
1702+
t.Log("Starting exec1")
1703+
err := exec1.Start(ctx)
1704+
assert.Error(t, err, "start exec1")
1705+
t.Logf("error starting exec1: %s", err)
1706+
}()
1707+
1708+
// 3. Wait until the exec is waiting to start
1709+
t.Log("3. Wait until exec1 is waiting to start")
1710+
err = exec1ReadyFifo.Wait()
1711+
require.NoError(t, err, "open exec1 fifo")
1712+
1713+
// 4. Kill the container init process
1714+
t.Log("4. Kill the container init process")
1715+
target := task.Pid()
1716+
t.Logf("Killing main pid (%v) of container %s", target, container.ID())
1717+
syscall.Kill(int(target), syscall.SIGKILL)
1718+
status, err = task.Status(ctx)
1719+
require.NoError(t, err, "container status")
1720+
t.Log("container status", status.Status)
1721+
1722+
// 5. Make an exec (2) using this failpoint
1723+
t.Log("5. Create exec2")
1724+
exec2ReadyFifo, err := fifosync.NewWaiter(filepath.Join(t.TempDir(), "exec2-ready.fifo"), 0600)
1725+
require.NoError(t, err, "create exec2 ready fifo: %q", exec2ReadyFifo)
1726+
exec2DelayFifo, err := fifosync.NewTrigger(filepath.Join(t.TempDir(), "exec2-delay.fifo"), 0600)
1727+
require.NoError(t, err, "create exec2 delay fifo: %q", exec2DelayFifo)
1728+
exec2, err := task.Exec(ctx, "exec2", &specs.Process{
1729+
Args: []string{"/bin/sleep", "302"},
1730+
Cwd: "/",
1731+
Env: []string{
1732+
failpoint.DelayExecReadyEnv + "=" + exec2ReadyFifo.Name(),
1733+
failpoint.DelayExecDelayEnv + "=" + exec2DelayFifo.Name(),
1734+
},
1735+
}, cio.NullIO)
1736+
require.NoError(t, err, "create exec2")
1737+
1738+
exec2done := make(chan struct{})
1739+
didExec2Run := true
1740+
go func() {
1741+
defer close(exec2done)
1742+
t.Log("Starting exec2")
1743+
err := exec2.Start(ctx)
1744+
assert.Error(t, err, "start exec2")
1745+
t.Logf("error starting exec2: %s", err)
1746+
}()
1747+
1748+
// 6. Wait until the exec is waiting to start
1749+
t.Log("6. Wait until exec2 is waiting to start")
1750+
exec2ready := make(chan struct{})
1751+
go func() {
1752+
exec2ReadyFifo.Wait()
1753+
close(exec2ready)
1754+
}()
1755+
select {
1756+
case <-exec2ready:
1757+
case <-exec2done:
1758+
didExec2Run = false
1759+
}
1760+
1761+
// 7. Allow exec=1 to proceed
1762+
t.Log("7. Allow exec=1 to proceed")
1763+
err = exec1DelayFifo.Trigger()
1764+
assert.NoError(t, err, "trigger exec1 fifo")
1765+
status, err = task.Status(ctx)
1766+
require.NoError(t, err, "container status")
1767+
t.Log("container status", status.Status)
1768+
<-exec1done
1769+
status, err = task.Status(ctx)
1770+
require.NoError(t, err, "container status")
1771+
t.Log("container status", status.Status)
1772+
1773+
// 8. Allow exec=2 to proceed
1774+
if didExec2Run {
1775+
t.Log("8. Allow exec2 to proceed")
1776+
err = exec2DelayFifo.Trigger()
1777+
assert.NoError(t, err, "trigger exec2 fifo")
1778+
status, err = task.Status(ctx)
1779+
require.NoError(t, err, "container status")
1780+
t.Log("container status", status.Status)
1781+
<-exec2done
1782+
status, err = task.Status(ctx)
1783+
require.NoError(t, err, "container status")
1784+
t.Log("container status", status.Status)
1785+
} else {
1786+
t.Log("8. Skip exec2")
1787+
}
1788+
1789+
// 9. Validate
1790+
t.Log("9. Validate")
1791+
status, err = exec1.Status(ctx)
1792+
require.NoError(t, err, "exec1 status")
1793+
t.Logf("exec1 status: %s", status.Status)
1794+
assert.Equal(t, Created, status.Status)
1795+
status, err = exec2.Status(ctx)
1796+
require.NoError(t, err, "exec2 status")
1797+
t.Logf("exec2 status: %s", status.Status)
1798+
assert.Equal(t, Created, status.Status)
1799+
status, err = task.Status(ctx)
1800+
t.Logf("task status: %s", status.Status)
1801+
require.NoError(t, err, "container status")
1802+
assert.Equal(t, Stopped, status.Status)
1803+
}
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
//go:build linux
2+
3+
/*
4+
Copyright The containerd Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package main
20+
21+
import (
22+
"context"
23+
"encoding/json"
24+
"errors"
25+
"fmt"
26+
"io"
27+
"os"
28+
"strings"
29+
30+
"github.com/opencontainers/runtime-spec/specs-go"
31+
"github.com/sirupsen/logrus"
32+
33+
"github.com/containerd/containerd/integration/failpoint"
34+
"github.com/containerd/containerd/pkg/fifosync"
35+
)
36+
37+
// delayExec delays an "exec" command until a trigger is received from the calling test program. This can be used to
38+
// test races around container lifecycle and exec processes.
39+
func delayExec(ctx context.Context, method invoker) error {
40+
isExec := strings.Contains(strings.Join(os.Args, ","), ",exec,")
41+
if !isExec {
42+
if err := method(ctx); err != nil {
43+
return err
44+
}
45+
return nil
46+
}
47+
logrus.Debug("EXEC!")
48+
49+
if err := delay(); err != nil {
50+
return err
51+
}
52+
if err := method(ctx); err != nil {
53+
return err
54+
}
55+
return nil
56+
}
57+
58+
func delay() error {
59+
ready, delay, err := fifoFromProcessEnv()
60+
if err != nil {
61+
return err
62+
}
63+
if err := ready.Trigger(); err != nil {
64+
return err
65+
}
66+
return delay.Wait()
67+
}
68+
69+
// fifoFromProcessEnv finds a fifo specified in the environment variables of an exec process
70+
func fifoFromProcessEnv() (fifosync.Trigger, fifosync.Waiter, error) {
71+
env, err := processEnvironment()
72+
if err != nil {
73+
return nil, nil, err
74+
}
75+
76+
readyName, ok := env[failpoint.DelayExecReadyEnv]
77+
if !ok {
78+
return nil, nil, fmt.Errorf("fifo: failed to find %q env var in %v", failpoint.DelayExecReadyEnv, env)
79+
}
80+
delayName, ok := env[failpoint.DelayExecDelayEnv]
81+
if !ok {
82+
return nil, nil, fmt.Errorf("fifo: failed to find %q env var in %v", failpoint.DelayExecDelayEnv, env)
83+
}
84+
logrus.WithField("ready", readyName).WithField("delay", delayName).Debug("Found FIFOs!")
85+
readyFIFO, err := fifosync.NewTrigger(readyName, 0600)
86+
if err != nil {
87+
return nil, nil, err
88+
}
89+
delayFIFO, err := fifosync.NewWaiter(delayName, 0600)
90+
if err != nil {
91+
return nil, nil, err
92+
}
93+
return readyFIFO, delayFIFO, nil
94+
}
95+
96+
func processEnvironment() (map[string]string, error) {
97+
idx := 2
98+
for ; idx < len(os.Args); idx++ {
99+
if os.Args[idx] == "--process" {
100+
break
101+
}
102+
}
103+
104+
if idx >= len(os.Args)-1 || os.Args[idx] != "--process" {
105+
return nil, errors.New("env: option --process required")
106+
}
107+
108+
specFile := os.Args[idx+1]
109+
f, err := os.OpenFile(specFile, os.O_RDONLY, 0o644)
110+
if err != nil {
111+
return nil, fmt.Errorf("env: failed to open %s: %w", specFile, err)
112+
}
113+
114+
b, err := io.ReadAll(f)
115+
if err != nil {
116+
return nil, fmt.Errorf("env: failed to read spec from %q", specFile)
117+
}
118+
var spec specs.Process
119+
if err := json.Unmarshal(b, &spec); err != nil {
120+
return nil, fmt.Errorf("env: failed to unmarshal spec from %q: %w", specFile, err)
121+
}
122+
123+
// XXX: env vars can be specified multiple times, but we only keep one
124+
env := make(map[string]string)
125+
for _, e := range spec.Env {
126+
k, v, _ := strings.Cut(e, "=")
127+
env[k] = v
128+
}
129+
130+
return env, nil
131+
}

integration/failpoint/cmd/runc-fp/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"syscall"
2727

2828
"github.com/containerd/containerd/oci"
29+
2930
"github.com/sirupsen/logrus"
3031
)
3132

@@ -40,6 +41,7 @@ type invokerInterceptor func(context.Context, invoker) error
4041
var (
4142
failpointProfiles = map[string]invokerInterceptor{
4243
"issue9103": issue9103KillInitAfterCreate,
44+
"delayExec": delayExec,
4345
}
4446
)
4547

integration/failpoint/const.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package failpoint
18+
19+
const (
20+
DelayExecReadyEnv = "_RUNC_FP_DELAY_EXEC_READY"
21+
DelayExecDelayEnv = "_RUNC_FP_DELAY_EXEC_DELAY"
22+
)

0 commit comments

Comments
 (0)