Skip to content
This repository was archived by the owner on Aug 29, 2018. It is now read-only.

Commit c105889

Browse files
Merge pull request #102 from smarterclayton/more_integration_resilience
Make integration tests more resilient
2 parents e76e589 + 3ce34f8 commit c105889

File tree

4 files changed

+76
-122
lines changed

4 files changed

+76
-122
lines changed

cleanup/cleanup_failures.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ package cleanup
33
import (
44
"github.com/openshift/geard/docker"
55
"os"
6-
"time"
76
"strings"
7+
"time"
88
)
99

1010
type FailureCleanup struct {
@@ -42,17 +42,17 @@ func (r *FailureCleanup) Clean(ctx *CleanerContext) {
4242

4343
retentionAge, err := time.ParseDuration(r.retentionAge)
4444
for _, cinfo := range gears {
45-
container, err := client.GetContainer(cinfo.ID, false)
45+
container, err := client.InspectContainer(cinfo.ID)
4646
if err != nil {
4747
ctx.LogError.Printf("Unable to retrieve container information for %s: %s", container.Name, err.Error())
4848
continue
4949
}
5050

5151
// Happy container or not...
5252
if 0 == container.State.ExitCode ||
53-
container.State.Running ||
54-
strings.HasSuffix(container.Name, "-data") ||
55-
time.Since(container.State.FinishedAt) < retentionAge {
53+
container.State.Running ||
54+
strings.HasSuffix(container.Name, "-data") ||
55+
time.Since(container.State.FinishedAt) < retentionAge {
5656
continue
5757
}
5858

@@ -69,4 +69,3 @@ func (r *FailureCleanup) Clean(ctx *CleanerContext) {
6969
}
7070
}
7171
}
72-

cmd/switchns/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ func runCommandInContainer(name string, command []string, environment []string)
159159
os.Exit(3)
160160
}
161161

162-
container, err := client.GetContainer(name, false)
162+
container, err := client.InspectContainer(name)
163163
if err != nil {
164164
fmt.Printf("Unable to locate container named %v\n", name)
165165
os.Exit(3)

docker/docker.go

Lines changed: 1 addition & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"path/filepath"
1111
"strconv"
1212
"strings"
13-
"time"
1413
)
1514

1615
type containerLookupResult struct {
@@ -34,27 +33,6 @@ func (d *DockerClient) ForceCleanContainer(ID string) error {
3433
return d.client.RemoveContainer(docker.RemoveContainerOptions{ID, true, true})
3534
}
3635

37-
func lookupContainer(containerName string, client *docker.Client, waitForContainer bool) containerLookupResult {
38-
timeout := 0
39-
if waitForContainer {
40-
timeout = 60
41-
}
42-
43-
for i := 0; i <= timeout; i++ {
44-
if container, err := client.InspectContainer(containerName); err != nil {
45-
if !strings.HasPrefix(err.Error(), "No such container") {
46-
return containerLookupResult{nil, err}
47-
}
48-
if timeout > 0 {
49-
time.Sleep(time.Second)
50-
}
51-
} else {
52-
return containerLookupResult{container, nil}
53-
}
54-
}
55-
return containerLookupResult{nil, fmt.Errorf("container not active")}
56-
}
57-
5836
func GetConnection(dockerSocket string) (*DockerClient, error) {
5937
var (
6038
client *docker.Client
@@ -87,23 +65,6 @@ func (d *DockerClient) InspectContainer(containerName string) (*docker.Container
8765
return c, err
8866
}
8967

90-
func (d *DockerClient) GetContainer(containerName string, waitForContainer bool) (*docker.Container, error) {
91-
timeoutChannel := make(chan containerLookupResult, 1)
92-
var container *docker.Container
93-
go func() { timeoutChannel <- lookupContainer(containerName, d.client, waitForContainer) }()
94-
select {
95-
case cInfo := <-timeoutChannel:
96-
if cInfo.Error != nil {
97-
return nil, cInfo.Error
98-
}
99-
container = cInfo.Container
100-
case <-time.After(time.Minute):
101-
return nil, fmt.Errorf("timeout waiting for container")
102-
}
103-
104-
return container, nil
105-
}
106-
10768
func (d *DockerClient) GetImage(imageName string) (*docker.Image, error) {
10869
if img, err := d.client.InspectImage(imageName); err != nil {
10970
if err == docker.ErrNoSuchImage {
@@ -121,7 +82,7 @@ func (d *DockerClient) GetImage(imageName string) (*docker.Image, error) {
12182
func (d *DockerClient) GetContainerIPs(ids []string) (map[string]string, error) {
12283
ips := make(map[string]string)
12384
for _, id := range ids {
124-
if cInfo, err := d.GetContainer(id, false); err == nil {
85+
if cInfo, err := d.InspectContainer(id); err == nil {
12586
ips[cInfo.NetworkSettings.IPAddress] = id
12687
}
12788
}

tests/integration_test.go

Lines changed: 69 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,15 @@ import (
2020
)
2121

2222
const (
23-
CONTAINER_STATE_CHANGE_TIMEOUT = time.Second * 15
24-
DOCKER_STATE_CHANGE_TIMEOUT = time.Second * 5
25-
SYSTEMD_ACTION_DELAY = time.Second * 1
26-
CONTAINER_CHECK_INTERVAL = time.Second / 20
27-
TestImage = "pmorie/sti-html-app"
28-
EnvImage = "ccoleman/envtest"
23+
TimeoutContainerStateChange = time.Second * 15
24+
TimeoutDockerStateChange = time.Second * 5
25+
TimeoutDockerWait = time.Second * 2
26+
27+
IntervalContainerCheck = time.Second / 20
28+
IntervalHttpCheck = time.Second / 10
29+
30+
TestImage = "pmorie/sti-html-app"
31+
EnvImage = "ccoleman/envtest"
2932
)
3033

3134
//Hookup gocheck with go test
@@ -72,7 +75,10 @@ func (s *IntegrationTestSuite) assertFileAbsent(c *chk.C, path string) {
7275
}
7376

7477
func (s *IntegrationTestSuite) getContainerPid(id containers.Identifier) int {
75-
container, _ := s.dockerClient.GetContainer(id.ContainerFor(), true)
78+
container, err := s.dockerClient.InspectContainer(id.ContainerFor())
79+
if err != nil {
80+
return 0
81+
}
7682
return container.State.Pid
7783
}
7884

@@ -124,6 +130,20 @@ func until(duration, every time.Duration, f func() bool) bool {
124130
}
125131
}
126132

133+
func isContainerAvailable(client *docker.DockerClient, id string) (bool, error) {
134+
container, err := client.InspectContainer(id)
135+
if err == docker.ErrNoSuchContainer {
136+
return false, nil
137+
}
138+
if err != nil {
139+
return true, err
140+
}
141+
if container.State.Running && container.State.Pid != 0 {
142+
return true, nil
143+
}
144+
return false, nil
145+
}
146+
127147
func (s *IntegrationTestSuite) assertContainerStarts(c *chk.C, id containers.Identifier) {
128148
active, _ := s.unitState(id)
129149
switch active {
@@ -150,20 +170,29 @@ func (s *IntegrationTestSuite) assertContainerStarts(c *chk.C, id containers.Ide
150170
return false
151171
}
152172

153-
if !until(CONTAINER_STATE_CHANGE_TIMEOUT, time.Second/20, isRunning) {
173+
if !until(TimeoutContainerStateChange, time.Second/20, isRunning) {
154174
c.Errorf("Timeout during start of %s, never got to 'active' state", id)
155175
c.FailNow()
156176
}
157177

158-
container, err := s.dockerClient.GetContainer(id.ContainerFor(), false)
159-
if err != nil {
160-
c.Error("Can't check container "+id, err)
161-
c.FailNow()
178+
// Docker does not immediately return container status - possibly due to races inside of the
179+
// daemon
180+
failed := false
181+
isContainerUp := func() bool {
182+
done, err := isContainerAvailable(s.dockerClient, id.ContainerFor())
183+
if err != nil {
184+
failed = true
185+
c.Error("Docker couldn't return container info", err)
186+
c.FailNow()
187+
}
188+
return done
162189
}
163-
if !container.State.Running {
164-
c.Logf("Container %s exists, but is not running - race condition %+v", id, container.State)
165-
//c.Errorf("Container %s is not running %+v", id, container)
166-
//c.FailNow()
190+
191+
if !until(TimeoutDockerWait, IntervalHttpCheck, isContainerUp) {
192+
if !failed {
193+
c.Errorf("Docker never reported the container running %s", id)
194+
}
195+
c.FailNow()
167196
}
168197
}
169198

@@ -192,12 +221,12 @@ func (s *IntegrationTestSuite) assertContainerStops(c *chk.C, id containers.Iden
192221
return false
193222
}
194223

195-
if !until(CONTAINER_STATE_CHANGE_TIMEOUT, CONTAINER_CHECK_INTERVAL, isStopped) {
224+
if !until(TimeoutContainerStateChange, IntervalContainerCheck, isStopped) {
196225
c.Errorf("Timeout during start of %s, never got to 'inactive' state", id)
197226
c.FailNow()
198227
}
199228

200-
_, err := s.dockerClient.GetContainer(id.ContainerFor(), false)
229+
_, err := s.dockerClient.InspectContainer(id.ContainerFor())
201230
if err == nil {
202231
c.Errorf("Container %s is still active in docker, should be stopped and removed", id.ContainerFor())
203232
c.FailNow()
@@ -218,19 +247,30 @@ func (s *IntegrationTestSuite) assertContainerRestarts(c *chk.C, id containers.I
218247
return false
219248
}
220249

221-
if !until(CONTAINER_STATE_CHANGE_TIMEOUT, CONTAINER_CHECK_INTERVAL, isStarted) {
250+
if !until(TimeoutContainerStateChange, IntervalContainerCheck, isStarted) {
222251
active, sub := s.unitState(id)
223252
c.Errorf("Timeout during restart of %s, never got back to 'active' state (%s/%s)", id, active, sub)
224253
c.FailNow()
225254
}
226255

227-
container, err := s.dockerClient.GetContainer(id.ContainerFor(), false)
228-
if err != nil {
229-
c.Error("Can't check container "+id, err)
230-
c.FailNow()
256+
// Docker does not immediately return container status - possibly due to races inside of the
257+
// daemon
258+
failed := false
259+
isContainerUp := func() bool {
260+
done, err := isContainerAvailable(s.dockerClient, id.ContainerFor())
261+
if err != nil {
262+
failed = true
263+
c.Error("Docker couldn't return container info", err)
264+
c.FailNow()
265+
}
266+
return done
231267
}
232-
if !container.State.Running {
233-
c.Logf("Container %s exists, but is not running - race condition %+v", id, container.State)
268+
269+
if !until(TimeoutDockerWait, IntervalHttpCheck, isContainerUp) {
270+
if !failed {
271+
c.Errorf("Docker never reported the container running %s", id)
272+
}
273+
c.FailNow()
234274
}
235275
}
236276

@@ -257,7 +297,7 @@ func (s *IntegrationTestSuite) SetUpSuite(c *chk.C) {
257297
containers, err := s.dockerClient.ListContainers()
258298
c.Assert(err, chk.IsNil)
259299
for _, cinfo := range containers {
260-
container, err := s.dockerClient.GetContainer(cinfo.ID, false)
300+
container, err := s.dockerClient.InspectContainer(cinfo.ID)
261301
c.Assert(err, chk.IsNil)
262302
if strings.HasPrefix(container.Name, "IntTest") {
263303
s.dockerClient.ForceCleanContainer(cinfo.ID)
@@ -332,7 +372,6 @@ func (s *IntegrationTestSuite) TestSimpleInstallWithEnv(c *chk.C) {
332372
c.Log(string(data))
333373
c.Assert(err, chk.IsNil)
334374
s.assertContainerStarts(c, id)
335-
//time.Sleep(time.Second * 5) // startup time is indeterminate unfortunately because gear init --post continues to run
336375

337376
cmd = exec.Command("/usr/bin/gear", "status", hostContainerId)
338377
data, err = cmd.CombinedOutput()
@@ -375,7 +414,7 @@ func (s *IntegrationTestSuite) TestIsolateInstallAndStartImage(c *chk.C) {
375414
}
376415
return false
377416
}
378-
if !until(time.Second*15, time.Second/10, httpAlive) {
417+
if !until(TimeoutContainerStateChange, IntervalHttpCheck, httpAlive) {
379418
c.Errorf("Unable to retrieve a 200 status code from port %d", ports[0].External)
380419
c.FailNow()
381420
}
@@ -435,7 +474,7 @@ func (s *IntegrationTestSuite) TestStartStopContainer(c *chk.C) {
435474
}
436475
return false
437476
}
438-
if !until(time.Second*15, time.Second/10, httpAlive) {
477+
if !until(TimeoutContainerStateChange, IntervalHttpCheck, httpAlive) {
439478
c.Errorf("Unable to retrieve a 200 status code from port %d", ports[0].External)
440479
c.FailNow()
441480
}
@@ -550,7 +589,7 @@ func (s *IntegrationTestSuite) TestLongContainerName(c *chk.C) {
550589
}
551590
return false
552591
}
553-
if !until(time.Second*15, time.Second/10, httpAlive) {
592+
if !until(TimeoutContainerStateChange, IntervalHttpCheck, httpAlive) {
554593
c.Errorf("Unable to retrieve a 200 status code from port %d", ports[0].External)
555594
c.FailNow()
556595
}
@@ -585,51 +624,6 @@ func (s *IntegrationTestSuite) TestContainerNetLinks(c *chk.C) {
585624
c.Assert(strings.Contains(string(data), "tcp dpt:tproxy to:74.125.239.114"), chk.Equals, true)
586625
}
587626

588-
// func (s *IntegrationTestSuite) TestSocketActivatedInstallAndStartImage(c *chk.C) {
589-
// id, err := containers.NewIdentifier("IntTest007")
590-
// c.Assert(err, chk.IsNil)
591-
// s.containerIds = append(s.containerIds, id)
592-
//
593-
// hostContainerId := fmt.Sprintf("%v/%v", s.daemonURI, id)
594-
//
595-
// cmd := exec.Command("/usr/bin/gear", "install", "pmorie/sti-html-app", hostContainerId, "--start", "--ports=8080:4005", "--socket-activated")
596-
// data, err := cmd.CombinedOutput()
597-
// c.Log(string(data))
598-
// c.Assert(err, chk.IsNil)
599-
//
600-
// s.assertFilePresent(c, id.UnitPathFor(), 0664, true)
601-
// paths, err := filepath.Glob(id.VersionedUnitPathFor("*"))
602-
// c.Assert(err, chk.IsNil)
603-
// for _, p := range paths {
604-
// s.assertFilePresent(c, p, 0664, true)
605-
// }
606-
//
607-
// ports, err := containers.GetExistingPorts(id)
608-
// c.Assert(err, chk.IsNil)
609-
// c.Assert(len(ports), chk.Equals, 1)
610-
//
611-
// t := time.NewTicker(time.Second/10)
612-
// defer t.Stop()
613-
// for true {
614-
// select {
615-
// case <-t.C:
616-
// resp, err := http.Get(fmt.Sprintf("http://0.0.0.0:%v", ports[0].External))
617-
// if err == nil {
618-
// c.Logf("attempting http .. response code %v", resp.StatusCode)
619-
// if resp.StatusCode == 200 {
620-
// break
621-
// }
622-
// }else{
623-
// c.Logf("attempting http .. error %v", err)
624-
// }
625-
// case <-time.After(time.Second * 15):
626-
// c.Fail()
627-
// }
628-
// }
629-
// s.assertFilePresent(c, filepath.Join(id.RunPathFor(), "container-init.sh"), 0700, false)
630-
// s.assertContainerState(c, id, CONTAINER_STARTED)
631-
// }
632-
633627
func (s *IntegrationTestSuite) TearDownSuite(c *chk.C) {
634628
for _, id := range s.containerIds {
635629
hostContainerId := fmt.Sprintf("%v/%v", s.daemonURI, id)

0 commit comments

Comments
 (0)