Skip to content
This repository was archived by the owner on Aug 29, 2018. It is now read-only.

Commit 5b16003

Browse files
Merge pull request #99 from smarterclayton/more_resilience_in_linking_init
Make gear init --post more resilient
2 parents 230f9cb + 4c318c8 commit 5b16003

File tree

3 files changed

Lines changed: 35 additions & 14 deletions

3 files changed

Lines changed: 35 additions & 14 deletions

cmd/gear/commands.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ var (
8282
)
8383

8484
func init() {
85+
log.SetFlags(0)
8586
defaultTransport.Set("http")
8687
}
8788

cmd/gear/support.go

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -155,26 +155,35 @@ func InitPostStart(dockerSocket string, id containers.Identifier) error {
155155
if file, err := os.Open(id.NetworkLinksPathFor()); err == nil {
156156
defer file.Close()
157157

158-
const ContainerInterval = time.Second / 3
159-
const ContainerWait = time.Second * 12
158+
const ContainerInterval = time.Second / 10
159+
const ContainerWait = time.Second * 15
160160
for i := 0; i < int(ContainerWait/ContainerInterval); i++ {
161-
if container, err = d.GetContainer(id.ContainerFor(), true); err != nil {
161+
if container, err = d.InspectContainer(id.ContainerFor()); err != nil {
162+
if err == docker.ErrNoSuchContainer {
163+
//log.Printf("Waiting for container to be available.")
164+
time.Sleep(ContainerInterval)
165+
continue
166+
}
162167
return err
163168
}
164-
if container.State.Running {
169+
if container.State.Running && container.State.Pid != 0 {
165170
break
166171
} else {
167-
log.Printf("Waiting for container to run.")
172+
//log.Printf("Waiting for container to report available.")
168173
time.Sleep(ContainerInterval)
169174
}
170175
}
171176

177+
if container == nil {
178+
return fmt.Errorf("container %s was not visible through Docker before timeout", id.ContainerFor())
179+
}
180+
172181
pid, err := d.ChildProcessForContainer(container)
173182
if err != nil {
174183
return err
175184
}
176-
if pid < 2 {
177-
return errors.New("support: child PID is not correct")
185+
if pid <= 1 {
186+
return errors.New("child PID is not correct")
178187
}
179188
log.Printf("Updating network namespaces for %d", pid)
180189
if err := updateNamespaceNetworkLinks(pid, file); err != nil {
@@ -321,6 +330,8 @@ func updateNamespaceNetworkLinks(pid int, ports io.Reader) error {
321330
continue
322331
}
323332

333+
log.Printf("Mapping %s(%s):%d -> %s:%d", sourceAddr.String(), srcIP.String(), link.FromPort, destIP.String(), link.ToPort)
334+
324335
data := containers.OutboundNetworkIptables{sourceAddr.String(), srcIP.IP.String(), link.FromPort, destIP.String(), link.ToPort}
325336
if err := containers.OutboundNetworkIptablesTemplate.Execute(stdin, &data); err != nil {
326337
log.Printf("gear: Unable to write network link rules: %v", err)

docker/docker.go

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"github.com/fsouza/go-dockerclient"
77
"github.com/fsouza/go-dockerclient/engine"
88
"io/ioutil"
9-
"log"
109
"os"
1110
"path/filepath"
1211
"strconv"
@@ -53,7 +52,7 @@ func lookupContainer(containerName string, client *docker.Client, waitForContain
5352
return containerLookupResult{container, nil}
5453
}
5554
}
56-
return containerLookupResult{nil, fmt.Errorf("Container not active")}
55+
return containerLookupResult{nil, fmt.Errorf("container not active")}
5756
}
5857

5958
func GetConnection(dockerSocket string) (*DockerClient, error) {
@@ -78,6 +77,16 @@ func GetConnection(dockerSocket string) (*DockerClient, error) {
7877
return &DockerClient{client, executionDriver}, nil
7978
}
8079

80+
var ErrNoSuchContainer = errors.New("can't find container")
81+
82+
func (d *DockerClient) InspectContainer(containerName string) (*docker.Container, error) {
83+
c, err := d.client.InspectContainer(containerName)
84+
if err != nil && strings.HasPrefix(err.Error(), "No such container") {
85+
err = ErrNoSuchContainer
86+
}
87+
return c, err
88+
}
89+
8190
func (d *DockerClient) GetContainer(containerName string, waitForContainer bool) (*docker.Container, error) {
8291
timeoutChannel := make(chan containerLookupResult, 1)
8392
var container *docker.Container
@@ -89,7 +98,7 @@ func (d *DockerClient) GetContainer(containerName string, waitForContainer bool)
8998
}
9099
container = cInfo.Container
91100
case <-time.After(time.Minute):
92-
return nil, fmt.Errorf("Timeout waiting for container")
101+
return nil, fmt.Errorf("timeout waiting for container")
93102
}
94103

95104
return container, nil
@@ -120,11 +129,11 @@ func (d *DockerClient) GetContainerIPs(ids []string) (map[string]string, error)
120129
}
121130

122131
func (d *DockerClient) ChildProcessForContainer(container *docker.Container) (int, error) {
123-
log.Printf("docker: execution driver %s", d.executionDriver)
132+
//log.Printf("docker: execution driver %s", d.executionDriver)
124133
if d.executionDriver == "" || strings.HasPrefix(d.executionDriver, "lxc") {
125134
//Parent pid (LXC or N-Spawn)
126135
ppid := strconv.Itoa(container.State.Pid)
127-
log.Printf("docker: parent pid %s", ppid)
136+
//log.Printf("docker: parent pid %s", ppid)
128137

129138
//Lookup any child of parent pid
130139
files, _ := filepath.Glob(filepath.Join("/proc", "*", "stat"))
@@ -147,7 +156,7 @@ func (d *DockerClient) ChildProcessForContainer(container *docker.Container) (in
147156
if container.State.Pid != 0 {
148157
return container.State.Pid, nil
149158
}
150-
return 0, fmt.Errorf("Container not found")
159+
return 0, fmt.Errorf("unable to find child process for container %s - race condition with Docker?", container.ID)
151160
}
152-
return 0, errors.New(fmt.Sprintf("Unable to find child process for container", container.ID))
161+
return 0, fmt.Errorf("unable to find child process for container %s", container.ID)
153162
}

0 commit comments

Comments
 (0)