Skip to content

Commit b13df42

Browse files
authored
Merge pull request #96 from noxiouz/porto_wait
isolate(porto): add waitLoop to cleanup dead containers
2 parents e4100ec + f49cfd4 commit b13df42

File tree

3 files changed

+82
-0
lines changed

3 files changed

+82
-0
lines changed

isolate/porto/box.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ type Box struct {
5757
muContainers sync.Mutex
5858
containers map[string]*container
5959
blobRepo BlobRepository
60+
61+
onClose context.CancelFunc
6062
}
6163

6264
// NewBox creates new Box
@@ -111,19 +113,95 @@ func NewBox(ctx context.Context, cfg isolate.BoxConfig) (isolate.Box, error) {
111113
},
112114
}
113115

116+
ctx, onClose := context.WithCancel(ctx)
114117
box := &Box{
115118
config: config,
116119
instanceID: uuid.New(),
117120
transport: tr,
118121
spawnSM: semaphore.New(config.SpawnConcurrency),
119122
containers: make(map[string]*container),
123+
onClose: onClose,
120124

121125
blobRepo: blobRepo,
122126
}
123127

128+
go box.waitLoop(ctx)
129+
124130
return box, nil
125131
}
126132

133+
func (b *Box) waitLoop(ctx context.Context) {
134+
apexctx.GetLogger(ctx).Info("start waitLoop")
135+
var (
136+
portoConn porto.API
137+
err error
138+
)
139+
140+
var waitTimeout = 30 * time.Second
141+
142+
closed := func(portoConn porto.API) bool {
143+
select {
144+
case <-ctx.Done():
145+
if portoConn != nil {
146+
portoConn.Close()
147+
}
148+
return true
149+
default:
150+
return false
151+
}
152+
}
153+
154+
LOOP:
155+
for {
156+
apexctx.GetLogger(ctx).Info("next iteration of waitLoop")
157+
if closed(portoConn) {
158+
return
159+
}
160+
// Connect to Porto if we have not connected yet.
161+
// In case of error: wait either a fixed timeout or closing of Box
162+
if portoConn == nil {
163+
apexctx.GetLogger(ctx).Info("waitLoop: connect to Portod")
164+
portoConn, err = porto.Connect()
165+
if err != nil {
166+
apexctx.GetLogger(ctx).WithError(err).Warn("unable to connect to Portod")
167+
select {
168+
case <-time.After(time.Second):
169+
continue LOOP
170+
case <-ctx.Done():
171+
return
172+
}
173+
}
174+
}
175+
176+
// * means all containers
177+
// if no containers dead for waitTimeout, name will be an empty string
178+
containerName, err := portoConn.Wait([]string{"*"}, 30*waitTimeout)
179+
if err != nil {
180+
portoConn.Close()
181+
portoConn = nil
182+
continue LOOP
183+
}
184+
185+
if containerName != "" {
186+
apexctx.GetLogger(ctx).Infof("Wait reports %s to be dead", containerName)
187+
b.muContainers.Lock()
188+
container, ok := b.containers[containerName]
189+
if ok {
190+
delete(b.containers, containerName)
191+
}
192+
rest := len(b.containers)
193+
b.muContainers.Unlock()
194+
if ok {
195+
if err = container.Kill(); err != nil {
196+
apexctx.GetLogger(ctx).WithError(err).Errorf("Killing %s error", containerName)
197+
}
198+
}
199+
200+
apexctx.GetLogger(ctx).Infof("%d containers are being tracked now", rest)
201+
}
202+
}
203+
}
204+
127205
func (b *Box) appLayerName(appname string) string {
128206
return b.instanceID + appname
129207
}
@@ -271,5 +349,6 @@ func (b *Box) Spawn(ctx context.Context, config isolate.SpawnConfig, output io.W
271349
// Close releases all resources such as idle connections from http.Transport
272350
func (b *Box) Close() error {
273351
b.transport.CloseIdleConnections()
352+
b.onClose()
274353
return nil
275354
}

isolate/porto/container.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ func (c *container) start(portoConn porto.API, output io.Writer) error {
158158

159159
func (c *container) Kill() (err error) {
160160
defer apexctx.GetLogger(c.ctx).WithField("id", c.containerID).Trace("Kill container").Stop(&err)
161+
containersKilledCounter.Inc(1)
161162
portoConn, err := porto.Connect()
162163
if err != nil {
163164
return err

isolate/porto/metrics.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ var (
1212
containersCreatedCounter = metrics.NewCounter()
1313
// containers that crashed during spawning
1414
containersErroredCounter = metrics.NewCounter()
15+
containersKilledCounter = metrics.NewCounter()
1516

1617
totalSpawnTimer = metrics.NewTimer()
1718
)
@@ -21,5 +22,6 @@ func init() {
2122
registry.Register("spawning_queue_size", spawningQueueSize)
2223
registry.Register("containers_created", containersCreatedCounter)
2324
registry.Register("containers_errored", containersErroredCounter)
25+
registry.Register("containers_killed", containersKilledCounter)
2426
registry.Register("total_spawn_timer", totalSpawnTimer)
2527
}

0 commit comments

Comments
 (0)