Skip to content

Commit e074a43

Browse files
Merge pull request #210 from circleci/ONPREM-2431/improve-readiness-check
[ONPREM-2431] Improve service container readiness check resiliency
2 parents: b4ad127 + 4c5e3ee; commit: e074a43

File tree

2 files changed: 33 additions and 2 deletions

2 files changed: 33 additions and 2 deletions

task/orchestrator.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ func (o *Orchestrator) Run(parentCtx context.Context) (err error) {
7272
if len(o.config.ReadinessFilePath) > 0 {
7373
// Wait for readiness from the other containers before starting the task agent process
7474
if err := o.waitForReadiness(ctx); err != nil {
75-
return err
75+
return taskerrors.RetryableErrorf("error waiting for service containers to become ready: %w", err)
7676
}
7777
}
7878

@@ -120,7 +120,7 @@ func (o *Orchestrator) waitForReadiness(ctx context.Context) (err error) {
120120
ctx, cancel := context.WithTimeout(ctx, waitForReadinessTimeout) // so we don't wait indefinitely if there's a problem
121121
defer cancel()
122122

123-
watcher, err := fsnotify.NewWatcher()
123+
watcher, err := fsnotify.NewBufferedWatcher(100) // use a buffered chan to prevent a possible race condition
124124
if err != nil {
125125
return err
126126
}

task/orchestrator_test.go

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ func TestOrchestrator(t *testing.T) {
5959
gracePeriod time.Duration
6060
timeout time.Duration
6161
additionalTasks []fakerunnerapi.Task
62+
cleanup func()
6263

6364
wantError string
6465
wantTimeout bool
@@ -196,6 +197,32 @@ func TestOrchestrator(t *testing.T) {
196197
},
197198
},
198199
},
200+
{
201+
name: "retryable error: service containers didn't become ready in time",
202+
config: func() Config {
203+
waitForReadinessTimeout = 1 * time.Millisecond
204+
c := defaultConfig
205+
c.ReadinessFilePath = "does-not-exist"
206+
return c
207+
}(),
208+
cleanup: func() {
209+
waitForReadinessTimeout = 10 * time.Minute
210+
},
211+
wantError: "error waiting for service containers to become ready: context deadline exceeded",
212+
wantTaskUnclaims: []fakerunnerapi.TaskUnclaim{
213+
{
214+
Token: "testtoken",
215+
},
216+
},
217+
wantTaskEvents: []fakerunnerapi.TaskEvent{
218+
{
219+
Allocation: defaultConfig.Allocation,
220+
TimestampMilli: time.Now().UnixMilli(),
221+
Message: []byte("error waiting for service containers to become ready: context deadline exceeded: " +
222+
"Check container logs for more details"),
223+
},
224+
},
225+
},
199226
{
200227
name: "retryable error: an unsafe retry",
201228
config: Config{
@@ -252,6 +279,10 @@ func TestOrchestrator(t *testing.T) {
252279
t.Run(tt.name, func(t *testing.T) {
253280
t.Setenv("BE_TASK_AGENT", "true")
254281

282+
if tt.cleanup != nil {
283+
t.Cleanup(tt.cleanup)
284+
}
285+
255286
for k, v := range tt.env {
256287
t.Setenv(k, v)
257288
}

0 commit comments

Comments (0)