Skip to content
This repository was archived by the owner on Oct 9, 2023. It is now read-only.

Commit 6db081f

Browse files
authored
Writing errors for subtask failures (#260)
* writing an error.pb file on error
  Signed-off-by: Daniel Rammer <daniel@union.ai>
* fixing
  Signed-off-by: Daniel Rammer <daniel@union.ai>
* fixed unit tests
  Signed-off-by: Daniel Rammer <daniel@union.ai>
1 parent 60ea4b6 commit 6db081f

File tree

3 files changed

+48
-8
lines changed

3 files changed

+48
-8
lines changed

go/tasks/plugins/array/k8s/management.go

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010
"github.com/flyteorg/flyteplugins/go/tasks/errors"
1111
"github.com/flyteorg/flyteplugins/go/tasks/logs"
1212
"github.com/flyteorg/flyteplugins/go/tasks/pluginmachinery/core"
13+
"github.com/flyteorg/flyteplugins/go/tasks/pluginmachinery/io"
14+
"github.com/flyteorg/flyteplugins/go/tasks/pluginmachinery/ioutils"
1315
"github.com/flyteorg/flyteplugins/go/tasks/plugins/array"
1416
"github.com/flyteorg/flyteplugins/go/tasks/plugins/array/arraystatus"
1517
arrayCore "github.com/flyteorg/flyteplugins/go/tasks/plugins/array/core"
@@ -211,6 +213,30 @@ func LaunchAndCheckSubTasksState(ctx context.Context, tCtx core.TaskExecutionCon
211213

212214
if phaseInfo.Err() != nil {
213215
messageCollector.Collect(childIdx, phaseInfo.Err().String())
216+
217+
// If the service reported an error but there is no error.pb written, write one with the
218+
// service-provided error message.
219+
or, err := array.ConstructOutputReader(ctx, dataStore, outputPrefix, baseOutputDataSandbox, originalIdx)
220+
if err != nil {
221+
return currentState, externalResources, err
222+
}
223+
224+
if hasErr, err := or.IsError(ctx); err != nil {
225+
return currentState, externalResources, err
226+
} else if !hasErr {
227+
// The subtask has not produced an error.pb, so write one from the phase error.
228+
ow, err := array.ConstructOutputWriter(ctx, dataStore, outputPrefix, baseOutputDataSandbox, originalIdx)
229+
if err != nil {
230+
return currentState, externalResources, err
231+
}
232+
233+
if err = ow.Put(ctx, ioutils.NewInMemoryOutputReader(nil, &io.ExecutionError{
234+
ExecutionError: phaseInfo.Err(),
235+
IsRecoverable: phaseInfo.Phase() != core.PhasePermanentFailure,
236+
})); err != nil {
237+
return currentState, externalResources, err
238+
}
239+
}
214240
}
215241

216242
if phaseInfo.Err() != nil && phaseInfo.Err().GetKind() == idlCore.ExecutionError_SYSTEM {
@@ -250,10 +276,6 @@ func LaunchAndCheckSubTasksState(ctx context.Context, tCtx core.TaskExecutionCon
250276
}
251277

252278
// process phaseInfo
253-
if phaseInfo.Err() != nil {
254-
messageCollector.Collect(childIdx, phaseInfo.Err().String())
255-
}
256-
257279
var logLinks []*idlCore.TaskLog
258280
if phaseInfo.Info() != nil {
259281
logLinks = phaseInfo.Info().Logs

go/tasks/plugins/array/k8s/management_test.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,19 @@ import (
3131
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3232
)
3333

34+
type metadata struct {
35+
exists bool
36+
size int64
37+
}
38+
39+
func (m metadata) Exists() bool {
40+
return m.exists
41+
}
42+
43+
func (m metadata) Size() int64 {
44+
return m.size
45+
}
46+
3447
func createSampleContainerTask() *core2.Container {
3548
return &core2.Container{
3649
Command: []string{"cmd"},
@@ -106,8 +119,13 @@ func getMockTaskExecutionContext(ctx context.Context, parallelism int) *mocks.Ta
106119
ir.OnGetInputPath().Return("/prefix/inputs.pb")
107120
ir.OnGetMatch(mock.Anything).Return(&core2.LiteralMap{}, nil)
108121

122+
composedProtobufStore := &stdmocks.ComposedProtobufStore{}
123+
matchedBy := mock.MatchedBy(func(s storage.DataReference) bool {
124+
return true
125+
})
126+
composedProtobufStore.On("Head", mock.Anything, matchedBy).Return(metadata{true, 0}, nil)
109127
dataStore := &storage.DataStore{
110-
ComposedProtobufStore: &stdmocks.ComposedProtobufStore{},
128+
ComposedProtobufStore: composedProtobufStore,
111129
ReferenceConstructor: &storage.URLPathConstructor{},
112130
}
113131

@@ -446,7 +464,7 @@ func TestCheckSubTasksState(t *testing.T) {
446464
}
447465

448466
// execute
449-
newState, _, err := LaunchAndCheckSubTasksState(ctx, tCtx, &kubeClient, &config, nil, "/prefix/", "/prefix-sand/", currentState)
467+
newState, _, err := LaunchAndCheckSubTasksState(ctx, tCtx, &kubeClient, &config, tCtx.DataStore(), "/prefix/", "/prefix-sand/", currentState)
450468

451469
// validate results
452470
assert.Nil(t, err)
@@ -495,7 +513,7 @@ func TestCheckSubTasksState(t *testing.T) {
495513
}
496514

497515
// execute
498-
newState, _, err := LaunchAndCheckSubTasksState(ctx, tCtx, &kubeClient, &config, nil, "/prefix/", "/prefix-sand/", currentState)
516+
newState, _, err := LaunchAndCheckSubTasksState(ctx, tCtx, &kubeClient, &config, tCtx.DataStore(), "/prefix/", "/prefix-sand/", currentState)
499517

500518
// validate results
501519
assert.Nil(t, err)

go/tasks/plugins/array/k8s/subtask.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ func launchSubtask(ctx context.Context, stCtx SubTaskExecutionContext, cfg *Conf
198198
return pluginsCore.PhaseInfoRetryableFailure("RuntimeFailure", err.Error(), nil), nil
199199
} else if k8serrors.IsBadRequest(err) || k8serrors.IsInvalid(err) {
200200
logger.Errorf(ctx, "Badly formatted resource for plugin [%s], err %s", executorName, err)
201-
// return pluginsCore.DoTransition(pluginsCore.PhaseInfoFailure("BadTaskFormat", err.Error(), nil)), nil
201+
return pluginsCore.PhaseInfoFailure("BadTaskFormat", err.Error(), nil), nil
202202
} else if k8serrors.IsRequestEntityTooLargeError(err) {
203203
logger.Errorf(ctx, "Badly formatted resource for plugin [%s], err %s", executorName, err)
204204
return pluginsCore.PhaseInfoFailure("EntityTooLarge", err.Error(), nil), nil

0 commit comments

Comments
 (0)