Skip to content

Commit 1d9a163

Browse files
authored
Fix nexus operation outcome when the min request timeout is reached (#8598)
## Why?
This fixes the outcome recorded when we detect that the operation is about to time out and there is not enough time left to issue another start request attempt. Resolving the operation as failed in that case is misleading; it is now resolved as timed out.

## How did you test it?
- [x] covered by existing tests (adjusted for the new behavior)
1 parent 0520a1b commit 1d9a163

File tree

2 files changed

+10
-6
lines changed

2 files changed

+10
-6
lines changed

components/nexusoperations/executors.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,8 @@ func (e taskExecutor) handleStartOperationError(env hsm.Environment, node *hsm.N
453453
// operation if the response's operation token is too large.
454454
return handleNonRetryableStartOperationError(node, operation, callErr)
455455
case errors.Is(callErr, ErrOperationTimeoutBelowMin):
456-
// Operation timeout is not retryable
457-
return handleNonRetryableStartOperationError(node, operation, callErr)
456+
// Not enough time to execute another request, resolve the operation with a timeout.
457+
return e.recordOperationTimeout(node)
458458
case errors.Is(callErr, context.DeadlineExceeded) || errors.Is(callErr, context.Canceled):
459459
// If timed out, we don't leak internal info to the user
460460
callErr = errRequestTimedOut
@@ -513,6 +513,10 @@ func (e taskExecutor) executeBackoffTask(env hsm.Environment, node *hsm.Node, ta
513513
}
514514

515515
func (e taskExecutor) executeTimeoutTask(env hsm.Environment, node *hsm.Node, task TimeoutTask) error {
516+
return e.recordOperationTimeout(node)
517+
}
518+
519+
func (e taskExecutor) recordOperationTimeout(node *hsm.Node) error {
516520
return hsm.MachineTransition(node, func(op Operation) (hsm.TransitionOutput, error) {
517521
eventID, err := hsm.EventIDFromToken(op.ScheduledEventToken)
518522
if err != nil {

components/nexusoperations/executors_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -333,11 +333,11 @@ func TestProcessInvocationTask(t *testing.T) {
333333
expectedMetricOutcome: "operation-timeout",
334334
onStartOperation: nil, // This should not be called if the operation has timed out.
335335
checkOutcome: func(t *testing.T, op nexusoperations.Operation, events []*historypb.HistoryEvent) {
336-
require.Equal(t, enumsspb.NEXUS_OPERATION_STATE_FAILED, op.State())
336+
require.Equal(t, enumsspb.NEXUS_OPERATION_STATE_TIMED_OUT, op.State())
337337
require.Equal(t, 1, len(events))
338-
failure := events[0].GetNexusOperationFailedEventAttributes().Failure.Cause
339-
require.NotNil(t, failure.GetApplicationFailureInfo())
340-
require.Equal(t, "remaining operation timeout is less than required minimum", failure.Message)
338+
failure := events[0].GetNexusOperationTimedOutEventAttributes().Failure.Cause
339+
require.NotNil(t, failure.GetTimeoutFailureInfo())
340+
require.Equal(t, "operation timed out", failure.Message)
341341
},
342342
},
343343
{

0 commit comments

Comments
 (0)