@@ -347,6 +347,7 @@ def __init__(self, child: TaskBase, retry_options: RetryOptions, context):
347
347
self .context = context
348
348
self .actions = child .action_repr
349
349
self .is_waiting_on_timer = False
350
+ self .error = None
350
351
351
352
@property
352
353
def id_ (self ):
@@ -373,10 +374,21 @@ def try_set_value(self, child: TaskBase):
373
374
if self .is_waiting_on_timer :
374
375
# timer fired, re-scheduling original task
375
376
self .is_waiting_on_timer = False
376
- rescheduled_task = self .context ._generate_task (
377
- action = NoOpAction ("rescheduled task" ), parent = self )
378
- self .pending_tasks .add (rescheduled_task )
379
- self .context ._add_to_open_tasks (rescheduled_task )
377
+ # As per DTFx semantics: we need to check the number of retries only after the final
378
+ # timer has fired. This means we essentially have to wait for one "extra" timer after
379
+ # the maximum number of attempts has been reached. Removing this extra timer will cause
380
+ # stuck orchestrators as we need to be "in sync" with the replay logic of DTFx.
381
+ if self .num_attempts >= self .retry_options .max_number_of_attempts :
382
+ self .is_waiting_on_timer = True
383
+ # we have reached the maximum number of attempts, set error
384
+ self .set_value (is_error = True , value = self .error )
385
+ else :
386
+ rescheduled_task = self .context ._generate_task (
387
+ action = NoOpAction ("rescheduled task" ), parent = self )
388
+ self .pending_tasks .add (rescheduled_task )
389
+ self .context ._add_to_open_tasks (rescheduled_task )
390
+ self .num_attempts += 1
391
+
380
392
return
381
393
if child .state is TaskState .SUCCEEDED :
382
394
if len (self .pending_tasks ) == 0 :
@@ -386,17 +398,11 @@ def try_set_value(self, child: TaskBase):
386
398
self .set_value (is_error = False , value = child .result )
387
399
388
400
else : # child.state is TaskState.FAILED:
389
- if self .num_attempts >= self .retry_options .max_number_of_attempts :
390
- # we have reached the maximum number of attempts, set error
391
- self .set_value (is_error = True , value = child .result )
392
- else :
393
- # still have some retries left.
394
- # increase size of pending tasks by adding a timer task
395
- # when it completes, we'll retry the original task
396
- timer_task = self .context ._generate_task (
397
- action = NoOpAction ("-WithRetry timer" ), parent = self )
398
- self .pending_tasks .add (timer_task )
399
- self .context ._add_to_open_tasks (timer_task )
400
- self .is_waiting_on_timer = True
401
-
402
- self .num_attempts += 1
401
+ # increase size of pending tasks by adding a timer task
402
+ # when it completes, we'll retry the original task
403
+ timer_task = self .context ._generate_task (
404
+ action = NoOpAction ("-WithRetry timer" ), parent = self )
405
+ self .pending_tasks .add (timer_task )
406
+ self .context ._add_to_open_tasks (timer_task )
407
+ self .is_waiting_on_timer = True
408
+ self .error = child .result
0 commit comments