@@ -472,6 +472,65 @@ type (
472
472
// ParentClosePolicy - Optional policy to decide what to do for the child.
473
473
// Default is Terminate (if onboarded to this feature)
474
474
ParentClosePolicy ParentClosePolicy
475
+
476
+ // Bugports allows opt-in enabling of older, possibly buggy behavior, primarily intended to allow temporarily
477
+ // emulating old behavior until a fix is deployed.
478
+ //
479
+ // Bugports are always deprecated and may be removed in future versions.
480
+ // Generally speaking they will *likely* remain in place for one minor version, and then they may be removed to
481
+ // allow cleaning up the additional code complexity that they cause.
482
+ //
483
+ // Deprecated: Bugports exist only to ease temporary migrations and may be removed in future versions.
484
+ Bugports Bugports
485
+ }
486
+
487
+ // Bugports allows opt-in enabling of older, possibly buggy behavior, primarily intended to allow temporarily
488
+ // emulating old behavior until a fix is deployed.
489
+ // By default, bugs (especially rarely-occurring ones) are fixed and all users are opted into the new behavior.
490
+ // Back-ported buggy behavior *may* be available via these flags.
491
+ //
492
+ // Fields in here are NOT guaranteed to be stable. They will almost certainly be removed in the next major
493
+ // release, and might be removed earlier if a need arises, e.g. if the historical behavior causes too much of an
494
+ // increase in code complexity.
495
+ //
496
+ // See each individual field for details.
497
+ //
498
+ // Bugports are always deprecated and may be removed in future versions.
499
+ // Generally speaking they will *likely* remain in place for one minor version, and then they may be removed to
500
+ // allow cleaning up the additional code complexity that they cause.
501
+ //
502
+ // Deprecated: all Bugports fields are temporary and may be removed in future versions.
503
+ Bugports struct {
504
+ // StartChildWorkflowsOnCanceledContext allows emulating older, buggy behavior that existed prior to v0.18.4.
505
+ //
506
+ // Prior to the fix, child workflows would be started and keep running when their context was canceled in two
507
+ // situations:
508
+ // 1) when the context was canceled before ExecuteChildWorkflow is called, and
509
+ // 2) when the context was canceled after ExecuteChildWorkflow but before the child workflow was started.
510
+ //
511
+ // 1 is unfortunately easy to trigger, though many workflows will encounter an error earlier and not reach the
512
+ // child-workflow-executing code. 2 is expected to be very rare in practice.
513
+ //
514
+ // To permanently emulate old behavior, use a disconnected context when starting child workflows, and
515
+ // cancel it only after `childfuture.GetChildWorkflowExecution().Get(...)` returns. This can be used when this flag
516
+ // is removed in the future.
517
+ //
518
+ // If you have currently-broken workflows and need to repair them, there are two primary options:
519
+ //
520
+ // 1: Check the BinaryChecksum value of your new deploy and/or of the decision that is currently failing
521
+ // workflows. Then set this flag when replaying history on those not-fixed checksums. Concretely, this means
522
+ // checking both `workflow.GetInfo(ctx).BinaryChecksum` (note that sufficiently old clients may not have
523
+ // recorded a value, and it may be nil) and `workflow.IsReplaying(ctx)`.
524
+ //
525
+ // 2: Reset broken workflows back to either before the buggy behavior was recorded, or before the fixed behavior
526
+ // was deployed. A "bad binary" reset type can do the latter in bulk, see the CLI's
527
+ // `cadence workflow reset-batch --reset_type BadBinary --help` for details. For the former, check the failing
528
+ // histories, identify the point at which the bug occurred, and reset to prior to that decision task.
529
+ //
530
+ // Added in 0.18.4, this may be removed in or after v0.19.0, so please migrate off of it ASAP.
531
+ //
532
+ // Deprecated: may be removed in or after v0.19.0; migrate off of this flag as soon as possible.
533
+ StartChildWorkflowsOnCanceledContext bool
475
534
}
476
535
)
477
536
@@ -896,15 +955,23 @@ func (wc *workflowEnvironmentInterceptor) ExecuteChildWorkflow(ctx Context, chil
896
955
decodeFutureImpl : mainFuture .(* decodeFutureImpl ),
897
956
executionFuture : executionFuture .(* futureImpl ),
898
957
}
958
+ // Clients prior to v0.18.4 would incorrectly start child workflows even when their context was already canceled,
959
+ // and did not react to cancellation that happened after the start was requested but before the child started.
960
+ correctChildCancellation := true
961
+ workflowOptionsFromCtx := getWorkflowEnvOptions (ctx )
899
962
900
963
// Starting with a canceled context should immediately fail, no need to even try.
901
964
if ctx .Err () != nil {
902
- mainSettable .SetError (ctx .Err ())
903
- executionSettable .SetError (ctx .Err ())
904
- return result
965
+ if workflowOptionsFromCtx .bugports .StartChildWorkflowsOnCanceledContext {
966
+ // backport the bug
967
+ correctChildCancellation = false
968
+ } else {
969
+ mainSettable .SetError (ctx .Err ())
970
+ executionSettable .SetError (ctx .Err ())
971
+ return result
972
+ }
905
973
}
906
974
907
- workflowOptionsFromCtx := getWorkflowEnvOptions (ctx )
908
975
dc := workflowOptionsFromCtx .dataConverter
909
976
env := getWorkflowEnvironment (ctx )
910
977
wfType , input , err := getValidatedWorkflowFunction (childWorkflowType , args , dc , env .GetRegistry ())
@@ -951,7 +1018,11 @@ func (wc *workflowEnvironmentInterceptor) ExecuteChildWorkflow(ctx Context, chil
951
1018
952
1019
// forward the delayed cancellation if necessary
953
1020
if shouldCancelAsync && e == nil && ! mainFuture .IsReady () {
954
- getWorkflowEnvironment (ctx ).RequestCancelChildWorkflow (* options .domain , childWorkflowExecution .ID )
1021
+ if workflowOptionsFromCtx .bugports .StartChildWorkflowsOnCanceledContext {
1022
+ // do nothing: buggy behavior did not forward the cancellation
1023
+ } else {
1024
+ getWorkflowEnvironment (ctx ).RequestCancelChildWorkflow (* options .domain , childWorkflowExecution .ID )
1025
+ }
955
1026
}
956
1027
})
957
1028
@@ -967,7 +1038,7 @@ func (wc *workflowEnvironmentInterceptor) ExecuteChildWorkflow(ctx Context, chil
967
1038
if childWorkflowExecution != nil && ! mainFuture .IsReady () {
968
1039
// child workflow started, and ctx cancelled. forward cancel to the child.
969
1040
getWorkflowEnvironment (ctx ).RequestCancelChildWorkflow (* options .domain , childWorkflowExecution .ID )
970
- } else if childWorkflowExecution == nil {
1041
+ } else if childWorkflowExecution == nil && correctChildCancellation {
971
1042
// decision to start the child has been made, but it has not yet started.
972
1043
973
1044
// TODO: ideal, but not strictly necessary for correctness:
@@ -1294,6 +1365,7 @@ func WithChildWorkflowOptions(ctx Context, cwo ChildWorkflowOptions) Context {
1294
1365
wfOptions .memo = cwo .Memo
1295
1366
wfOptions .searchAttributes = cwo .SearchAttributes
1296
1367
wfOptions .parentClosePolicy = cwo .ParentClosePolicy
1368
+ wfOptions .bugports = cwo .Bugports
1297
1369
1298
1370
return ctx1
1299
1371
}
0 commit comments