@@ -28,6 +28,7 @@ import (
28
28
"strings"
29
29
30
30
"go.uber.org/cadence/.gen/go/shared"
31
+ "go.uber.org/cadence/internal/common/util"
31
32
)
32
33
33
34
/*
@@ -125,6 +126,51 @@ type (
125
126
stackTrace string
126
127
}
127
128
129
+ // NonDeterministicError contains some structured data related to a non-deterministic
130
+ // replay failure, and is primarily intended for allowing richer error reporting.
131
+ //
132
+ // WorkflowType, WorkflowID, RunID, TaskList, and DomainName will likely be long-term stable
133
+ // and included in some form in future library versions, but the rest of these fields may
134
+ // change at any time, or be removed in a future major version change.
135
+ NonDeterministicError struct {
136
+
137
+ // Reason is a relatively free-form description of what kind of non-determinism
138
+ // was detected.
139
+ //
140
+ // You are *strongly* encouraged to not rely on these strings for behavior, only
141
+ // explanation, for a few reasons. More will likely appear in the future, they may
142
+ // change, and there is little that can be safely decided on in an automated way.
143
+ //
144
+ // Currently, values roughly match the historical error strings, and are:
145
+ // - "missing replay decision" (The error will contain HistoryEventText, as there
146
+ // is at least one history event that has no matching replayed decision)
147
+ // - "extra replay decision" (The error will contain DecisionText, as there is
148
+ // at least one decision from replay that has no matching history event)
149
+ // - "mismatch" (Both HistoryEventText and DecisionText will exist, as there
150
+ // are issues with both. This was previously shown as "history event is ...,
151
+ // replay decision is ..." error text.)
152
+ Reason string
153
+
154
+ WorkflowType string
155
+ WorkflowID string
156
+ RunID string
157
+ TaskList string
158
+ DomainName string
159
+
160
+ // intentionally avoiding "history event" and "decision" names
161
+ // because we *do* have types for them, but they are in thrift and should
162
+ // not be exposed directly.
163
+ // we should consider doing that eventually though, or providing a
164
+ // simplified object for richer failure information.
165
+
166
+ // HistoryEventText contains a String() representation of a history
167
+ // event (i.e. previously recorded) that is related to the problem.
168
+ HistoryEventText string
169
+ // DecisionText contains a String() representation of a replay decision
170
+ // event (i.e. created during replay) that is related to the problem.
171
+ DecisionText string
172
+ }
173
+
128
174
// ContinueAsNewError contains information about how to continue the workflow as new.
129
175
ContinueAsNewError struct {
130
176
wfn interface {}
@@ -419,3 +465,57 @@ func (b ErrorDetailsValues) Get(valuePtr ...interface{}) error {
419
465
}
420
466
return nil
421
467
}
468
+
469
+ // NewNonDeterminsticError constructs a new *NonDeterministicError.
470
+ //
471
+ // - reason should be a documented NonDeterminsticError.Reason value
472
+ // - info is always required. only a portion of it is used, but it is a convenient
473
+ // and currently always-available object.
474
+ // - history and decision may each be present or nil at any time
475
+ func NewNonDeterminsticError (reason string , info * WorkflowInfo , history * shared.HistoryEvent , decision * shared.Decision ) error {
476
+ var historyText string
477
+ if history != nil {
478
+ historyText = util .HistoryEventToString (history )
479
+ }
480
+ var decisionText string
481
+ if decision != nil {
482
+ decisionText = util .DecisionToString (decision )
483
+ }
484
+ return & NonDeterministicError {
485
+ Reason : reason ,
486
+
487
+ WorkflowType : info .WorkflowType .Name ,
488
+ WorkflowID : info .WorkflowExecution .ID ,
489
+ RunID : info .WorkflowExecution .RunID ,
490
+ TaskList : info .TaskListName ,
491
+ DomainName : info .Domain ,
492
+
493
+ HistoryEventText : historyText ,
494
+ DecisionText : decisionText ,
495
+ }
496
+ }
497
+
498
+ func (e * NonDeterministicError ) Error () string {
499
+ switch e .Reason {
500
+ case "missing replay decision" :
501
+ // historical text
502
+ return "nondeterministic workflow: " +
503
+ "missing replay decision for " + e .HistoryEventText
504
+ case "extra replay decision" :
505
+ // historical text
506
+ return "nondeterministic workflow: " +
507
+ "extra replay decision for " + e .DecisionText
508
+ case "mismatch" :
509
+ // historical text
510
+ return "nondeterministic workflow: " +
511
+ "history event is " + e .HistoryEventText + ", " +
512
+ "replay decision is " + e .DecisionText
513
+ default :
514
+ // should not occur in practice, but it's basically fine if it does.
515
+ // ideally this should crash in internal builds / tests, to prevent mismatched values.
516
+ return fmt .Sprintf (
517
+ "unknown reason %q, history event is: %s, replay decision is: %s" ,
518
+ e .Reason , e .HistoryEventText , e .DecisionText ,
519
+ )
520
+ }
521
+ }
0 commit comments