@@ -823,6 +823,13 @@ func (w *workflowExecutionContextImpl) ProcessWorkflowTask(workflowTask *workflo
823
823
824
824
skipReplayCheck := w .skipReplayCheck ()
825
825
isReplayTest := task .GetPreviousStartedEventId () == replayPreviousStartedEventID
826
+ if isReplayTest {
827
+ w .wth .logger .Info ("Processing workflow task in replay test mode" ,
828
+ zap .String (tagWorkflowType , task .WorkflowType .GetName ()),
829
+ zap .String (tagWorkflowID , task .WorkflowExecution .GetWorkflowId ()),
830
+ zap .String (tagRunID , task .WorkflowExecution .GetRunId ()),
831
+ )
832
+ }
826
833
// Process events
827
834
ProcessEvents:
828
835
for {
@@ -917,12 +924,19 @@ ProcessEvents:
917
924
}
918
925
}
919
926
if nonDeterministicErr == nil && w .err != nil {
920
- if isReplayTest {
921
- // NOTE: we should check the following error regardless if it's in replay test or not
922
- // but since we are not checking it previously, it may break existing customers workflow
923
- if panicErr , ok := w . err .( * workflowPanicError ); ok && panicErr . value != nil {
924
- if _ , isStateMachinePanic := panicErr . value .( stateMachineIllegalStatePanic ); isStateMachinePanic {
927
+ if panicErr , ok := w . err .( * workflowPanicError ); ok && panicErr . value != nil {
928
+ if _ , isStateMachinePanic := panicErr . value .( stateMachineIllegalStatePanic ); isStateMachinePanic {
929
+ if isReplayTest {
930
+ // NOTE: we should do this regardless of whether it's a replay test or not
931
+ // but since we previously checked the wrong error type, doing so may break existing customers' workflows
925
932
nonDeterministicErr = panicErr
933
+ } else {
934
+ w .wth .logger .Warn ("Ignored workflow panic error" ,
935
+ zap .String (tagWorkflowType , task .WorkflowType .GetName ()),
936
+ zap .String (tagWorkflowID , task .WorkflowExecution .GetWorkflowId ()),
937
+ zap .String (tagRunID , task .WorkflowExecution .GetRunId ()),
938
+ zap .Error (nonDeterministicErr ),
939
+ )
926
940
}
927
941
}
928
942
}
@@ -1455,12 +1469,32 @@ func (wth *workflowTaskHandlerImpl) completeWorkflow(
1455
1469
// for query task
1456
1470
if task .Query != nil {
1457
1471
queryCompletedRequest := & s.RespondQueryTaskCompletedRequest {TaskToken : task .TaskToken }
1458
- if panicErr , ok := workflowContext .err .(* workflowPanicError ); ok {
1472
+ if panicErr , ok := workflowContext .err .(* PanicError ); ok {
1473
+ // NOTE: this code path should never be executed; we should check for workflowPanicError instead of PanicError
1474
+ wth .logger .Warn ("Encountered PanicError in workflow query task" ,
1475
+ zap .String (tagWorkflowID , task .WorkflowExecution .GetWorkflowId ()),
1476
+ zap .String (tagRunID , task .WorkflowExecution .GetRunId ()),
1477
+ zap .String (tagPanicError , panicErr .Error ()),
1478
+ zap .String (tagPanicStack , panicErr .StackTrace ()),
1479
+ )
1480
+
1459
1481
queryCompletedRequest .CompletedType = common .QueryTaskCompletedTypePtr (s .QueryTaskCompletedTypeFailed )
1460
1482
queryCompletedRequest .ErrorMessage = common .StringPtr ("Workflow panic: " + panicErr .Error ())
1461
1483
return queryCompletedRequest
1462
1484
}
1463
1485
1486
+ if workflowPanicErr , ok := workflowContext .err .(* workflowPanicError ); ok {
1487
+ // NOTE: in this case we should return a completed query task with CompletedTypeFailed
1488
+ // but we didn't check for the right error type before, so doing this now may break existing customers
1489
+ wth .logger .Warn ("Ignored workflow panic error for query, query result may be partial" ,
1490
+ zap .String (tagWorkflowID , task .WorkflowExecution .GetWorkflowId ()),
1491
+ zap .String (tagRunID , task .WorkflowExecution .GetRunId ()),
1492
+ zap .String (tagPanicError , workflowPanicErr .Error ()),
1493
+ zap .String (tagPanicStack , workflowPanicErr .StackTrace ()),
1494
+ zap .Int64 ("PreviousStartedEventID" , task .GetPreviousStartedEventId ()),
1495
+ )
1496
+ }
1497
+
1464
1498
result , err := eventHandler .ProcessQuery (task .Query .GetQueryType (), task .Query .QueryArgs )
1465
1499
if err != nil {
1466
1500
queryCompletedRequest .CompletedType = common .QueryTaskCompletedTypePtr (s .QueryTaskCompletedTypeFailed )
@@ -1481,8 +1515,8 @@ func (wth *workflowTaskHandlerImpl) completeWorkflow(
1481
1515
wth .logger .Error ("Workflow panic." ,
1482
1516
zap .String (tagWorkflowID , task .WorkflowExecution .GetWorkflowId ()),
1483
1517
zap .String (tagRunID , task .WorkflowExecution .GetRunId ()),
1484
- zap .String ("PanicError" , panicErr .Error ()),
1485
- zap .String ("PanicStack" , panicErr .StackTrace ()))
1518
+ zap .String (tagPanicError , panicErr .Error ()),
1519
+ zap .String (tagPanicStack , panicErr .StackTrace ()))
1486
1520
return errorToFailDecisionTask (task .TaskToken , panicErr , wth .identity )
1487
1521
}
1488
1522
@@ -1840,8 +1874,8 @@ func (ath *activityTaskHandlerImpl) Execute(taskList string, t *s.PollForActivit
1840
1874
zap .String (tagWorkflowID , t .WorkflowExecution .GetWorkflowId ()),
1841
1875
zap .String (tagRunID , t .WorkflowExecution .GetRunId ()),
1842
1876
zap .String (tagActivityType , activityType ),
1843
- zap .String ("PanicError" , fmt .Sprintf ("%v" , p )),
1844
- zap .String ("PanicStack" , st ))
1877
+ zap .String (tagPanicError , fmt .Sprintf ("%v" , p )),
1878
+ zap .String (tagPanicStack , st ))
1845
1879
metricsScope .Counter (metrics .ActivityTaskPanicCounter ).Inc (1 )
1846
1880
panicErr := newPanicError (p , st )
1847
1881
result , err = convertActivityResultToRespondRequest (ath .identity , t .TaskToken , nil , panicErr , ath .dataConverter ), nil
0 commit comments