Skip to content

Commit 6a4e77a

Browse files
fix: changing defaults for history cleanup (#7661)
**What changed?** This changes the default for cleaning up history on workflow start if the workflow fails to start correctly. There's a bunch of scenarios in which workflows cannot start (because the workflowID is already in use, for example) in which the history tree and nodes will remain unless this flag is turned on. The result can be quite a bit of garbage data. **How did you test it?** This was tested in production for 15+ environments, particularly with Cassandra but also with a SQL implementation. **Potential risks** If there's an unforeseen bug in the implementation, this could break workflows on startup. An earlier iteration of the code did, however, assuming there are no unforseen bugs, this should be safe. --------- Signed-off-by: David Porter <david.porter@uber.com>
1 parent d7f7eb4 commit 6a4e77a

File tree

3 files changed

+11
-3
lines changed

3 files changed

+11
-3
lines changed

common/dynamicconfig/dynamicproperties/constants.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4367,7 +4367,7 @@ var BoolKeys = map[BoolKey]DynamicBool{
43674367
EnableCleanupOrphanedHistoryBranchOnWorkflowCreation: {
43684368
KeyName: "history.enableCleanupOrphanedHistoryBranchOnWorkflowCreation",
43694369
Description: "EnableCleanupOrphanedHistoryBranchOnWorkflowCreation enables cleanup of orphaned history branches when CreateWorkflowExecution fails",
4370-
DefaultValue: false,
4370+
DefaultValue: true,
43714371
},
43724372
DisableListVisibilityByFilter: {
43734373
KeyName: "frontend.disableListVisibilityByFilter",

service/history/engine/engineimpl/history_engine2_test.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,6 +1473,7 @@ func (s *engine2Suite) TestStartWorkflowExecution_BrandNew_DuplicateRequestError
14731473
s.mockShard.Resource.ActiveClusterMgr.EXPECT().GetActiveClusterInfoByClusterAttribute(gomock.Any(), constants.TestDomainID, gomock.Any()).Return(testActiveClusterInfo, nil).Times(1)
14741474

14751475
s.mockHistoryV2Mgr.On("AppendHistoryNodes", mock.Anything, mock.Anything).Return(&p.AppendHistoryNodesResponse{}, nil).Once()
1476+
s.mockHistoryV2Mgr.On("DeleteHistoryBranch", mock.Anything, mock.Anything).Return(nil).Once()
14761477
s.mockExecutionMgr.On("CreateWorkflowExecution", mock.Anything, mock.MatchedBy(func(request *p.CreateWorkflowExecutionRequest) bool {
14771478
return !request.NewWorkflowSnapshot.ExecutionInfo.StartTimestamp.IsZero() && reflect.DeepEqual(partitionConfig, request.NewWorkflowSnapshot.ExecutionInfo.PartitionConfig)
14781479
})).Return(nil, &p.DuplicateRequestError{RequestType: p.WorkflowRequestTypeSignal, RunID: "test-run-id"}).Once()
@@ -1555,6 +1556,7 @@ func (s *engine2Suite) TestStartWorkflowExecution_StillRunning_NonDeDup() {
15551556
s.mockShard.Resource.ActiveClusterMgr.EXPECT().GetActiveClusterInfoByClusterAttribute(gomock.Any(), constants.TestDomainID, gomock.Any()).Return(testActiveClusterInfo, nil).Times(1)
15561557

15571558
s.mockHistoryV2Mgr.On("AppendHistoryNodes", mock.Anything, mock.Anything).Return(&p.AppendHistoryNodesResponse{}, nil).Once()
1559+
s.mockHistoryV2Mgr.On("DeleteHistoryBranch", mock.Anything, mock.Anything).Return(nil).Once()
15581560
s.mockExecutionMgr.On("CreateWorkflowExecution", mock.Anything, mock.Anything).Return(nil, &p.WorkflowExecutionAlreadyStartedError{
15591561
Msg: "random message",
15601562
StartRequestID: "oldRequestID",
@@ -1687,6 +1689,7 @@ func (s *engine2Suite) TestStartWorkflowExecution_NotRunning_PrevSuccess_Duplica
16871689
LastWriteVersion: lastWriteVersion,
16881690
}).Once()
16891691

1692+
s.mockHistoryV2Mgr.On("DeleteHistoryBranch", mock.Anything, mock.Anything).Return(nil).Once()
16901693
s.mockExecutionMgr.On(
16911694
"CreateWorkflowExecution",
16921695
mock.Anything,
@@ -1744,7 +1747,8 @@ func (s *engine2Suite) TestStartWorkflowExecution_NotRunning_PrevSuccess() {
17441747

17451748
expectedErrs := []bool{true, false, true}
17461749

1747-
s.mockHistoryV2Mgr.On("AppendHistoryNodes", mock.Anything, mock.Anything).Return(&p.AppendHistoryNodesResponse{}, nil).Times(len(expectedErrs))
1750+
s.mockHistoryV2Mgr.On("AppendHistoryNodes", mock.Anything, mock.Anything).Return(&p.AppendHistoryNodesResponse{}, nil).Times(3)
1751+
s.mockHistoryV2Mgr.On("DeleteHistoryBranch", mock.Anything, mock.Anything).Return(nil).Times(2)
17481752
s.mockExecutionMgr.On(
17491753
"CreateWorkflowExecution",
17501754
mock.Anything,
@@ -1840,7 +1844,8 @@ func (s *engine2Suite) TestStartWorkflowExecution_NotRunning_PrevFail() {
18401844

18411845
for i, closeState := range closeStates {
18421846

1843-
s.mockHistoryV2Mgr.On("AppendHistoryNodes", mock.Anything, mock.Anything).Return(&p.AppendHistoryNodesResponse{}, nil).Times(len(expectedErrs))
1847+
s.mockHistoryV2Mgr.On("AppendHistoryNodes", mock.Anything, mock.Anything).Return(&p.AppendHistoryNodesResponse{}, nil).Times(3)
1848+
s.mockHistoryV2Mgr.On("DeleteHistoryBranch", mock.Anything, mock.Anything).Return(nil).Times(1)
18441849
s.mockExecutionMgr.On(
18451850
"CreateWorkflowExecution",
18461851
mock.Anything,
@@ -2175,6 +2180,7 @@ func (s *engine2Suite) TestSignalWithStartWorkflowExecution_WorkflowNotExist_Dup
21752180

21762181
s.mockExecutionMgr.On("GetCurrentExecution", mock.Anything, mock.Anything).Return(nil, notExistErr).Once()
21772182
s.mockHistoryV2Mgr.On("AppendHistoryNodes", mock.Anything, mock.Anything).Return(&p.AppendHistoryNodesResponse{}, nil).Once()
2183+
s.mockHistoryV2Mgr.On("DeleteHistoryBranch", mock.Anything, mock.Anything).Return(nil).Once()
21782184
s.mockExecutionMgr.On("CreateWorkflowExecution", mock.Anything, mock.MatchedBy(func(request *p.CreateWorkflowExecutionRequest) bool {
21792185
return !request.NewWorkflowSnapshot.ExecutionInfo.StartTimestamp.IsZero() && reflect.DeepEqual(partitionConfig, request.NewWorkflowSnapshot.ExecutionInfo.PartitionConfig)
21802186
})).Return(nil, &p.DuplicateRequestError{RequestType: p.WorkflowRequestTypeCancel, RunID: "test-run-id"}).Once()
@@ -2469,6 +2475,7 @@ func (s *engine2Suite) TestSignalWithStartWorkflowExecution_Start_WorkflowAlread
24692475
s.mockExecutionMgr.On("GetCurrentExecution", mock.Anything, mock.Anything).Return(gceResponse, nil).Once()
24702476
s.mockExecutionMgr.On("GetWorkflowExecution", mock.Anything, mock.Anything).Return(gwmsResponse, nil).Once()
24712477
s.mockHistoryV2Mgr.On("AppendHistoryNodes", mock.Anything, mock.Anything).Return(&p.AppendHistoryNodesResponse{}, nil).Once()
2478+
s.mockHistoryV2Mgr.On("DeleteHistoryBranch", mock.Anything, mock.Anything).Return(nil).Once()
24722479
s.mockExecutionMgr.On("CreateWorkflowExecution", mock.Anything, mock.Anything).Return(nil, workflowAlreadyStartedErr).Once()
24732480

24742481
resp, err := s.historyEngine.SignalWithStartWorkflowExecution(context.Background(), sRequest)

service/history/engine/engineimpl/start_workflow_execution_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ func TestStartWorkflowExecution(t *testing.T) {
236236
historyV2Mgr := eft.ShardCtx.Resource.HistoryMgr
237237
historyV2Mgr.On("AppendHistoryNodes", mock.Anything, mock.AnythingOfType("*persistence.AppendHistoryNodesRequest")).
238238
Return(&persistence.AppendHistoryNodesResponse{}, nil).Once()
239+
historyV2Mgr.On("DeleteHistoryBranch", mock.Anything, mock.Anything).Return(nil).Once()
239240
},
240241
wantErr: true,
241242
},

0 commit comments

Comments
 (0)