Skip to content

Commit fbf79f2

Browse files
sankari165ins-tril
authored andcommitted
[Wf-Diagnostics] Limit number of issues per type returned in diagnostics (cadence-workflow#7189)
* [Wf-Diagnostics] Reduce number of issues per type returned in diagnostics * Update activities_test.go * Update activities.go * Update activities_test.go
1 parent 27db977 commit fbf79f2

File tree

2 files changed

+75
-20
lines changed

2 files changed

+75
-20
lines changed

service/worker/diagnostics/activities.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,9 @@ const (
4040
linkToRetriesRunbook = "https://cadenceworkflow.io/docs/workflow-troubleshooting/retries"
4141
WfDiagnosticsAppName = "workflow-diagnostics"
4242

43-
_maxPageSize = 1000 // current maximum page size for fetching workflow history
44-
_contextTimeout = 1 * time.Minute // timeout to fetch the whole execution history
43+
_maxPageSize = 1000 // current maximum page size for fetching workflow history
44+
_contextTimeout = 1 * time.Minute // timeout to fetch the whole execution history
45+
_maxIssuesPerInvariant = 10 // maximum number of issues to return per invariant check
4546
)
4647

4748
type identifyIssuesParams struct {
@@ -65,7 +66,7 @@ func (w *dw) identifyIssues(ctx context.Context, info identifyIssuesParams) ([]i
6566
if err != nil {
6667
return nil, err
6768
}
68-
result = append(result, issues...)
69+
result = append(result, capNumberOfIssues(issues)...)
6970
}
7071

7172
return result, nil
@@ -154,3 +155,11 @@ func (w *dw) emit(ctx context.Context, info analytics.WfDiagnosticsUsageData, cl
154155
})
155156
return emitter.EmitUsageData(ctx, info)
156157
}
158+
159+
// capNumberOfIssues limits the number of issues to avoid overwhelming the result with too many issues.
160+
func capNumberOfIssues(issues []invariant.InvariantCheckResult) []invariant.InvariantCheckResult {
161+
if len(issues) > _maxIssuesPerInvariant {
162+
return issues[:_maxIssuesPerInvariant]
163+
}
164+
return issues
165+
}

service/worker/diagnostics/activities_test.go

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ const (
4949
)
5050

5151
func Test__identifyIssues(t *testing.T) {
52-
dwtest := testDiagnosticWorkflow(t)
52+
dwtest := testDiagnosticWorkflow(t, testWorkflowExecutionHistoryResponseWithMultipleIssues())
5353
actMetadata := failure.FailureIssuesMetadata{
5454
Identity: "localhost",
5555
ActivityType: "test-activity",
@@ -58,39 +58,42 @@ func Test__identifyIssues(t *testing.T) {
5858
}
5959
actMetadataInBytes, err := json.Marshal(actMetadata)
6060
require.NoError(t, err)
61-
retryMetadata := retry.RetryMetadata{
62-
EventID: 2,
63-
RetryPolicy: &types.RetryPolicy{
64-
InitialIntervalInSeconds: 1,
65-
MaximumAttempts: 1,
66-
},
67-
}
68-
retryMetadataInBytes, err := json.Marshal(retryMetadata)
69-
require.NoError(t, err)
7061
expectedResult := []invariant.InvariantCheckResult{
7162
{
7263
IssueID: 0,
7364
InvariantType: failure.ActivityFailed.String(),
7465
Reason: failure.GenericError.String(),
7566
Metadata: actMetadataInBytes,
7667
},
77-
{
78-
IssueID: 0,
68+
}
69+
for i := 0; i < _maxIssuesPerInvariant; i++ {
70+
retryMetadata := retry.RetryMetadata{
71+
EventID: int64(i),
72+
RetryPolicy: &types.RetryPolicy{
73+
InitialIntervalInSeconds: 1,
74+
MaximumAttempts: 1,
75+
},
76+
}
77+
retryMetadataInBytes, err := json.Marshal(retryMetadata)
78+
require.NoError(t, err)
79+
expectedResult = append(expectedResult, invariant.InvariantCheckResult{
80+
IssueID: i,
7981
InvariantType: retry.ActivityRetryIssue.String(),
8082
Reason: retry.RetryPolicyValidationMaxAttempts.String(),
8183
Metadata: retryMetadataInBytes,
82-
},
84+
})
8385
}
8486
result, err := dwtest.identifyIssues(context.Background(), identifyIssuesParams{Execution: &types.WorkflowExecution{
8587
WorkflowID: "123",
8688
RunID: "abc",
8789
}})
8890
require.NoError(t, err)
91+
require.Equal(t, _maxIssuesPerInvariant+1, len(result)) // retry invariant returns 10 issues (capped) , failure invariant returns 1 issue
8992
require.Equal(t, expectedResult, result)
9093
}
9194

9295
func Test__rootCauseIssues(t *testing.T) {
93-
dwtest := testDiagnosticWorkflow(t)
96+
dwtest := testDiagnosticWorkflow(t, testWorkflowExecutionHistoryResponse())
9497
actMetadata := failure.FailureIssuesMetadata{
9598
Identity: "localhost",
9699
ActivityScheduledID: 1,
@@ -120,7 +123,7 @@ func Test__rootCauseIssues(t *testing.T) {
120123

121124
func Test__emit(t *testing.T) {
122125
ctrl := gomock.NewController(t)
123-
dwtest := testDiagnosticWorkflow(t)
126+
dwtest := testDiagnosticWorkflow(t, testWorkflowExecutionHistoryResponse())
124127
mockClient := messaging.NewMockClient(ctrl)
125128
mockProducer := messaging.NewMockProducer(ctrl)
126129
mockProducer.EXPECT().Publish(gomock.Any(), gomock.Any()).Return(nil)
@@ -129,12 +132,12 @@ func Test__emit(t *testing.T) {
129132
require.NoError(t, err)
130133
}
131134

132-
func testDiagnosticWorkflow(t *testing.T) *dw {
135+
func testDiagnosticWorkflow(t *testing.T, history *types.GetWorkflowExecutionHistoryResponse) *dw {
133136
ctrl := gomock.NewController(t)
134137
mockClientBean := client.NewMockBean(ctrl)
135138
mockFrontendClient := frontend.NewMockClient(ctrl)
136139
mockClientBean.EXPECT().GetFrontendClient().Return(mockFrontendClient).AnyTimes()
137-
mockFrontendClient.EXPECT().GetWorkflowExecutionHistory(gomock.Any(), gomock.Any()).Return(testWorkflowExecutionHistoryResponse(), nil).AnyTimes()
140+
mockFrontendClient.EXPECT().GetWorkflowExecutionHistory(gomock.Any(), gomock.Any()).Return(history, nil).AnyTimes()
138141
return &dw{
139142
clientBean: mockClientBean,
140143
invariants: []invariant.Invariant{failure.NewInvariant(), retry.NewInvariant()},
@@ -193,6 +196,49 @@ func testWorkflowExecutionHistoryResponse() *types.GetWorkflowExecutionHistoryRe
193196
}
194197
}
195198

199+
func testWorkflowExecutionHistoryResponseWithMultipleIssues() *types.GetWorkflowExecutionHistoryResponse {
200+
testResponse := &types.GetWorkflowExecutionHistoryResponse{History: &types.History{
201+
Events: []*types.HistoryEvent{
202+
{
203+
ID: 1,
204+
Timestamp: common.Int64Ptr(testTimeStamp),
205+
WorkflowExecutionStartedEventAttributes: &types.WorkflowExecutionStartedEventAttributes{
206+
ExecutionStartToCloseTimeoutSeconds: common.Int32Ptr(workflowTimeoutSecond),
207+
},
208+
},
209+
},
210+
}}
211+
for i := 0; i <= 20; i++ {
212+
testResponse.History.Events = append(testResponse.History.Events, &types.HistoryEvent{
213+
ID: int64(i),
214+
ActivityTaskScheduledEventAttributes: &types.ActivityTaskScheduledEventAttributes{
215+
ActivityID: string(rune(i)),
216+
ActivityType: &types.ActivityType{Name: "test-activity"},
217+
StartToCloseTimeoutSeconds: common.Int32Ptr(int32(10)),
218+
HeartbeatTimeoutSeconds: common.Int32Ptr(int32(5)),
219+
RetryPolicy: &types.RetryPolicy{
220+
InitialIntervalInSeconds: 1,
221+
MaximumAttempts: 1,
222+
},
223+
},
224+
})
225+
226+
}
227+
testResponse.History.Events = append(testResponse.History.Events, &types.HistoryEvent{
228+
ID: 4,
229+
Timestamp: common.Int64Ptr(testTimeStamp),
230+
ActivityTaskFailedEventAttributes: &types.ActivityTaskFailedEventAttributes{
231+
Reason: common.StringPtr("cadenceInternal:Generic"),
232+
Details: []byte("test-activity-failure"),
233+
Identity: "localhost",
234+
ScheduledEventID: 2,
235+
StartedEventID: 3,
236+
},
237+
})
238+
239+
return testResponse
240+
}
241+
196242
func Test__identifyIssuesWithPaginatedHistory(t *testing.T) {
197243
ctrl := gomock.NewController(t)
198244
mockClientBean := client.NewMockBean(ctrl)

0 commit comments

Comments
 (0)