Skip to content

Commit eb84e8b

Browse files
committed
refactor(investigation): Rework ResourceBuilder for improved usability
This commit refactors the \`ResourceBuilder\` to be more user-friendly and robust. The previous implementation required callers to manage dependencies between resources manually and was prone to runtime errors if resources were not requested in the correct order. The key changes include: - A new \`NewResourceBuilder\` constructor now handles the initial setup of required components like OCM and PagerDuty clients, simplifying the builder's API. - The \`Build()\` method now lazily initializes resources only when they are explicitly requested via \`With...\` methods. This includes deferring the \`GetClusterInfo\` call until necessary. - Dependency management is now handled internally by the builder. For example, requesting an AWS client via \`WithAwsClient()\` will automatically ensure the cluster object is also built. - Investigation \`Run\` methods have been updated to explicitly declare their resource dependencies using the new builder methods. This refactoring resolves several build and runtime errors, improves the developer experience by providing a clearer and safer API, and makes investigations easier to write and maintain. Co-authored-by: Aider <using Gemini>
1 parent 62aca9c commit eb84e8b

File tree

6 files changed

+48
-82
lines changed

6 files changed

+48
-82
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ e2e-suite.test
1010
payload
1111
test/testinfra/*.log
1212
test/testinfra/*.pem
13+
.aider*

cadctl/cmd/investigate/investigate.go

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828
"github.com/openshift/configuration-anomaly-detection/pkg/investigations/precheck"
2929
"github.com/openshift/configuration-anomaly-detection/pkg/logging"
3030
"github.com/openshift/configuration-anomaly-detection/pkg/metrics"
31-
ocm "github.com/openshift/configuration-anomaly-detection/pkg/ocm"
3231
"github.com/openshift/configuration-anomaly-detection/pkg/pagerduty"
3332

3433
"github.com/spf13/cobra"
@@ -101,26 +100,28 @@ func run(cmd *cobra.Command, _ []string) error {
101100
return err
102101
}
103102

104-
ocmClient, err := ocm.New()
103+
builder, err := investigation.NewResourceBuilder(pdClient, clusterID, alertInvestigation.Name(), logLevelFlag, pipelineNameEnv)
105104
if err != nil {
106-
return fmt.Errorf("could not initialize ocm client: %w", err)
105+
return fmt.Errorf("failed to create resource builder: %w", err)
107106
}
108107

109-
builder := &investigation.ResourceBuilderT{}
110108
defer func() {
111109
if err != nil {
112110
handleCADFailure(err, builder, pdClient)
113111
}
114112
}()
115113

116-
// Prime the builder with information required for all investigations.
117-
builder.WithName(alertInvestigation.Name()).WithCluster(clusterID).WithPagerDutyClient(pdClient).WithOcmClient(ocmClient).WithLogger(logLevelFlag, pipelineNameEnv)
118-
119114
precheck := precheck.ClusterStatePrecheck{}
120115
result, err := precheck.Run(builder)
121-
if err != nil && strings.Contains(err.Error(), "no cluster found") {
122-
logging.Warnf("No cluster found with ID '%s'. Escalating and exiting.", clusterID)
123-
return pdClient.EscalateIncidentWithNote("CAD was unable to find the incident cluster in OCM. An alert for a non-existing cluster is unexpected. Please investigate manually.")
116+
if err != nil {
117+
if strings.Contains(err.Error(), "no cluster found") {
118+
logging.Warnf("No cluster found with ID '%s'. Escalating and exiting.", clusterID)
119+
return pdClient.EscalateIncidentWithNote("CAD was unable to find the incident cluster in OCM. An alert for a non-existing cluster is unexpected. Please investigate manually.")
120+
}
121+
return err
122+
}
123+
if result.StopInvestigations {
124+
return nil
124125
}
125126

126127
ccamInvestigation := ccam.Investigation{}
@@ -140,7 +141,7 @@ func run(cmd *cobra.Command, _ []string) error {
140141
return updateIncidentTitle(pdClient)
141142
}
142143

143-
func handleCADFailure(err error, rb *investigation.ResourceBuilderT, pdClient *pagerduty.SdkClient) {
144+
func handleCADFailure(err error, rb investigation.ResourceBuilder, pdClient *pagerduty.SdkClient) {
144145
logging.Errorf("CAD investigation failed: %v", err)
145146

146147
var notes string

pkg/investigations/cpd/cpd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ var byovpcRoutingSL = &ocm.ServiceLog{Severity: "Major", Summary: "Installation
2929
// In the future, we want to automate service logs based on the network verifier output.
3030
func (c *Investigation) Run(rb investigation.ResourceBuilder) (investigation.InvestigationResult, error) {
3131
result := investigation.InvestigationResult{}
32-
r, err := rb.WithClusterDeployment().Build()
32+
r, err := rb.WithClusterDeployment().WithAwsClient().Build()
3333
if err != nil {
3434
return result, err
3535
}

pkg/investigations/investigation/investigation.go

Lines changed: 32 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package investigation
22

33
import (
4-
"errors"
54
"fmt"
65

76
cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
@@ -29,6 +28,28 @@ type InvestigationResult struct {
2928
StopInvestigations bool
3029
}
3130

31+
func NewResourceBuilder(pdClient pagerduty.Client, clusterId string, name string, logLevel string, pipelineName string) (ResourceBuilder, error) {
32+
ocmClient, err := ocm.New()
33+
if err != nil {
34+
return nil, fmt.Errorf("could not create ocm client: %w", err)
35+
}
36+
37+
rb := &ResourceBuilderT{
38+
buildLogger: true,
39+
clusterId: clusterId,
40+
name: name,
41+
logLevel: logLevel,
42+
pipelineName: pipelineName,
43+
ocmClient: ocmClient,
44+
builtResources: &Resources{
45+
PdClient: pdClient,
46+
OcmClient: ocmClient,
47+
},
48+
}
49+
50+
return rb, nil
51+
}
52+
3253
type Investigation interface {
3354
Run(builder ResourceBuilder) (InvestigationResult, error)
3455
// Please note that when adding an investigation the name and the directory currently need to be the same,
@@ -51,14 +72,10 @@ type Resources struct {
5172
}
5273

5374
type ResourceBuilder interface {
54-
WithCluster(clusterId string) ResourceBuilder
75+
WithCluster() ResourceBuilder
5576
WithClusterDeployment() ResourceBuilder
5677
WithAwsClient() ResourceBuilder
57-
WithOcmClient(*ocm.SdkClient) ResourceBuilder
58-
WithPagerDutyClient(client *pagerduty.SdkClient) ResourceBuilder
5978
WithNotes() ResourceBuilder
60-
WithName(name string) ResourceBuilder
61-
WithLogger(logLevel string, pipelineName string) ResourceBuilder
6279
Build() (*Resources, error)
6380
}
6481

@@ -69,87 +86,50 @@ type ResourceBuilderT struct {
6986
buildNotes bool
7087
buildLogger bool
7188

72-
// These clients are required for all investigations and all clusters, so they are pre-filled.
73-
pdClient *pagerduty.SdkClient
74-
ocmClient *ocm.SdkClient
75-
7689
clusterId string
7790
name string
7891
logLevel string
7992
pipelineName string
8093

94+
ocmClient *ocm.SdkClient
95+
8196
// cache
8297
builtResources *Resources
8398
buildErr error
8499
}
85100

86-
func (r *ResourceBuilderT) WithCluster(clusterId string) ResourceBuilder {
101+
func (r *ResourceBuilderT) WithCluster() ResourceBuilder {
87102
r.buildCluster = true
88-
r.clusterId = clusterId
89103
return r
90104
}
91105

92106
func (r *ResourceBuilderT) WithClusterDeployment() ResourceBuilder {
107+
r.WithCluster()
93108
r.buildClusterDeployment = true
94109
return r
95110
}
96111

97112
func (r *ResourceBuilderT) WithAwsClient() ResourceBuilder {
113+
r.WithCluster()
98114
r.buildAwsClient = true
99115
return r
100116
}
101117

102-
func (r *ResourceBuilderT) WithOcmClient(client *ocm.SdkClient) ResourceBuilder {
103-
r.ocmClient = client
104-
return r
105-
}
106-
107-
func (r *ResourceBuilderT) WithPagerDutyClient(client *pagerduty.SdkClient) ResourceBuilder {
108-
r.pdClient = client
109-
return r
110-
}
111-
112118
func (r *ResourceBuilderT) WithNotes() ResourceBuilder {
113119
r.buildNotes = true
114120
return r
115121
}
116122

117-
func (r *ResourceBuilderT) WithName(name string) ResourceBuilder {
118-
r.name = name
119-
return r
120-
}
121-
122-
func (r *ResourceBuilderT) WithLogger(logLevel string, pipelineName string) ResourceBuilder {
123-
r.buildLogger = true
124-
r.logLevel = logLevel
125-
r.pipelineName = pipelineName
126-
return r
127-
}
128-
129123
func (r *ResourceBuilderT) Build() (*Resources, error) {
130124
if r.buildErr != nil {
131125
return nil, r.buildErr
132126
}
133127

134-
if r.builtResources == nil {
135-
r.builtResources = &Resources{
136-
Name: r.name,
137-
OcmClient: r.ocmClient,
138-
PdClient: r.pdClient,
139-
}
140-
}
128+
// The Name is now set during construction.
129+
r.builtResources.Name = r.name
141130

142131
var err error
143132

144-
if r.buildClusterDeployment && !r.buildCluster {
145-
r.buildErr = errors.New("cannot build ClusterDeployment without Cluster")
146-
return nil, r.buildErr
147-
}
148-
if r.buildAwsClient && !r.buildCluster {
149-
r.buildErr = errors.New("cannot build AwsClient without Cluster")
150-
return nil, r.buildErr
151-
}
152-
153133
if r.buildCluster && r.builtResources.Cluster == nil {
154134
r.builtResources.Cluster, err = r.ocmClient.GetClusterInfo(r.clusterId)
155135
if err != nil {
@@ -183,7 +163,7 @@ func (r *ResourceBuilderT) Build() (*Resources, error) {
183163

184164
if r.buildLogger {
185165
// Re-initialize the logger with the cluster ID.
186-
logging.RawLogger = logging.InitLogger(r.logLevel, "", internalClusterId)
166+
logging.RawLogger = logging.InitLogger(r.logLevel, r.pipelineName, internalClusterId)
187167
}
188168
}
189169

@@ -200,7 +180,7 @@ type ResourceBuilderMock struct {
200180
BuildError error
201181
}
202182

203-
func (r *ResourceBuilderMock) WithCluster(clusterId string) ResourceBuilder {
183+
func (r *ResourceBuilderMock) WithCluster() ResourceBuilder {
204184
return r
205185
}
206186

@@ -212,26 +192,10 @@ func (r *ResourceBuilderMock) WithAwsClient() ResourceBuilder {
212192
return r
213193
}
214194

215-
func (r *ResourceBuilderMock) WithOcmClient(client *ocm.SdkClient) ResourceBuilder {
216-
return r
217-
}
218-
219-
func (r *ResourceBuilderMock) WithPagerDutyClient(client *pagerduty.SdkClient) ResourceBuilder {
220-
return r
221-
}
222-
223195
func (r *ResourceBuilderMock) WithNotes() ResourceBuilder {
224196
return r
225197
}
226198

227-
func (r *ResourceBuilderMock) WithName(name string) ResourceBuilder {
228-
return r
229-
}
230-
231-
func (r *ResourceBuilderMock) WithLogger(logLevel string, pipelineName string) ResourceBuilder {
232-
return r
233-
}
234-
235199
func (r *ResourceBuilderMock) Build() (*Resources, error) {
236200
if r.BuildError != nil {
237201
return nil, r.BuildError

pkg/investigations/machinehealthcheckunterminatedshortcircuitsre/machinehealthcheckunterminatedshortcircuitsre.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func (i *Investigation) teardown() error {
6363
func (i *Investigation) Run(rb investigation.ResourceBuilder) (investigation.InvestigationResult, error) {
6464
ctx := context.Background()
6565
result := investigation.InvestigationResult{}
66-
r, err := rb.Build()
66+
r, err := rb.WithCluster().Build()
6767
if err != nil {
6868
return result, err
6969
}

pkg/investigations/precheck/precheck.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ type ClusterStatePrecheck struct{}
1414
// Performs according pagerduty actions and returns whether CAD needs to investigate the cluster
1515
func (c *ClusterStatePrecheck) Run(rb investigation.ResourceBuilder) (investigation.InvestigationResult, error) {
1616
result := investigation.InvestigationResult{}
17-
r, err := rb.Build()
17+
r, err := rb.WithCluster().Build()
1818
if err != nil {
1919
return result, err
2020
}

0 commit comments

Comments
 (0)