1515package gcp
1616
1717import (
18+ "context"
19+ "encoding/json"
1820 "fmt"
21+ "regexp"
1922 "strings"
2023 "time"
2124
2225 "github.com/GoogleCloudPlatform/cloud-foundation-toolkit/infra/blueprint-test/pkg/gcloud"
26+ "github.com/GoogleCloudPlatform/cloud-foundation-toolkit/infra/blueprint-test/pkg/utils"
2327 "github.com/mitchellh/go-testing-interface"
2428 "github.com/tidwall/gjson"
2529
2630 "github.com/terraform-google-modules/terraform-example-foundation/test/integration/testutils"
31+
32+ "google.golang.org/api/cloudbuild/v1"
33+ "google.golang.org/api/option"
2734)
2835
2936const (
@@ -34,16 +41,73 @@ const (
3441 StatusCancelled = "CANCELLED"
3542)
3643
44+ type RetryOp struct {
45+ Type string `json:"@type"`
46+ Build Build `json:"build"`
47+ }
// Build is the subset of a Cloud Build build resource decoded from the
// retry operation metadata.
type Build struct {
	// ID is decoded from the "id" key.
	ID string `json:"id"`
	// Status is decoded from the "status" key.
	Status string `json:"status"`
	// CreateTime is decoded from the "createTime" key and kept as the raw string.
	CreateTime string `json:"createTime"`
}
53+
var (
	// retryRegexp maps each compiled retryable-error pattern to the
	// human-readable message associated with it. It is filled by init.
	retryRegexp = make(map[*regexp.Regexp]string)

	// ctx is the package-wide context handed to the Cloud Build API client.
	ctx = context.Background()
)
58+
59+ func init () {
60+ if len (retryRegexp ) == 0 {
61+ for e , m := range testutils .RetryableTransientErrors {
62+ r , err := regexp .Compile (fmt .Sprintf ("(?s)%s" , e )) //(?s) enables dot (.) to match newline.
63+ if err != nil {
64+ fmt .Printf ("failed to compile regex %s: %s" , e , err .Error ())
65+ }
66+ retryRegexp [r ] = m
67+ }
68+ }
69+ }
70+
3771type GCP struct {
38- Runf func (t testing.TB , cmd string , args ... interface {}) gjson.Result
39- sleepTime time.Duration
72+ Runf func (t testing.TB , cmd string , args ... interface {}) gjson.Result
73+ RunCmd func (t testing.TB , cmd string , args ... interface {}) string
74+ TriggerNewBuild func (t testing.TB , buildName string ) (string , error )
75+ sleepTime time.Duration
76+ }
77+
78+ // runCmd is a wrapper around gcloud.RunCmd because the original function has an input with a private type
79+ func runCmd (t testing.TB , cmd string , args ... interface {}) string {
80+ return gcloud .RunCmd (t , utils .StringFromTextAndArgs (append ([]interface {}{cmd }, args ... )... ))
81+ }
82+
83+ // triggerNewBuild triggers a new build based on the build provided
84+ func triggerNewBuild (t testing.TB , buildName string ) (string , error ) {
85+
86+ buildService , err := cloudbuild .NewService (ctx , option .WithScopes (cloudbuild .CloudPlatformScope ))
87+ if err != nil {
88+ return "" , fmt .Errorf ("failed to create Cloud Build service: %w" , err )
89+ }
90+ retryOperation , err := buildService .Projects .Locations .Builds .Retry (buildName , & cloudbuild.RetryBuildRequest {}).Do ()
91+ if err != nil {
92+ return "" , fmt .Errorf ("failed to retry build: %w" , err )
93+ }
94+
95+ var data RetryOp
96+ err = json .Unmarshal (retryOperation .Metadata , & data )
97+ if err != nil {
98+ return "" , fmt .Errorf ("Error unmarshaling retry operation metadata: %v" , err )
99+ }
100+
101+ return data .Build .ID , nil
40102}
41103
42104// NewGCP creates a new wrapper for Google Cloud Platform CLI.
43105func NewGCP () GCP {
44106 return GCP {
45- Runf : gcloud .Runf ,
46- sleepTime : 20 ,
107+ Runf : gcloud .Runf ,
108+ RunCmd : runCmd ,
109+ TriggerNewBuild : triggerNewBuild ,
110+ sleepTime : 20 ,
47111 }
48112}
49113
@@ -70,8 +134,9 @@ func (g GCP) GetBuilds(t testing.TB, projectID, region, filter string) map[strin
70134}
71135
72136// GetLastBuildStatus gets the status of the last build form a project and region that satisfy the given filter.
73- func (g GCP ) GetLastBuildStatus (t testing.TB , projectID , region , filter string ) string {
74- return g .Runf (t , "builds list --project %s --region %s --limit 1 --sort-by ~createTime --filter %s" , projectID , region , filter ).Array ()[0 ].Get ("status" ).String ()
137+ func (g GCP ) GetLastBuildStatus (t testing.TB , projectID , region , filter string ) (string , string ) {
138+ build := g .Runf (t , "builds list --project %s --region %s --limit 1 --sort-by ~createTime --filter %s" , projectID , region , filter ).Array ()[0 ]
139+ return build .Get ("status" ).String (), build .Get ("id" ).String ()
75140}
76141
77142// GetBuildStatus gets the status of the given build
@@ -91,16 +156,21 @@ func (g GCP) GetRunningBuildID(t testing.TB, projectID, region, filter string) s
91156 return ""
92157}
93158
159+ // GetBuildLogs get the execution logs of the given build
160+ func (g GCP ) GetBuildLogs (t testing.TB , projectID , region , buildID string ) string {
161+ return g .RunCmd (t , "builds log %s --project %s --region %s" , buildID , projectID , region )
162+ }
163+
94164// GetFinalBuildState gets the terminal status of the given build. It will wait if build is not finished.
95- func (g GCP ) GetFinalBuildState (t testing.TB , projectID , region , buildID string , maxRetry int ) (string , error ) {
165+ func (g GCP ) GetFinalBuildState (t testing.TB , projectID , region , buildID string , maxBuildRetry int ) (string , error ) {
96166 var status string
97167 count := 0
98168 fmt .Printf ("waiting for build %s execution.\n " , buildID )
99169 status = g .GetBuildStatus (t , projectID , region , buildID )
100170 fmt .Printf ("build status is %s\n " , status )
101171 for status != StatusSuccess && status != StatusFailure && status != StatusCancelled {
102172 fmt .Printf ("build status is %s\n " , status )
103- if count >= maxRetry {
173+ if count >= maxBuildRetry {
104174 return "" , fmt .Errorf ("timeout waiting for build '%s' execution" , buildID )
105175 }
106176 count = count + 1
@@ -112,29 +182,61 @@ func (g GCP) GetFinalBuildState(t testing.TB, projectID, region, buildID string,
112182}
113183
114184// WaitBuildSuccess waits for the current build in a repo to finish.
115- func (g GCP ) WaitBuildSuccess (t testing.TB , project , region , repo , commitSha , failureMsg string , maxRetry int ) error {
116- var filter string
185+ func (g GCP ) WaitBuildSuccess (t testing.TB , project , region , repo , commitSha , failureMsg string , maxBuildRetry , maxErrorRetries int , timeBetweenErrorRetries time.Duration ) error {
186+ var filter , status , build string
187+ var timeoutErr , err error
188+
117189 if commitSha == "" {
118190 filter = fmt .Sprintf ("source.repoSource.repoName:%s" , repo )
119191 } else {
120192 filter = fmt .Sprintf ("source.repoSource.commitSha:%s" , commitSha )
121193 }
122- build := g .GetRunningBuildID (t , project , region , filter )
123- if build != "" {
124- status , err := g .GetFinalBuildState (t , project , region , build , maxRetry )
194+
195+ build = g .GetRunningBuildID (t , project , region , filter )
196+ for i := 0 ; i < maxErrorRetries ; i ++ {
197+ if build != "" {
198+ status , timeoutErr = g .GetFinalBuildState (t , project , region , build , maxBuildRetry )
199+ } else {
200+ status , build = g .GetLastBuildStatus (t , project , region , filter )
201+ }
202+
203+ if timeoutErr != nil {
204+ return timeoutErr
205+ } else if status != StatusSuccess {
206+ if ! g .IsRetryableError (t , project , region , build ) {
207+ return fmt .Errorf ("%s\n See:\n https://console.cloud.google.com/cloud-build/builds;region=%s/%s?project=%s\n for details" , failureMsg , region , build , project )
208+ }
209+ fmt .Println ("build failed with retryable error. a new build will be triggered." )
210+ } else {
211+ return nil // Build succeeded
212+ }
213+
214+ // Trigger a new build
215+ build , err = g .TriggerNewBuild (t , fmt .Sprintf ("projects/%s/locations/%s/builds/%s" , project , region , build ))
125216 if err != nil {
126- return err
217+ return fmt . Errorf ( "failed to trigger new build after %d retries: %w" , maxErrorRetries , err )
127218 }
128- if status != StatusSuccess {
129- return fmt .Errorf ("%s\n See:\n https://console.cloud.google.com/cloud-build/builds;region=%s/%s?project=%s\n for details" , failureMsg , region , build , project )
219+ fmt .Printf ("triggered new build with ID: %s (attempt %d/%d)\n " , build , i + 1 , maxErrorRetries )
220+ if i < maxErrorRetries - 1 {
221+ time .Sleep (timeBetweenErrorRetries ) // Wait before retrying
130222 }
131- } else {
132- status := g .GetLastBuildStatus (t , project , region , filter )
133- if status != StatusSuccess {
134- return fmt .Errorf ("%s\n See:\n https://console.cloud.google.com/cloud-build/builds;region=%s/%s?project=%s\n for details" , failureMsg , region , build , project )
223+ }
224+ return fmt .Errorf ("%s\n build failed after %d retries.\n See Cloud Build logs for details." , failureMsg , maxErrorRetries )
225+ }
226+
227+ // IsRetryableError checks the logs of a failed Cloud Build build
228+ // and verify if the error is a transient one and can be retried
229+ func (g GCP ) IsRetryableError (t testing.TB , projectID , region , build string ) bool {
230+ logs := g .GetBuildLogs (t , projectID , region , build )
231+ found := false
232+ for pattern , msg := range retryRegexp {
233+ if pattern .MatchString (logs ) {
234+ found = true
235+ fmt .Printf ("error '%s' is worth of a retry\n " , msg )
236+ break
135237 }
136238 }
137- return nil
239+ return found
138240}
139241
140242// HasSccNotification checks if a Security Command Center notification exists
0 commit comments