|
| 1 | +package main |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "fmt" |
| 6 | + "github.com/pkg/errors" |
| 7 | + "go.uber.org/ratelimit" |
| 8 | + "golang.org/x/sync/errgroup" |
| 9 | + "os" |
| 10 | + "sort" |
| 11 | + "strings" |
| 12 | + "time" |
| 13 | + |
| 14 | + "github.com/fatih/color" |
| 15 | + "github.com/google/go-github/v50/github" |
| 16 | + "github.com/smartcontractkit/chainlink-testing-framework/framework" |
| 17 | + "golang.org/x/oauth2" |
| 18 | +) |
| 19 | + |
const (
	// WorkflowRateLimitPerSecond caps workflow-run list API calls per second.
	WorkflowRateLimitPerSecond = 10
	// JobsRateLimitPerSecond caps per-run job list API calls per second.
	JobsRateLimitPerSecond = 10
	// MaxBarLength is the maximum width, in characters, of the printed duration bar.
	MaxBarLength = 50
	// GHResultsPerPage is the page size used for GitHub list API calls.
	GHResultsPerPage = 100 // anything above that won't work
)

var (
	// SlowTestThreshold is the duration at or above which output is colored yellow.
	SlowTestThreshold = 5 * time.Minute
	// ExtremelySlowTestThreshold is the duration at or above which output is colored red.
	ExtremelySlowTestThreshold = 10 * time.Minute
)
| 31 | + |
// JobResult holds the step and job duration pools collected from a single
// workflow run, keyed by step/job name.
type JobResult struct {
	StepStats map[string]Stat // step name -> collected durations
	JobStats  map[string]Stat // job name -> collected durations
}

// Stat is a named pool of raw durations together with the percentile values
// computed from it (see calculatePercentiles).
type Stat struct {
	Name      string
	Median    time.Duration // 50th percentile of Durations
	P95       time.Duration // 95th percentile of Durations
	P99       time.Duration // 99th percentile of Durations
	Durations []time.Duration
}
| 44 | + |
| 45 | +// AnalyzeCIRuns analyzes GitHub Actions job runs and prints statistics |
| 46 | +func AnalyzeCIRuns(owner, repo, wf string, daysRange int) error { |
| 47 | + ctx := context.Background() |
| 48 | + token := os.Getenv("GITHUB_TOKEN") |
| 49 | + if token == "" { |
| 50 | + return fmt.Errorf("GITHUB_TOKEN environment variable is not set") |
| 51 | + } |
| 52 | + |
| 53 | + framework.L.Info(). |
| 54 | + Str("Owner", owner). |
| 55 | + Str("Repo", repo). |
| 56 | + Str("Workflow", wf). |
| 57 | + Msg("Analyzing CI runs") |
| 58 | + |
| 59 | + ts := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token}) |
| 60 | + tc := oauth2.NewClient(ctx, ts) |
| 61 | + client := github.NewClient(tc) |
| 62 | + |
| 63 | + // Fetch workflow runs for the last N days |
| 64 | + // have GH rate limits in mind, see file constants |
| 65 | + lastMonth := time.Now().AddDate(0, 0, -daysRange) |
| 66 | + runs, err := getAllWorkflowRuns(ctx, client, owner, repo, wf, lastMonth) |
| 67 | + if err != nil { |
| 68 | + return fmt.Errorf("failed to fetch workflow runs: %w", err) |
| 69 | + } |
| 70 | + |
| 71 | + framework.L.Info(). |
| 72 | + Int("Runs", len(runs)). |
| 73 | + Msg("Found matching workflow runs") |
| 74 | + |
| 75 | + results := make(chan JobResult, len(runs)) |
| 76 | + eg := &errgroup.Group{} |
| 77 | + rl := ratelimit.New(JobsRateLimitPerSecond) |
| 78 | + |
| 79 | + for _, run := range runs { |
| 80 | + eg.Go(func() error { |
| 81 | + rl.Take() |
| 82 | + return analyzeRun(ctx, client, run, results, owner, repo) |
| 83 | + }) |
| 84 | + } |
| 85 | + if err := eg.Wait(); err != nil { |
| 86 | + return err |
| 87 | + } |
| 88 | + close(results) |
| 89 | + |
| 90 | + perStepStats := make(map[string]Stat) |
| 91 | + perJobStats := make(map[string]Stat) |
| 92 | + |
| 93 | + for result := range results { |
| 94 | + // Aggregate step durations |
| 95 | + for stepName, durations := range result.StepStats { |
| 96 | + if existing, ok := perStepStats[stepName]; ok { |
| 97 | + existing.Durations = append(existing.Durations, durations.Durations...) |
| 98 | + perStepStats[stepName] = existing |
| 99 | + } else { |
| 100 | + perStepStats[stepName] = Stat{ |
| 101 | + Name: stepName, |
| 102 | + Durations: durations.Durations, |
| 103 | + } |
| 104 | + } |
| 105 | + } |
| 106 | + // Aggregate job stats |
| 107 | + for jobName, stat := range result.JobStats { |
| 108 | + if existing, ok := perJobStats[jobName]; ok { |
| 109 | + existing.Durations = append(existing.Durations, stat.Durations...) |
| 110 | + perJobStats[jobName] = existing |
| 111 | + } else { |
| 112 | + perJobStats[jobName] = Stat{ |
| 113 | + Name: jobName, |
| 114 | + Durations: stat.Durations, |
| 115 | + } |
| 116 | + } |
| 117 | + } |
| 118 | + } |
| 119 | + |
| 120 | + for stepName, stat := range perStepStats { |
| 121 | + stat.Median, stat.P95, stat.P99 = calculatePercentiles(stat.Durations) |
| 122 | + perStepStats[stepName] = stat |
| 123 | + } |
| 124 | + for jobName, stat := range perJobStats { |
| 125 | + stat.Median, stat.P95, stat.P99 = calculatePercentiles(stat.Durations) |
| 126 | + perJobStats[jobName] = stat |
| 127 | + } |
| 128 | + fmt.Print("\nSteps:\n") |
| 129 | + printStats(perStepStats) |
| 130 | + fmt.Print("\nJobs:\n") |
| 131 | + printStats(perJobStats) |
| 132 | + return nil |
| 133 | +} |
| 134 | + |
| 135 | +func getAllWorkflowRuns(ctx context.Context, client *github.Client, owner, repo, name string, timeRange time.Time) ([]*github.WorkflowRun, error) { |
| 136 | + var allRuns []*github.WorkflowRun |
| 137 | + opts := &github.ListWorkflowRunsOptions{ |
| 138 | + Created: fmt.Sprintf(">%s", timeRange.Format(time.RFC3339)), |
| 139 | + ListOptions: github.ListOptions{PerPage: 100}, |
| 140 | + } |
| 141 | + rl := ratelimit.New(WorkflowRateLimitPerSecond) |
| 142 | + for { |
| 143 | + rl.Take() |
| 144 | + runs, resp, err := client.Actions.ListRepositoryWorkflowRuns(ctx, owner, repo, opts) |
| 145 | + if err != nil { |
| 146 | + return nil, fmt.Errorf("failed to fetch workflow runs: %w", err) |
| 147 | + } |
| 148 | + framework.L.Debug().Int("Runs", len(runs.WorkflowRuns)).Msg("Loading runs") |
| 149 | + for _, wr := range runs.WorkflowRuns { |
| 150 | + if strings.Contains(*wr.Name, name) { |
| 151 | + allRuns = append(allRuns, wr) |
| 152 | + } |
| 153 | + } |
| 154 | + if resp.NextPage == 0 { |
| 155 | + break |
| 156 | + } |
| 157 | + opts.Page = resp.NextPage |
| 158 | + } |
| 159 | + return allRuns, nil |
| 160 | +} |
| 161 | + |
| 162 | +// analyzeRun fetches workflow runs that are not skipped and returns their Stat through channel |
| 163 | +func analyzeRun(ctx context.Context, client *github.Client, run *github.WorkflowRun, results chan<- JobResult, owner, repo string) error { |
| 164 | + logger := framework.L.With(). |
| 165 | + Str("RunID", fmt.Sprintf("%d", *run.ID)). |
| 166 | + Str("CreatedAt", run.CreatedAt.Format(time.RFC3339)). |
| 167 | + Logger() |
| 168 | + logger.Debug().Msg("Analyzing run") |
| 169 | + |
| 170 | + jobs, _, err := client.Actions.ListWorkflowJobs(ctx, owner, repo, *run.ID, &github.ListWorkflowJobsOptions{ |
| 171 | + ListOptions: github.ListOptions{PerPage: GHResultsPerPage}, |
| 172 | + }) |
| 173 | + if err != nil { |
| 174 | + return errors.Wrap(err, "failed to fetch jobs for run") |
| 175 | + } |
| 176 | + |
| 177 | + stepStats := make(map[string]Stat) |
| 178 | + jobStats := make(map[string]Stat) |
| 179 | + |
| 180 | + // Analyze each job |
| 181 | + for _, job := range jobs.Jobs { |
| 182 | + logger.Debug(). |
| 183 | + Str("job_id", fmt.Sprintf("%d", *job.ID)). |
| 184 | + Str("job_name", *job.Name). |
| 185 | + Msg("Found job") |
| 186 | + |
| 187 | + // ignore jobs that are in progress or skipped |
| 188 | + if job.Conclusion != nil && *job.Conclusion == "skipped" { |
| 189 | + continue |
| 190 | + } |
| 191 | + if job.CompletedAt == nil { |
| 192 | + continue |
| 193 | + } |
| 194 | + jobDuration := job.CompletedAt.Time.Sub(job.StartedAt.Time) |
| 195 | + // Collect step durations |
| 196 | + for _, step := range job.Steps { |
| 197 | + if step.Conclusion != nil && *step.Conclusion == "skipped" { |
| 198 | + continue |
| 199 | + } |
| 200 | + elapsed := step.CompletedAt.Time.Sub(step.StartedAt.Time) |
| 201 | + if existing, ok := stepStats[*step.Name]; ok { |
| 202 | + existing.Durations = append(existing.Durations, elapsed) |
| 203 | + stepStats[*step.Name] = existing |
| 204 | + } else { |
| 205 | + stepStats[*step.Name] = Stat{ |
| 206 | + Name: *step.Name, |
| 207 | + Durations: []time.Duration{elapsed}, |
| 208 | + } |
| 209 | + } |
| 210 | + } |
| 211 | + // Collect per-job statistics |
| 212 | + if existing, ok := jobStats[*job.Name]; ok { |
| 213 | + existing.Durations = append(existing.Durations, jobDuration) |
| 214 | + jobStats[*job.Name] = existing |
| 215 | + } else { |
| 216 | + jobStats[*job.Name] = Stat{ |
| 217 | + Name: *job.Name, |
| 218 | + Durations: []time.Duration{jobDuration}, |
| 219 | + } |
| 220 | + } |
| 221 | + } |
| 222 | + results <- JobResult{ |
| 223 | + StepStats: stepStats, |
| 224 | + JobStats: jobStats, |
| 225 | + } |
| 226 | + return nil |
| 227 | +} |
| 228 | + |
| 229 | +// calculatePercentiles calculates the median (50th), 95th, and 99th percentiles |
| 230 | +func calculatePercentiles(durations []time.Duration) (median, p95, p99 time.Duration) { |
| 231 | + sort.Slice(durations, func(i, j int) bool { return durations[i] < durations[j] }) |
| 232 | + medianIndex := int(float64(len(durations)) * 50 / 100) |
| 233 | + p95Index := int(float64(len(durations)) * 95 / 100) |
| 234 | + p99Index := int(float64(len(durations)) * 99 / 100) |
| 235 | + return durations[medianIndex], durations[p95Index], durations[p99Index] |
| 236 | +} |
| 237 | + |
| 238 | +func printStats(jobStats map[string]Stat) { |
| 239 | + var stats []Stat |
| 240 | + for _, stat := range jobStats { |
| 241 | + sort.Slice(stat.Durations, func(i, j int) bool { return stat.Durations[i] < stat.Durations[j] }) |
| 242 | + stats = append(stats, stat) |
| 243 | + } |
| 244 | + sort.Slice(stats, func(i, j int) bool { return stats[i].Median > stats[j].Median }) |
| 245 | + maxNameLen := 0 |
| 246 | + for _, stat := range stats { |
| 247 | + if len(stat.Name) > maxNameLen { |
| 248 | + maxNameLen = len(stat.Name) |
| 249 | + } |
| 250 | + } |
| 251 | + |
| 252 | + for _, stat := range stats { |
| 253 | + colorPrinter := getColorPrinter(stat.Median) |
| 254 | + barLength := int(stat.Median.Seconds()) |
| 255 | + if barLength > MaxBarLength { |
| 256 | + barLength = MaxBarLength |
| 257 | + } |
| 258 | + bar := strings.Repeat("=", barLength) |
| 259 | + fmt.Printf("%-*s 50th:%s 95th:%s 99th:%s %s\n", |
| 260 | + maxNameLen, |
| 261 | + stat.Name, |
| 262 | + colorPrinter.Sprintf("%-12s", stat.Median.Round(time.Second)), |
| 263 | + colorPrinter.Sprintf("%-12s", stat.P95.Round(time.Second)), |
| 264 | + colorPrinter.Sprintf("%-12s", stat.P99.Round(time.Second)), |
| 265 | + colorPrinter.Sprint(bar)) |
| 266 | + } |
| 267 | +} |
| 268 | + |
| 269 | +// getColorPrinter returns a color printer based on the duration |
| 270 | +func getColorPrinter(duration time.Duration) *color.Color { |
| 271 | + switch { |
| 272 | + case duration < SlowTestThreshold: |
| 273 | + return color.New(color.FgGreen) |
| 274 | + case duration < ExtremelySlowTestThreshold: |
| 275 | + return color.New(color.FgYellow) |
| 276 | + default: |
| 277 | + return color.New(color.FgRed) |
| 278 | + } |
| 279 | +} |
0 commit comments