Skip to content

Commit 3db9dd1

Browse files
authored
Merge pull request #6 from tstromberg/main
unify cli/web code paths
2 parents 018702e + 9bf34e0 commit 3db9dd1

File tree

4 files changed

+243
-154
lines changed

4 files changed

+243
-154
lines changed

cmd/prcost/repository.go

Lines changed: 60 additions & 151 deletions
Original file line number | Diff line number | Diff line change
@@ -2,60 +2,15 @@ package main
22

33
import (
44
"context"
5-
"errors"
65
"fmt"
76
"log/slog"
87
"strings"
9-
"sync"
108
"time"
119

1210
"github.com/codeGROOVE-dev/prcost/pkg/cost"
1311
"github.com/codeGROOVE-dev/prcost/pkg/github"
1412
)
1513

16-
// countBotPRs counts how many PRs in the list are authored by bots.
17-
// Uses the same bot detection logic as pkg/github/query.go:isBot().
18-
func countBotPRs(prs []github.PRSummary) int {
19-
count := 0
20-
for _, pr := range prs {
21-
if isBotAuthor(pr.Author) {
22-
count++
23-
}
24-
}
25-
return count
26-
}
27-
28-
// isBotAuthor returns true if the author name indicates a bot account.
29-
// This matches the logic in pkg/github/query.go:isBot().
30-
func isBotAuthor(author string) bool {
31-
// Check for common bot name patterns
32-
if strings.HasSuffix(author, "[bot]") || strings.Contains(author, "-bot-") {
33-
return true
34-
}
35-
36-
// Check for specific known bot usernames (case-insensitive)
37-
lowerAuthor := strings.ToLower(author)
38-
knownBots := []string{
39-
"renovate",
40-
"dependabot",
41-
"github-actions",
42-
"codecov",
43-
"snyk",
44-
"greenkeeper",
45-
"imgbot",
46-
"renovate-bot",
47-
"dependabot-preview",
48-
}
49-
50-
for _, botName := range knownBots {
51-
if lowerAuthor == botName {
52-
return true
53-
}
54-
}
55-
56-
return false
57-
}
58-
5914
// analyzeRepository performs repository-wide cost analysis by sampling PRs.
6015
// Uses library functions from pkg/github and pkg/cost for fetching, sampling,
6116
// and extrapolation - all functionality is available to external clients.
@@ -82,7 +37,7 @@ func analyzeRepository(ctx context.Context, owner, repo string, sampleSize, days
8237
actualDays, _ := github.CalculateActualTimeWindow(prs, days)
8338

8439
// Count bot PRs before sampling
85-
botPRCount := countBotPRs(prs)
40+
botPRCount := github.CountBotPRs(prs)
8641
humanPRCount := len(prs) - botPRCount
8742

8843
// Sample PRs using time-bucket strategy (includes all PRs)
@@ -103,60 +58,37 @@ func analyzeRepository(ctx context.Context, owner, repo string, sampleSize, days
10358
len(samples), len(prs), actualDays)
10459
}
10560

106-
// Collect breakdowns from each sample using parallel processing
107-
var breakdowns []cost.Breakdown
108-
var mu sync.Mutex
109-
var wg sync.WaitGroup
110-
111-
// Use a buffered channel for worker pool pattern (same as web server)
112-
concurrency := 8 // Process up to 8 PRs concurrently
113-
semaphore := make(chan struct{}, concurrency)
114-
115-
for i, pr := range samples {
116-
wg.Add(1)
117-
go func(index int, prSummary github.PRSummary) {
118-
defer wg.Done()
119-
120-
// Acquire semaphore slot
121-
semaphore <- struct{}{}
122-
defer func() { <-semaphore }()
123-
124-
prURL := fmt.Sprintf("https://github.com/%s/%s/pull/%d", owner, repo, prSummary.Number)
125-
slog.Info("Processing sample PR",
126-
"repo", fmt.Sprintf("%s/%s", owner, repo),
127-
"number", prSummary.Number,
128-
"progress", fmt.Sprintf("%d/%d", index+1, len(samples)))
129-
130-
// Fetch full PR data using configured data source
131-
var prData cost.PRData
132-
var err error
133-
if dataSource == "turnserver" {
134-
// Use turnserver with updatedAt for effective caching
135-
prData, err = github.FetchPRDataViaTurnserver(ctx, prURL, token, prSummary.UpdatedAt)
136-
} else {
137-
// Use prx with updatedAt for effective caching
138-
prData, err = github.FetchPRData(ctx, prURL, token, prSummary.UpdatedAt)
139-
}
140-
if err != nil {
141-
slog.Warn("Failed to fetch PR data, skipping", "pr_number", prSummary.Number, "source", dataSource, "error", err)
142-
return
143-
}
144-
145-
// Calculate cost and accumulate with mutex protection
146-
breakdown := cost.Calculate(prData, cfg)
147-
mu.Lock()
148-
breakdowns = append(breakdowns, breakdown)
149-
mu.Unlock()
150-
}(i, pr)
151-
}
152-
153-
// Wait for all goroutines to complete
154-
wg.Wait()
155-
156-
if len(breakdowns) == 0 {
157-
return errors.New("no samples could be processed successfully")
61+
// Convert samples to PRSummaryInfo format
62+
var summaries []cost.PRSummaryInfo
63+
for _, pr := range samples {
64+
summaries = append(summaries, cost.PRSummaryInfo{
65+
Owner: pr.Owner,
66+
Repo: pr.Repo,
67+
Number: pr.Number,
68+
UpdatedAt: pr.UpdatedAt,
69+
})
70+
}
71+
72+
// Create fetcher
73+
fetcher := &github.SimpleFetcher{
74+
Token: token,
75+
DataSource: dataSource,
15876
}
15977

78+
// Analyze PRs using shared code path
79+
result, err := cost.AnalyzePRs(ctx, &cost.AnalysisRequest{
80+
Samples: summaries,
81+
Logger: slog.Default(),
82+
Fetcher: fetcher,
83+
Concurrency: 8, // Process up to 8 PRs concurrently
84+
Config: cfg,
85+
})
86+
if err != nil {
87+
return err
88+
}
89+
90+
breakdowns := result.Breakdowns
91+
16092
// Count unique authors across all PRs (not just samples)
16193
totalAuthors := github.CountUniqueAuthors(prs)
16294

@@ -204,7 +136,7 @@ func analyzeOrganization(ctx context.Context, org string, sampleSize, days int,
204136
actualDays, _ := github.CalculateActualTimeWindow(prs, days)
205137

206138
// Count bot PRs before sampling
207-
botPRCount := countBotPRs(prs)
139+
botPRCount := github.CountBotPRs(prs)
208140
humanPRCount := len(prs) - botPRCount
209141

210142
// Sample PRs using time-bucket strategy (includes all PRs)
@@ -225,60 +157,37 @@ func analyzeOrganization(ctx context.Context, org string, sampleSize, days int,
225157
len(samples), len(prs), org, actualDays)
226158
}
227159

228-
// Collect breakdowns from each sample using parallel processing
229-
var breakdowns []cost.Breakdown
230-
var mu sync.Mutex
231-
var wg sync.WaitGroup
232-
233-
// Use a buffered channel for worker pool pattern (same as web server)
234-
concurrency := 8 // Process up to 8 PRs concurrently
235-
semaphore := make(chan struct{}, concurrency)
236-
237-
for i, pr := range samples {
238-
wg.Add(1)
239-
go func(index int, prSummary github.PRSummary) {
240-
defer wg.Done()
241-
242-
// Acquire semaphore slot
243-
semaphore <- struct{}{}
244-
defer func() { <-semaphore }()
245-
246-
prURL := fmt.Sprintf("https://github.com/%s/%s/pull/%d", prSummary.Owner, prSummary.Repo, prSummary.Number)
247-
slog.Info("Processing sample PR",
248-
"repo", fmt.Sprintf("%s/%s", prSummary.Owner, prSummary.Repo),
249-
"number", prSummary.Number,
250-
"progress", fmt.Sprintf("%d/%d", index+1, len(samples)))
251-
252-
// Fetch full PR data using configured data source
253-
var prData cost.PRData
254-
var err error
255-
if dataSource == "turnserver" {
256-
// Use turnserver with updatedAt for effective caching
257-
prData, err = github.FetchPRDataViaTurnserver(ctx, prURL, token, prSummary.UpdatedAt)
258-
} else {
259-
// Use prx with updatedAt for effective caching
260-
prData, err = github.FetchPRData(ctx, prURL, token, prSummary.UpdatedAt)
261-
}
262-
if err != nil {
263-
slog.Warn("Failed to fetch PR data, skipping", "pr_number", prSummary.Number, "source", dataSource, "error", err)
264-
return
265-
}
266-
267-
// Calculate cost and accumulate with mutex protection
268-
breakdown := cost.Calculate(prData, cfg)
269-
mu.Lock()
270-
breakdowns = append(breakdowns, breakdown)
271-
mu.Unlock()
272-
}(i, pr)
273-
}
274-
275-
// Wait for all goroutines to complete
276-
wg.Wait()
277-
278-
if len(breakdowns) == 0 {
279-
return errors.New("no samples could be processed successfully")
160+
// Convert samples to PRSummaryInfo format
161+
var summaries []cost.PRSummaryInfo
162+
for _, pr := range samples {
163+
summaries = append(summaries, cost.PRSummaryInfo{
164+
Owner: pr.Owner,
165+
Repo: pr.Repo,
166+
Number: pr.Number,
167+
UpdatedAt: pr.UpdatedAt,
168+
})
169+
}
170+
171+
// Create fetcher
172+
fetcher := &github.SimpleFetcher{
173+
Token: token,
174+
DataSource: dataSource,
280175
}
281176

177+
// Analyze PRs using shared code path
178+
result, err := cost.AnalyzePRs(ctx, &cost.AnalysisRequest{
179+
Samples: summaries,
180+
Logger: slog.Default(),
181+
Fetcher: fetcher,
182+
Concurrency: 8, // Process up to 8 PRs concurrently
183+
Config: cfg,
184+
})
185+
if err != nil {
186+
return err
187+
}
188+
189+
breakdowns := result.Breakdowns
190+
282191
// Count unique authors across all PRs (not just samples)
283192
totalAuthors := github.CountUniqueAuthors(prs)
284193

0 commit comments

Comments
 (0)