@@ -2,60 +2,15 @@ package main
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"log/slog"
 	"strings"
-	"sync"
 	"time"
 
 	"github.com/codeGROOVE-dev/prcost/pkg/cost"
 	"github.com/codeGROOVE-dev/prcost/pkg/github"
 )
 
-// countBotPRs counts how many PRs in the list are authored by bots.
-// Uses the same bot detection logic as pkg/github/query.go:isBot().
-func countBotPRs(prs []github.PRSummary) int {
-	count := 0
-	for _, pr := range prs {
-		if isBotAuthor(pr.Author) {
-			count++
-		}
-	}
-	return count
-}
-
-// isBotAuthor returns true if the author name indicates a bot account.
-// This matches the logic in pkg/github/query.go:isBot().
-func isBotAuthor(author string) bool {
-	// Check for common bot name patterns
-	if strings.HasSuffix(author, "[bot]") || strings.Contains(author, "-bot-") {
-		return true
-	}
-
-	// Check for specific known bot usernames (case-insensitive)
-	lowerAuthor := strings.ToLower(author)
-	knownBots := []string{
-		"renovate",
-		"dependabot",
-		"github-actions",
-		"codecov",
-		"snyk",
-		"greenkeeper",
-		"imgbot",
-		"renovate-bot",
-		"dependabot-preview",
-	}
-
-	for _, botName := range knownBots {
-		if lowerAuthor == botName {
-			return true
-		}
-	}
-
-	return false
-}
-
 // analyzeRepository performs repository-wide cost analysis by sampling PRs.
 // Uses library functions from pkg/github and pkg/cost for fetching, sampling,
 // and extrapolation - all functionality is available to external clients.
@@ -82,7 +37,7 @@ func analyzeRepository(ctx context.Context, owner, repo string, sampleSize, days
 	actualDays, _ := github.CalculateActualTimeWindow(prs, days)
 
 	// Count bot PRs before sampling
-	botPRCount := countBotPRs(prs)
+	botPRCount := github.CountBotPRs(prs)
 	humanPRCount := len(prs) - botPRCount
 
 	// Sample PRs using time-bucket strategy (includes all PRs)
@@ -103,60 +58,37 @@ func analyzeRepository(ctx context.Context, owner, repo string, sampleSize, days
 			len(samples), len(prs), actualDays)
 	}
 
-	// Collect breakdowns from each sample using parallel processing
-	var breakdowns []cost.Breakdown
-	var mu sync.Mutex
-	var wg sync.WaitGroup
-
-	// Use a buffered channel for worker pool pattern (same as web server)
-	concurrency := 8 // Process up to 8 PRs concurrently
-	semaphore := make(chan struct{}, concurrency)
-
-	for i, pr := range samples {
-		wg.Add(1)
-		go func(index int, prSummary github.PRSummary) {
-			defer wg.Done()
-
-			// Acquire semaphore slot
-			semaphore <- struct{}{}
-			defer func() { <-semaphore }()
-
-			prURL := fmt.Sprintf("https://github.com/%s/%s/pull/%d", owner, repo, prSummary.Number)
-			slog.Info("Processing sample PR",
-				"repo", fmt.Sprintf("%s/%s", owner, repo),
-				"number", prSummary.Number,
-				"progress", fmt.Sprintf("%d/%d", index+1, len(samples)))
-
-			// Fetch full PR data using configured data source
-			var prData cost.PRData
-			var err error
-			if dataSource == "turnserver" {
-				// Use turnserver with updatedAt for effective caching
-				prData, err = github.FetchPRDataViaTurnserver(ctx, prURL, token, prSummary.UpdatedAt)
-			} else {
-				// Use prx with updatedAt for effective caching
-				prData, err = github.FetchPRData(ctx, prURL, token, prSummary.UpdatedAt)
-			}
-			if err != nil {
-				slog.Warn("Failed to fetch PR data, skipping", "pr_number", prSummary.Number, "source", dataSource, "error", err)
-				return
-			}
-
-			// Calculate cost and accumulate with mutex protection
-			breakdown := cost.Calculate(prData, cfg)
-			mu.Lock()
-			breakdowns = append(breakdowns, breakdown)
-			mu.Unlock()
-		}(i, pr)
-	}
-
-	// Wait for all goroutines to complete
-	wg.Wait()
-
-	if len(breakdowns) == 0 {
-		return errors.New("no samples could be processed successfully")
+	// Convert samples to PRSummaryInfo format
+	var summaries []cost.PRSummaryInfo
+	for _, pr := range samples {
+		summaries = append(summaries, cost.PRSummaryInfo{
+			Owner:     pr.Owner,
+			Repo:      pr.Repo,
+			Number:    pr.Number,
+			UpdatedAt: pr.UpdatedAt,
+		})
+	}
+
+	// Create fetcher
+	fetcher := &github.SimpleFetcher{
+		Token:      token,
+		DataSource: dataSource,
 	}
 
+	// Analyze PRs using shared code path
+	result, err := cost.AnalyzePRs(ctx, &cost.AnalysisRequest{
+		Samples:     summaries,
+		Logger:      slog.Default(),
+		Fetcher:     fetcher,
+		Concurrency: 8, // Process up to 8 PRs concurrently
+		Config:      cfg,
+	})
+	if err != nil {
+		return err
+	}
+
+	breakdowns := result.Breakdowns
+
 	// Count unique authors across all PRs (not just samples)
 	totalAuthors := github.CountUniqueAuthors(prs)
 
@@ -204,7 +136,7 @@ func analyzeOrganization(ctx context.Context, org string, sampleSize, days int,
 	actualDays, _ := github.CalculateActualTimeWindow(prs, days)
 
 	// Count bot PRs before sampling
-	botPRCount := countBotPRs(prs)
+	botPRCount := github.CountBotPRs(prs)
 	humanPRCount := len(prs) - botPRCount
 
 	// Sample PRs using time-bucket strategy (includes all PRs)
@@ -225,60 +157,37 @@ func analyzeOrganization(ctx context.Context, org string, sampleSize, days int,
 			len(samples), len(prs), org, actualDays)
 	}
 
-	// Collect breakdowns from each sample using parallel processing
-	var breakdowns []cost.Breakdown
-	var mu sync.Mutex
-	var wg sync.WaitGroup
-
-	// Use a buffered channel for worker pool pattern (same as web server)
-	concurrency := 8 // Process up to 8 PRs concurrently
-	semaphore := make(chan struct{}, concurrency)
-
-	for i, pr := range samples {
-		wg.Add(1)
-		go func(index int, prSummary github.PRSummary) {
-			defer wg.Done()
-
-			// Acquire semaphore slot
-			semaphore <- struct{}{}
-			defer func() { <-semaphore }()
-
-			prURL := fmt.Sprintf("https://github.com/%s/%s/pull/%d", prSummary.Owner, prSummary.Repo, prSummary.Number)
-			slog.Info("Processing sample PR",
-				"repo", fmt.Sprintf("%s/%s", prSummary.Owner, prSummary.Repo),
-				"number", prSummary.Number,
-				"progress", fmt.Sprintf("%d/%d", index+1, len(samples)))
-
-			// Fetch full PR data using configured data source
-			var prData cost.PRData
-			var err error
-			if dataSource == "turnserver" {
-				// Use turnserver with updatedAt for effective caching
-				prData, err = github.FetchPRDataViaTurnserver(ctx, prURL, token, prSummary.UpdatedAt)
-			} else {
-				// Use prx with updatedAt for effective caching
-				prData, err = github.FetchPRData(ctx, prURL, token, prSummary.UpdatedAt)
-			}
-			if err != nil {
-				slog.Warn("Failed to fetch PR data, skipping", "pr_number", prSummary.Number, "source", dataSource, "error", err)
-				return
-			}
-
-			// Calculate cost and accumulate with mutex protection
-			breakdown := cost.Calculate(prData, cfg)
-			mu.Lock()
-			breakdowns = append(breakdowns, breakdown)
-			mu.Unlock()
-		}(i, pr)
-	}
-
-	// Wait for all goroutines to complete
-	wg.Wait()
-
-	if len(breakdowns) == 0 {
-		return errors.New("no samples could be processed successfully")
+	// Convert samples to PRSummaryInfo format
+	var summaries []cost.PRSummaryInfo
+	for _, pr := range samples {
+		summaries = append(summaries, cost.PRSummaryInfo{
+			Owner:     pr.Owner,
+			Repo:      pr.Repo,
+			Number:    pr.Number,
+			UpdatedAt: pr.UpdatedAt,
+		})
+	}
+
+	// Create fetcher
+	fetcher := &github.SimpleFetcher{
+		Token:      token,
+		DataSource: dataSource,
 	}
 
+	// Analyze PRs using shared code path
+	result, err := cost.AnalyzePRs(ctx, &cost.AnalysisRequest{
+		Samples:     summaries,
+		Logger:      slog.Default(),
+		Fetcher:     fetcher,
+		Concurrency: 8, // Process up to 8 PRs concurrently
+		Config:      cfg,
+	})
+	if err != nil {
+		return err
+	}
+
+	breakdowns := result.Breakdowns
+
 	// Count unique authors across all PRs (not just samples)
 	totalAuthors := github.CountUniqueAuthors(prs)
 
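
The point of the refactor is that the hand-rolled goroutine pool, semaphore, and mutex that both `analyzeRepository` and `analyzeOrganization` duplicated now live behind `cost.AnalyzePRs`, so external clients can drive the same path the CLI uses. Below is a minimal sketch of such a caller, based only on what this diff shows; it assumes `cost.Config` is the type of `cfg`, that `cost.PRSummaryInfo.UpdatedAt` matches whatever the sampler returns, and that `github.SimpleFetcher` keeps the old prx/turnserver switch (any `DataSource` other than "turnserver" presumably falls back to prx).

```go
package main

import (
	"context"
	"log/slog"

	"github.com/codeGROOVE-dev/prcost/pkg/cost"
	"github.com/codeGROOVE-dev/prcost/pkg/github"
)

// analyzeSamples sketches an external caller of the shared analysis path.
// The summaries slice would come from the caller's own sampling step;
// cost.Config as the type of cfg is an assumption not shown in this diff.
func analyzeSamples(ctx context.Context, summaries []cost.PRSummaryInfo, token string, cfg cost.Config) ([]cost.Breakdown, error) {
	// SimpleFetcher carries the token and the data-source choice that the
	// old per-command code threaded through FetchPRData/FetchPRDataViaTurnserver.
	fetcher := &github.SimpleFetcher{
		Token:      token,
		DataSource: "prx", // assumed fallback value; "turnserver" selects the other backend
	}

	result, err := cost.AnalyzePRs(ctx, &cost.AnalysisRequest{
		Samples:     summaries,
		Logger:      slog.Default(),
		Fetcher:     fetcher,
		Concurrency: 8, // same worker-pool width the CLI uses
		Config:      cfg,
	})
	if err != nil {
		return nil, err
	}
	return result.Breakdowns, nil
}
```

With concurrency, logging, and fetching expressed as fields of `AnalysisRequest`, both command paths shrink to the same shape: build summaries, build a fetcher, call `cost.AnalyzePRs`, and read `result.Breakdowns`.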