diff --git a/cmd/prcost/repository.go b/cmd/prcost/repository.go index 1421e82..c16e772 100644 --- a/cmd/prcost/repository.go +++ b/cmd/prcost/repository.go @@ -103,10 +103,15 @@ func analyzeRepository(ctx context.Context, owner, repo string, sampleSize, days prSummaryInfos := make([]cost.PRSummaryInfo, len(prs)) for i, pr := range prs { prSummaryInfos[i] = cost.PRSummaryInfo{ - Owner: pr.Owner, - Repo: pr.Repo, - Merged: pr.Merged, - State: pr.State, + Owner: pr.Owner, + Repo: pr.Repo, + Author: pr.Author, + AuthorType: pr.AuthorType, + CreatedAt: pr.CreatedAt, + UpdatedAt: pr.UpdatedAt, + ClosedAt: pr.ClosedAt, + Merged: pr.Merged, + State: pr.State, } } @@ -214,10 +219,15 @@ func analyzeOrganization(ctx context.Context, org string, sampleSize, days int, prSummaryInfos := make([]cost.PRSummaryInfo, len(prs)) for i, pr := range prs { prSummaryInfos[i] = cost.PRSummaryInfo{ - Owner: pr.Owner, - Repo: pr.Repo, - Merged: pr.Merged, - State: pr.State, + Owner: pr.Owner, + Repo: pr.Repo, + Author: pr.Author, + AuthorType: pr.AuthorType, + CreatedAt: pr.CreatedAt, + UpdatedAt: pr.UpdatedAt, + ClosedAt: pr.ClosedAt, + Merged: pr.Merged, + State: pr.State, } } diff --git a/internal/server/integration_test.go b/internal/server/integration_test.go index 87d48be..05c56a7 100644 --- a/internal/server/integration_test.go +++ b/internal/server/integration_test.go @@ -33,7 +33,7 @@ func TestOrgSampleStreamIntegration(t *testing.T) { // Create request reqBody := OrgSampleRequest{ Org: "codeGROOVE-dev", - SampleSize: 100, + SampleSize: 250, Days: 60, } body, err := json.Marshal(reqBody) @@ -195,7 +195,7 @@ func TestOrgSampleStreamNoTimeout(t *testing.T) { // Create request with larger sample size to ensure longer operation reqBody := OrgSampleRequest{ Org: "codeGROOVE-dev", - SampleSize: 100, + SampleSize: 250, Days: 60, } body, err := json.Marshal(reqBody) diff --git a/internal/server/server.go b/internal/server/server.go index 5909601..7bea048 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -144,7 +144,7 @@ type CalculateResponse struct { type RepoSampleRequest struct { Owner string `json:"owner"` Repo string `json:"repo"` - SampleSize int `json:"sample_size,omitempty"` // Default: 100 + SampleSize int `json:"sample_size,omitempty"` // Default: 250 Days int `json:"days,omitempty"` // Default: 60 Config *cost.Config `json:"config,omitempty"` } @@ -154,7 +154,7 @@ type RepoSampleRequest struct { //nolint:govet // fieldalignment: API struct field order optimized for readability type OrgSampleRequest struct { Org string `json:"org"` - SampleSize int `json:"sample_size,omitempty"` // Default: 100 + SampleSize int `json:"sample_size,omitempty"` // Default: 250 Days int `json:"days,omitempty"` // Default: 60 Config *cost.Config `json:"config,omitempty"` } @@ -1478,18 +1478,18 @@ func (s *Server) parseRepoSampleRequest(ctx context.Context, r *http.Request) (* // Set defaults if req.SampleSize == 0 { - req.SampleSize = 100 + req.SampleSize = 250 } if req.Days == 0 { req.Days = 60 } - // Validate reasonable limits (silently cap at 100) + // Validate reasonable limits (silently cap at 250) if req.SampleSize < 1 { return nil, errors.New("sample_size must be at least 1") } - if req.SampleSize > 100 { - req.SampleSize = 100 + if req.SampleSize > 250 { + req.SampleSize = 250 } if req.Days < 1 || req.Days > 365 { return nil, errors.New("days must be between 1 and 365") @@ -1536,18 +1536,18 @@ func (s *Server) parseOrgSampleRequest(ctx context.Context, r *http.Request) (*O // Set defaults if req.SampleSize == 0 { - req.SampleSize = 100 + req.SampleSize = 250 } if req.Days == 0 { req.Days = 60 } - // Validate reasonable limits (silently cap at 100) + // Validate reasonable limits (silently cap at 250) if req.SampleSize < 1 { return nil, errors.New("sample_size must be at least 1") } - if req.SampleSize > 100 { - req.SampleSize = 100 + if req.SampleSize > 250 { + req.SampleSize = 250 } if req.Days < 1 || req.Days > 365 { return nil, errors.New("days must be between 1 and 365") @@ -1663,10 +1663,15 @@ func (s *Server) processRepoSample(ctx context.Context, req *RepoSampleRequest, prSummaryInfos := make([]cost.PRSummaryInfo, len(prs)) for i, pr := range prs { prSummaryInfos[i] = cost.PRSummaryInfo{ - Owner: pr.Owner, - Repo: pr.Repo, - Merged: pr.Merged, - State: pr.State, + Owner: pr.Owner, + Repo: pr.Repo, + Author: pr.Author, + AuthorType: pr.AuthorType, + CreatedAt: pr.CreatedAt, + UpdatedAt: pr.UpdatedAt, + ClosedAt: pr.ClosedAt, + Merged: pr.Merged, + State: pr.State, } } @@ -1811,10 +1816,15 @@ func (s *Server) processOrgSample(ctx context.Context, req *OrgSampleRequest, to prSummaryInfos := make([]cost.PRSummaryInfo, len(prs)) for i, pr := range prs { prSummaryInfos[i] = cost.PRSummaryInfo{ - Owner: pr.Owner, - Repo: pr.Repo, - Merged: pr.Merged, - State: pr.State, + Owner: pr.Owner, + Repo: pr.Repo, + Author: pr.Author, + AuthorType: pr.AuthorType, + CreatedAt: pr.CreatedAt, + UpdatedAt: pr.UpdatedAt, + ClosedAt: pr.ClosedAt, + Merged: pr.Merged, + State: pr.State, } } @@ -2219,10 +2229,15 @@ func (s *Server) processRepoSampleWithProgress(ctx context.Context, req *RepoSam prSummaryInfos := make([]cost.PRSummaryInfo, len(prs)) for i, pr := range prs { prSummaryInfos[i] = cost.PRSummaryInfo{ - Owner: pr.Owner, - Repo: pr.Repo, - Merged: pr.Merged, - State: pr.State, + Owner: pr.Owner, + Repo: pr.Repo, + Author: pr.Author, + AuthorType: pr.AuthorType, + CreatedAt: pr.CreatedAt, + UpdatedAt: pr.UpdatedAt, + ClosedAt: pr.ClosedAt, + Merged: pr.Merged, + State: pr.State, } } @@ -2380,10 +2395,15 @@ func (s *Server) processOrgSampleWithProgress(ctx context.Context, req *OrgSampl prSummaryInfos := make([]cost.PRSummaryInfo, len(prs)) for i, pr := range prs { prSummaryInfos[i] = cost.PRSummaryInfo{ - Owner: pr.Owner, - Repo: pr.Repo, - Merged: pr.Merged, - State: pr.State, + Owner: pr.Owner, + Repo: pr.Repo, + Author: pr.Author, + AuthorType: pr.AuthorType, + CreatedAt: pr.CreatedAt, + UpdatedAt: pr.UpdatedAt, + ClosedAt: pr.ClosedAt, + Merged: pr.Merged, + State: pr.State, } } diff --git a/internal/server/server_test.go b/internal/server/server_test.go index 0720c14..906a233 100644 --- a/internal/server/server_test.go +++ b/internal/server/server_test.go @@ -1484,7 +1484,7 @@ func TestParseRepoSampleRequest(t *testing.T) { wantOwner: "testowner", wantRepo: "testrepo", wantDays: 60, - wantSampleSize: 100, + wantSampleSize: 250, }, { name: "missing owner", @@ -1571,7 +1571,7 @@ func TestParseOrgSampleRequest(t *testing.T) { wantErr: false, wantOrg: "testorg", wantDays: 60, - wantSampleSize: 100, + wantSampleSize: 250, }, { name: "missing org", diff --git a/internal/server/static/index.html b/internal/server/static/index.html index dfdb78b..e35c5c9 100644 --- a/internal/server/static/index.html +++ b/internal/server/static/index.html @@ -1063,7 +1063,7 @@

PR Cost Calculator

id="repoSampleSize" value="50" min="1" - max="100" + max="250" >
50 (recommended, ±14% accuracy) or 30 (faster, ±18% accuracy)
@@ -1103,7 +1103,7 @@

PR Cost Calculator

id="orgSampleSize" value="50" min="1" - max="100" + max="250" >
50 (recommended, ±14% accuracy) or 30 (faster, ±18% accuracy)
@@ -1426,7 +1426,7 @@

Why calculate PR costs?

} } - function formatEfficiencyHTML(efficiencyPct, grade, message, preventableCost, preventableHours, totalCost, totalHours, avgOpenHours, isAnnual = false, annualWasteCost = 0, annualWasteHours = 0, wasteHoursPerWeek = 0, wasteCostPerWeek = 0, wasteHoursPerAuthorPerWeek = 0, wasteCostPerAuthorPerWeek = 0, totalAuthors = 0, salary = 250000, benefitsMultiplier = 1.2, analysisType = 'project', sourceName = '', mergeRate = 0, mergedPRs = 0, unmergedPRs = 0, velocityGrade = '', velocityMessage = '', mergeRateGrade = '', mergeRateMessage = '') { + function formatEfficiencyHTML(efficiencyPct, grade, message, preventableCost, preventableHours, totalCost, totalHours, avgOpenHours, isAnnual = false, annualWasteCost = 0, annualWasteHours = 0, wasteHoursPerWeek = 0, wasteCostPerWeek = 0, wasteHoursPerAuthorPerWeek = 0, wasteCostPerAuthorPerWeek = 0, totalAuthors = 0, salary = 250000, benefitsMultiplier = 1.2, analysisType = 'project', sourceName = '', mergeRate = 0, mergedPRs = 0, unmergedPRs = 0, velocityGrade = '', velocityMessage = '', mergeRateGrade = '', mergeRateMessage = '', days = 60) { let html = '
'; // Development Efficiency box @@ -1450,6 +1450,8 @@

Why calculate PR costs?

html += `${formatTimeUnit(avgOpenHours)}`; html += '
'; html += `
${velocityGradeObj.message}
`; + const cutoffDays = parseInt(days) * 2; + html += `
Excludes open PRs created >${cutoffDays}d ago
`; html += ''; // Close efficiency-box // Merge Success Rate box (if data available) - use backend-computed grades if provided @@ -1470,14 +1472,16 @@

Why calculate PR costs?

// Annual Impact box (only if annual) if (isAnnual && annualWasteCost > 0) { - html += '
'; - html += '

Projected Annual Waste

'; + html += '
'; + html += '

Projected Annual Waste

'; + html += '
'; const annualWasteRounded = Math.round(annualWasteCost); const annualWasteFormatted = '$' + annualWasteRounded.toLocaleString('en-US'); - html += `
${annualWasteFormatted}
`; + html += `${annualWasteFormatted}`; + html += '
'; const annualCostPerHead = salary * benefitsMultiplier; const headcount = annualWasteCost / annualCostPerHead; - html += `
Equal to ${headcount.toFixed(1)} engineers
`; + html += `
Equal to ${headcount.toFixed(1)} engineers
`; html += '
'; // Close efficiency-box } @@ -2417,7 +2421,7 @@

Why calculate PR costs?

const velocityMessage = e.merge_velocity_message || ''; const mergeRateGrade = e.merge_rate_grade || ''; const mergeRateMessage = e.merge_rate_grade_message || ''; - html += formatEfficiencyHTML(extEfficiencyPct, extEfficiency.grade, extEfficiency.message, extPreventableCost, extPreventableHours, e.total_cost, e.total_hours, avgPRDurationHours, true, annualWasteCost, annualWasteHours, wasteHoursPerWeek, wasteCostPerWeek, wasteHoursPerAuthorPerWeek, wasteCostPerAuthorPerWeek, totalAuthors, salary, benefitsMultiplier, analysisType, sourceName, mergeRate, mergedPRs, unmergedPRs, velocityGrade, velocityMessage, mergeRateGrade, mergeRateMessage); + html += formatEfficiencyHTML(extEfficiencyPct, extEfficiency.grade, extEfficiency.message, extPreventableCost, extPreventableHours, e.total_cost, e.total_hours, avgPRDurationHours, true, annualWasteCost, annualWasteHours, wasteHoursPerWeek, wasteCostPerWeek, wasteHoursPerAuthorPerWeek, wasteCostPerAuthorPerWeek, totalAuthors, salary, benefitsMultiplier, analysisType, sourceName, mergeRate, mergedPRs, unmergedPRs, velocityGrade, velocityMessage, mergeRateGrade, mergeRateMessage, days); // Add R2R callout if enabled, otherwise generic merge time callout // Calculate modeled efficiency (with 1.5h merge time) diff --git a/pkg/cost/analyze.go b/pkg/cost/analyze.go index f4261c5..a807b37 100644 --- a/pkg/cost/analyze.go +++ b/pkg/cost/analyze.go @@ -34,12 +34,16 @@ type AnalysisRequest struct { // PRSummaryInfo contains basic PR information needed for fetching and analysis. type PRSummaryInfo struct { - UpdatedAt time.Time - Owner string - Repo string - State string // "OPEN", "CLOSED", "MERGED" - Number int - Merged bool // Whether the PR was merged + UpdatedAt time.Time + CreatedAt time.Time + ClosedAt *time.Time // Nil if still open + Owner string + Repo string + Author string + AuthorType string // "Bot", "User", or empty if unknown + State string // "OPEN", "CLOSED", "MERGED" + Number int + Merged bool // Whether the PR was merged } // AnalysisResult contains the breakdowns from analyzed PRs. diff --git a/pkg/cost/cost_test.go b/pkg/cost/cost_test.go index 7f7e587..1621a25 100644 --- a/pkg/cost/cost_test.go +++ b/pkg/cost/cost_test.go @@ -1370,10 +1370,24 @@ func TestExtrapolateFromSamplesMultiple(t *testing.T) { // Create merge status for 20 PRs: 17 merged, 3 open prStatuses := make([]PRSummaryInfo, 20) for i := range 17 { - prStatuses[i] = PRSummaryInfo{Owner: "test", Repo: "test", Merged: true, State: "MERGED"} + closedAt := now.Add(-time.Duration(i) * time.Hour) + prStatuses[i] = PRSummaryInfo{ + Owner: "test", + Repo: "test", + Merged: true, + State: "MERGED", + CreatedAt: closedAt.Add(-24 * time.Hour), + ClosedAt: &closedAt, + } } for i := 17; i < 20; i++ { - prStatuses[i] = PRSummaryInfo{Owner: "test", Repo: "test", Merged: false, State: "OPEN"} + prStatuses[i] = PRSummaryInfo{ + Owner: "test", + Repo: "test", + Merged: false, + State: "OPEN", + CreatedAt: now.Add(-time.Duration(i-17+1) * 24 * time.Hour), // Open PRs created 1-3 days ago + } } result := ExtrapolateFromSamples(breakdowns, 20, 5, 3, 14, cfg, prStatuses, nil) @@ -1445,10 +1459,36 @@ func TestExtrapolateFromSamplesBotVsHuman(t *testing.T) { }, } - // Create merge status for 10 PRs: all merged + // Create merge status for 10 PRs: 5 human, 5 bot (all merged) prStatuses := make([]PRSummaryInfo, 10) + now := time.Now() for i := range 10 { - prStatuses[i] = PRSummaryInfo{Owner: "test", Repo: "test", Merged: true, State: "MERGED"} + if i < 5 { + // Human PRs + prStatuses[i] = PRSummaryInfo{ + Owner: "test", + Repo: "test", + Author: "human-author", + AuthorType: "User", + CreatedAt: now.Add(-24 * time.Hour), + ClosedAt: &now, + Merged: true, + State: "MERGED", + } + } else { + // Bot PRs + closedTime := now.Add(-2 * time.Hour) + prStatuses[i] = PRSummaryInfo{ + Owner: "test", + Repo: "test", + Author: "dependabot[bot]", + AuthorType: "Bot", + CreatedAt: now.Add(-4 * time.Hour), + ClosedAt: &closedTime, + Merged: true, + State: "MERGED", + } + } } result := ExtrapolateFromSamples(breakdowns, 10, 5, 0, 7, cfg, prStatuses, nil) diff --git a/pkg/cost/extrapolate.go b/pkg/cost/extrapolate.go index c4b0f03..c4caaf4 100644 --- a/pkg/cost/extrapolate.go +++ b/pkg/cost/extrapolate.go @@ -3,8 +3,53 @@ package cost import ( "log/slog" "math" + "strings" + "time" ) +// isAuthorBot determines if a PR author is likely a bot based on AuthorType and common naming patterns. +func isAuthorBot(authorType, authorLogin string) bool { + // Primary check: GitHub's __typename field + if authorType == "Bot" { + return true + } + + // Fallback: Common bot naming patterns + login := strings.ToLower(authorLogin) + + // Check for [bot] suffix + if strings.HasSuffix(login, "[bot]") { + return true + } + + // Check for word-boundary bot patterns to avoid false positives like "robot" + // Match bot with specific separators or as a suffix/prefix + if strings.HasPrefix(login, "bot-") || strings.HasPrefix(login, "bot_") { + return true + } + if strings.Contains(login, "-bot-") || strings.Contains(login, "_bot_") || + strings.Contains(login, "-bot") || strings.Contains(login, "_bot") { + return true + } + + // Specific bot names + botNames := []string{ + "dependabot", "renovate", "greenkeeper", + "github-actions", "codecov", "coveralls", + "mergify", "snyk", "imgbot", + "allcontributors", "stalebot", + "netlify", "vercel", + "codefactor-io", "deepsource-autofix", + "pre-commit-ci", "ready-to-review", + } + for _, name := range botNames { + if strings.Contains(login, name) { + return true + } + } + return false +} + // PRMergeStatus represents merge status information for a PR (for calculating merge rate). type PRMergeStatus struct { State string // "OPEN", "CLOSED", "MERGED" @@ -153,8 +198,8 @@ func ExtrapolateFromSamples(breakdowns []Breakdown, totalPRs, totalAuthors, actu publicCount := 0 privateCount := 0 - for _, pr := range prs { - repoKey := pr.Owner + "/" + pr.Repo + for i := range prs { + repoKey := prs[i].Owner + "/" + prs[i].Repo if uniqueRepos[repoKey] { continue // Already counted this repo } @@ -162,7 +207,7 @@ func ExtrapolateFromSamples(breakdowns []Breakdown, totalPRs, totalAuthors, actu // Check visibility - if repoVisibility is nil or doesn't have this repo, assume public if repoVisibility != nil { - if isPrivate, ok := repoVisibility[pr.Repo]; ok && isPrivate { + if isPrivate, ok := repoVisibility[prs[i].Repo]; ok && isPrivate { privateCount++ } else { publicCount++ @@ -173,27 +218,88 @@ func ExtrapolateFromSamples(breakdowns []Breakdown, totalPRs, totalAuthors, actu } if len(breakdowns) == 0 { - // Calculate merge rate even with no successful samples + // Calculate merge rate, avg duration, and bot/human stats even with no successful samples + // Apply same filtering logic as main path to avoid ancient open PRs mergedCount := 0 - for _, pr := range prs { - if pr.Merged { + var sumPRDuration float64 + var humanCount, botCount int + var humanDuration, botDuration float64 + var countedPRs int + createdCutoff := time.Now().AddDate(0, 0, -daysInPeriod*2) // 2x the analysis period + + var skippedOpen int + for i := range prs { + // For open PRs: only include if created within 2x the period + // For closed PRs: include all (they were modified/closed in the period) + if prs[i].ClosedAt == nil { + if prs[i].CreatedAt.Before(createdCutoff) { + skippedOpen++ + continue + } + } + + countedPRs++ + if prs[i].Merged { mergedCount++ } + + // Calculate PR duration from CreatedAt to ClosedAt (or now if still open) + var duration float64 + if prs[i].ClosedAt != nil { + duration = prs[i].ClosedAt.Sub(prs[i].CreatedAt).Hours() + } else { + duration = time.Since(prs[i].CreatedAt).Hours() + } + sumPRDuration += duration + + // Track human/bot breakdown + if isAuthorBot(prs[i].AuthorType, prs[i].Author) { + botCount++ + botDuration += duration + } else { + humanCount++ + humanDuration += duration + } } + + slog.Info("PR duration calculation filtering (no samples path)", + "total_prs", len(prs), + "skipped_old_open_prs", skippedOpen, + "counted_prs", countedPRs, + "human_prs", humanCount, + "bot_prs", botCount, + "cutoff_date", createdCutoff.Format(time.RFC3339)) mergeRate := 0.0 - if len(prs) > 0 { - mergeRate = 100.0 * float64(mergedCount) / float64(len(prs)) + if countedPRs > 0 { + mergeRate = 100.0 * float64(mergedCount) / float64(countedPRs) + } + avgPRDuration := 0.0 + if countedPRs > 0 { + avgPRDuration = sumPRDuration / float64(countedPRs) + } + avgHumanDuration := 0.0 + if humanCount > 0 { + avgHumanDuration = humanDuration / float64(humanCount) + } + avgBotDuration := 0.0 + if botCount > 0 { + avgBotDuration = botDuration / float64(botCount) } return ExtrapolatedBreakdown{ - TotalPRs: totalPRs, - SampledPRs: 0, - SuccessfulSamples: 0, - UniqueRepositories: len(uniqueRepos), - MergedPRs: mergedCount, - UnmergedPRs: len(prs) - mergedCount, - MergeRate: mergeRate, - MergeRateNote: "Recently modified PRs successfully merged", + TotalPRs: totalPRs, + SampledPRs: 0, + SuccessfulSamples: 0, + UniqueRepositories: len(uniqueRepos), + MergedPRs: mergedCount, + UnmergedPRs: len(prs) - mergedCount, + MergeRate: mergeRate, + MergeRateNote: "Recently modified PRs successfully merged", + AvgPRDurationHours: avgPRDuration, + HumanPRs: humanCount, + BotPRs: botCount, + AvgHumanPRDurationHours: avgHumanDuration, + AvgBotPRDurationHours: avgBotDuration, } } @@ -450,19 +556,80 @@ func ExtrapolateFromSamples(breakdowns []Breakdown, totalPRs, totalAuthors, actu "waste_cost_per_author_per_week", wasteCostPerAuthorPerWeek) } - // Calculate average PR durations - avgPRDuration := sumPRDuration / samples + // Calculate average PR durations and human/bot breakdown from ALL PRs (not just samples) + // This provides more accurate merge velocity and bot/human metrics + // + // Include: + // - All closed PRs (modified in the period) + // - Open PRs created within 2x the period (to avoid ancient zombies skewing data) + // + // This gives a fair view of "PRs active in this period" without 5-year-old open PRs + // that got a bot comment last week showing up as "292 years average". + var totalPRDuration float64 + var allHumanPRCount, allBotPRCount int + var allHumanPRDuration, allBotPRDuration float64 + createdCutoff := time.Now().AddDate(0, 0, -daysInPeriod*2) // 2x the analysis period + + var skippedOpen, skippedClosed int + for i := range prs { + // For open PRs: only include if created within 2x the period + // For closed PRs: include all (they were modified/closed in the period) + if prs[i].ClosedAt == nil { + if prs[i].CreatedAt.Before(createdCutoff) { + skippedOpen++ + continue + } + } + + // Calculate PR duration from CreatedAt to ClosedAt (or now if still open) + var duration float64 + if prs[i].ClosedAt != nil { + duration = prs[i].ClosedAt.Sub(prs[i].CreatedAt).Hours() + } else { + duration = time.Since(prs[i].CreatedAt).Hours() + } + totalPRDuration += duration + + // Determine if this is a bot PR based on AuthorType and naming patterns + isBot := isAuthorBot(prs[i].AuthorType, prs[i].Author) + if isBot { + allBotPRCount++ + allBotPRDuration += duration + } else { + allHumanPRCount++ + allHumanPRDuration += duration + } + } + + slog.Info("PR duration calculation filtering", + "total_prs", len(prs), + "skipped_old_open_prs", skippedOpen, + "skipped_old_closed_prs", skippedClosed, + "counted_prs", allHumanPRCount+allBotPRCount, + "human_prs", allHumanPRCount, + "bot_prs", allBotPRCount, + "cutoff_date", createdCutoff.Format(time.RFC3339)) + + // Calculate totals from PRs we actually counted (within the period) + totalCountedPRs := allHumanPRCount + allBotPRCount + + avgPRDuration := 0.0 + if totalCountedPRs > 0 { + avgPRDuration = totalPRDuration / float64(totalCountedPRs) + } + + // Calculate average durations for human/bot PRs from PRs within the period var avgHumanPRDuration, avgBotPRDuration float64 - if humanPRCount > 0 { - avgHumanPRDuration = sumHumanPRDuration / float64(humanPRCount) + if allHumanPRCount > 0 { + avgHumanPRDuration = allHumanPRDuration / float64(allHumanPRCount) } - if botPRCount > 0 { - avgBotPRDuration = sumBotPRDuration / float64(botPRCount) + if allBotPRCount > 0 { + avgBotPRDuration = allBotPRDuration / float64(allBotPRCount) } - // Extrapolate bot vs human PR counts - extHumanPRs := int(float64(humanPRCount) / samples * multiplier) - extBotPRs := int(float64(botPRCount) / samples * multiplier) + // Use actual human/bot counts from PRs created within the period + extHumanPRs := allHumanPRCount + extBotPRs := allBotPRCount // Calculate R2R savings // Formula: baseline annual waste - (re-modeled waste with 40min PRs) - (R2R subscription cost) @@ -524,8 +691,8 @@ func ExtrapolateFromSamples(breakdowns []Breakdown, totalPRs, totalAuthors, actu // Calculate merge rate from all PRs (not just samples) mergedCount := 0 unmergedCount := 0 - for _, pr := range prs { - if pr.Merged { + for i := range prs { + if prs[i].Merged { mergedCount++ } else { unmergedCount++ diff --git a/pkg/cost/grading.go b/pkg/cost/grading.go index 31977d4..03991e5 100644 --- a/pkg/cost/grading.go +++ b/pkg/cost/grading.go @@ -6,23 +6,23 @@ package cost func EfficiencyGrade(efficiencyPct float64) (grade, message string) { switch { case efficiencyPct >= 97: - return "A+", "Impeccable" + return "A+", "Outstanding efficiency" case efficiencyPct >= 93: - return "A", "Excellent" + return "A", "Excellent efficiency" case efficiencyPct >= 90: - return "A-", "Nearly excellent" + return "A-", "Very good efficiency" case efficiencyPct >= 87: - return "B+", "Acceptable+" + return "B+", "Above average" case efficiencyPct >= 83: - return "B", "Acceptable" + return "B", "Good efficiency" case efficiencyPct >= 80: - return "B-", "Nearly acceptable" + return "B-", "Acceptable efficiency" case efficiencyPct >= 70: - return "C", "Average" + return "C", "Average efficiency" case efficiencyPct >= 60: - return "D", "Not good my friend." + return "D", "Below average" default: - return "F", "Failing" + return "F", "Needs improvement" } } @@ -31,17 +31,17 @@ func EfficiencyGrade(efficiencyPct float64) (grade, message string) { func MergeVelocityGrade(avgOpenHours float64) (grade, message string) { switch { case avgOpenHours <= 4: // 4 hours - return "A+", "World-class velocity" + return "A+", "Exceptional velocity" case avgOpenHours <= 24: // 1 day - return "A", "High-performing team" + return "A", "Excellent velocity" case avgOpenHours <= 84: // 3.5 days - return "B", "Room for improvement" + return "B", "Good velocity" case avgOpenHours <= 132: // 5.5 days - return "C", "Sluggish" + return "C", "Average velocity" case avgOpenHours <= 168: // 7 days (1 week) - return "D", "Slow" + return "D", "Below average" default: - return "F", "Failing" + return "F", "Needs improvement" } } @@ -50,14 +50,14 @@ func MergeVelocityGrade(avgOpenHours float64) (grade, message string) { func MergeRateGrade(mergeRatePct float64) (grade, message string) { switch { case mergeRatePct > 90: - return "A", "Excellent" + return "A", "Excellent merge rate" case mergeRatePct > 80: - return "B", "Good" + return "B", "Good merge rate" case mergeRatePct > 70: - return "C", "Acceptable" + return "C", "Average merge rate" case mergeRatePct > 60: - return "D", "Low" + return "D", "Below average" default: - return "F", "Poor" + return "F", "Needs improvement" } } diff --git a/pkg/github/fetch.go b/pkg/github/fetch.go index cde29da..a1e8aea 100644 --- a/pkg/github/fetch.go +++ b/pkg/github/fetch.go @@ -39,7 +39,7 @@ func PRDataFromPRX(prData *prx.PullRequestData) cost.PRData { // Fallback bot detection: if prx didn't mark it as a bot, check common bot names authorBot := pr.AuthorBot if !authorBot { - authorBot = IsBot(pr.Author) + authorBot = IsBot("", pr.Author) if authorBot { slog.Info("Bot detected by name pattern (prx missed it)", "author", pr.Author, @@ -204,9 +204,9 @@ func extractParticipantEvents(events []prx.Event) []cost.ParticipantEvent { } // Skip bots: check both prx's Bot field and common bot patterns - isBotEvent := event.Bot || event.Actor == "github" || IsBot(event.Actor) + isBotEvent := event.Bot || event.Actor == "github" || IsBot("", event.Actor) if isBotEvent { - if !event.Bot && IsBot(event.Actor) { + if !event.Bot && IsBot("", event.Actor) { slog.Debug("Bot event detected by name pattern (prx missed it)", "actor", event.Actor, "kind", event.Kind, diff --git a/pkg/github/query.go b/pkg/github/query.go index 43f3746..164efcc 100644 --- a/pkg/github/query.go +++ b/pkg/github/query.go @@ -14,19 +14,65 @@ import ( // PRSummary holds minimal information about a PR for sampling and fetching. type PRSummary struct { - UpdatedAt time.Time - Owner string - Repo string - Author string - State string // "OPEN", "CLOSED", "MERGED" - Number int - Merged bool // Whether the PR was merged + UpdatedAt time.Time + CreatedAt time.Time + ClosedAt *time.Time // Nil if still open + Owner string + Repo string + Author string + AuthorType string // "Bot", "User", or empty if unknown + State string // "OPEN", "CLOSED", "MERGED" + Number int + Merged bool // Whether the PR was merged } // ProgressCallback is called during PR fetching to report progress. // Parameters: queryName (e.g., "recent", "old", "early"), currentPage, totalPRsSoFar. type ProgressCallback func(queryName string, page int, prCount int) +// IsBot determines if a PR author is likely a bot based on AuthorType and common naming patterns. +func IsBot(authorType, authorLogin string) bool { + // Primary check: GitHub's __typename field + if authorType == "Bot" { + return true + } + + // Fallback: Common bot naming patterns + login := strings.ToLower(authorLogin) + + // Check for [bot] suffix + if strings.HasSuffix(login, "[bot]") { + return true + } + + // Check for word-boundary bot patterns to avoid false positives like "robot" + // Match bot with specific separators or as a suffix/prefix + if strings.HasPrefix(login, "bot-") || strings.HasPrefix(login, "bot_") { + return true + } + if strings.Contains(login, "-bot-") || strings.Contains(login, "_bot_") || + strings.Contains(login, "-bot") || strings.Contains(login, "_bot") { + return true + } + + // Specific bot names + botNames := []string{ + "dependabot", "renovate", "greenkeeper", + "github-actions", "codecov", "coveralls", + "mergify", "snyk", "imgbot", + "allcontributors", "stalebot", + "netlify", "vercel", + "codefactor-io", "deepsource-autofix", + "pre-commit-ci", "ready-to-review", + } + for _, name := range botNames { + if strings.Contains(login, name) { + return true + } + } + return false +} + // FetchPRsFromRepo queries GitHub GraphQL API for all PRs in a repository // modified since the specified date. // @@ -144,11 +190,14 @@ func fetchPRsFromRepoWithSort(ctx context.Context, params repoSortParams) ([]PRS } nodes { number + createdAt updatedAt + closedAt state merged author { login + __typename } } } @@ -217,10 +266,15 @@ func fetchPRsFromRepoWithSort(ctx context.Context, params repoSortParams) ([]PRS } Nodes []struct { Number int + CreatedAt time.Time UpdatedAt time.Time + ClosedAt *time.Time State string Merged bool - Author struct{ Login string } + Author struct { + Login string + TypeName string `json:"__typename"` + } } TotalCount int } @@ -267,13 +321,16 @@ func fetchPRsFromRepoWithSort(ctx context.Context, params repoSortParams) ([]PRS continue } allPRs = append(allPRs, PRSummary{ - Owner: owner, - Repo: repo, - Number: node.Number, - Author: node.Author.Login, - UpdatedAt: node.UpdatedAt, - State: node.State, - Merged: node.Merged, + Owner: owner, + Repo: repo, + Number: node.Number, + Author: node.Author.Login, + AuthorType: node.Author.TypeName, + CreatedAt: node.CreatedAt, + UpdatedAt: node.UpdatedAt, + ClosedAt: node.ClosedAt, + State: node.State, + Merged: node.Merged, }) // Check if we've hit the maxPRs limit @@ -307,10 +364,10 @@ func deduplicatePRs(prs []PRSummary) []PRSummary { seen := make(map[int]bool) var unique []PRSummary - for _, pr := range prs { - if !seen[pr.Number] { - seen[pr.Number] = true - unique = append(unique, pr) + for i := range prs { + if !seen[prs[i].Number] { + seen[prs[i].Number] = true + unique = append(unique, prs[i]) } } @@ -447,11 +504,14 @@ func fetchPRsFromOrgWithSort(ctx context.Context, params orgSortParams) ([]PRSum nodes { ... on PullRequest { number + createdAt updatedAt + closedAt state merged author { login + __typename } repository { owner { @@ -523,11 +583,16 @@ func fetchPRsFromOrgWithSort(ctx context.Context, params orgSortParams) ([]PRSum EndCursor string } Nodes []struct { - Number int - UpdatedAt time.Time - State string - Merged bool - Author struct{ Login string } + Number int + CreatedAt time.Time + UpdatedAt time.Time + ClosedAt *time.Time + State string + Merged bool + Author struct { + Login string + TypeName string `json:"__typename"` + } Repository struct { Owner struct{ Login string } Name string @@ -564,13 +629,16 @@ func fetchPRsFromOrgWithSort(ctx context.Context, params orgSortParams) ([]PRSum // Collect PRs from this page for _, node := range result.Data.Search.Nodes { allPRs = append(allPRs, PRSummary{ - Owner: node.Repository.Owner.Login, - Repo: node.Repository.Name, - Number: node.Number, - Author: node.Author.Login, - UpdatedAt: node.UpdatedAt, - State: node.State, - Merged: node.Merged, + Owner: node.Repository.Owner.Login, + Repo: node.Repository.Name, + Number: node.Number, + Author: node.Author.Login, + AuthorType: node.Author.TypeName, + CreatedAt: node.CreatedAt, + UpdatedAt: node.UpdatedAt, + ClosedAt: node.ClosedAt, + State: node.State, + Merged: node.Merged, }) // Check if we've hit the maxPRs limit @@ -625,49 +693,11 @@ func deduplicatePRsByOwnerRepoNumber(prs []PRSummary) []PRSummary { return unique } -// IsBot returns true if the author name indicates a bot account. -func IsBot(author string) bool { - lowerAuthor := strings.ToLower(author) - - // Check for [bot] suffix - if strings.HasSuffix(lowerAuthor, "[bot]") { - return true - } - - // Common bot account name patterns - botPatterns := []string{ - "dependabot", - "renovate", - "github-actions", - "codecov", - "greenkeeper", - "snyk", - "allcontributors", - "imgbot", - "stalebot", - "mergify", - "netlify", - "vercel", - "codefactor-io", - "deepsource-autofix", - "pre-commit-ci", - "ready-to-review", - } - - for _, pattern := range botPatterns { - if strings.Contains(lowerAuthor, pattern) { - return true - } - } - - return false -} - // CountBotPRs counts how many PRs in the list are authored by bots. func CountBotPRs(prs []PRSummary) int { count := 0 for _, pr := range prs { - if IsBot(pr.Author) { + if IsBot(pr.AuthorType, pr.Author) { count++ } } @@ -781,7 +811,7 @@ func SamplePRs(prs []PRSummary, sampleSize int) []PRSummary { func CountUniqueAuthors(prs []PRSummary) int { uniqueAuthors := make(map[string]bool) for _, pr := range prs { - if !IsBot(pr.Author) { + if !IsBot(pr.AuthorType, pr.Author) { uniqueAuthors[pr.Author] = true } } diff --git a/pkg/github/query_test.go b/pkg/github/query_test.go index 0fdf041..31f5a6c 100644 --- a/pkg/github/query_test.go +++ b/pkg/github/query_test.go @@ -40,7 +40,7 @@ func TestIsBot(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := IsBot(tt.prAuthor) + got := IsBot("", tt.prAuthor) if got != tt.want { t.Errorf("IsBot(%q) = %v, want %v", tt.prAuthor, got, tt.want) } @@ -215,7 +215,7 @@ func TestIsBotEdgeCases(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := IsBot(tt.author) + got := IsBot("", tt.author) if got != tt.want { t.Errorf("IsBot(%q) = %v, want %v", tt.author, got, tt.want) }