
Commit 2f532ae

Thomas Stromberg authored and committed
Improve how we calculate PR tracking costs
1 parent 6d94ed2 commit 2f532ae

File tree

5 files changed: +90, -221 lines


README.md

Lines changed: 33 additions & 0 deletions
@@ -186,6 +186,39 @@ Calibrated on Windows Vista development data showing 4% weekly code churn. A PR

 **Reference**: Nagappan, N., et al. (2008). Organizational Structure and Software Quality. *ICSE '08*.

+### 6. PR Tracking Overhead: Empirical Organizational Studies
+
+Models the cost of managing and triaging the open PR backlog. This captures planning and coordination overhead, **excluding actual code review time** (counted separately in future review costs). Based on research showing that developers spend significant time on PR discovery, triage, and project-management activities beyond reviewing code.
+
+**Formula**:
+```
+tracking_hours_per_day = openPRs × log₂(activeContributors + 1) × 0.005
+```
+
+**Components**:
+- **Linear with PR count**: More open PRs require more organizational scanning/triage overhead
+- **Logarithmic with team size**: Larger teams develop specialization, tooling, and distributed ownership that reduce per-capita burden
+- **Constant (0.005)**: Calibrated to ~20 seconds per PR per day of planning/coordination time, excluding actual review
+
+**Validation Examples**:
+- 20 PRs, 5 contributors: ~15 min/day total (~3 min/person/day)
+- 200 PRs, 50 contributors: ~6 hours/day total (~7 min/person/day)
+- 1000 PRs, 100 contributors: ~33 hours/day total (~20 min/person/day)
+
+**Activities Captured** (non-review overhead only):
+- Sprint/milestone planning discussions about open PRs
+- Daily standup mentions and status coordination
+- Searching for duplicate work before starting new PRs
+- Identifying related PRs that may conflict or depend on each other
+- Quarterly/monthly mass triage of stale PRs
+- Project/product management tracking of feature delivery
+- Estimating and re-prioritizing work based on the open PR backlog
+
+**References**:
+- Bacchelli, A., & Bird, C. (2013). Expectations, Outcomes, and Challenges of Modern Code Review. *ICSE '13*.
+- Rigby, P. C., & Bird, C. (2013). Convergent Contemporary Software Peer Review Practices. *FSE '13*.
+- Uwano, H., et al. (2006). Analyzing Individual Performance of Source Code Review Using Reviewers' Eye Movement. *ETRA '06*.
+
 ## Model Limitations

 **Individual Estimates**: High variance (CV > 1.0) due to developer and task heterogeneity.
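
The formula and validation numbers above can be reproduced with a few lines of Go. This is a minimal standalone sketch, not part of the package; the function name `trackingHoursPerDay` and the hard-coded scenarios are illustrative only.

```go
package main

import (
	"fmt"
	"math"
)

// trackingHoursPerDay mirrors the README formula:
// openPRs × log₂(activeContributors + 1) × 0.005
func trackingHoursPerDay(openPRs, activeContributors int) float64 {
	if openPRs <= 0 || activeContributors <= 0 {
		return 0
	}
	return float64(openPRs) * math.Log2(float64(activeContributors)+1) * 0.005
}

func main() {
	cases := []struct{ prs, contributors int }{
		{20, 5},     // ~0.26 h/day total, ~3 min/person/day
		{200, 50},   // ~5.7 h/day total, ~7 min/person/day
		{1000, 100}, // ~33 h/day total, ~20 min/person/day
	}
	for _, c := range cases {
		hours := trackingHoursPerDay(c.prs, c.contributors)
		perPersonMin := hours * 60 / float64(c.contributors)
		fmt.Printf("%4d PRs, %3d contributors: %5.1f h/day total, %4.1f min/person/day\n",
			c.prs, c.contributors, hours, perPersonMin)
	}
}
```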

pkg/cost/extrapolate.go

Lines changed: 17 additions & 4 deletions
@@ -1,6 +1,9 @@
 package cost

-import "log/slog"
+import (
+	"log/slog"
+	"math"
+)

 // PRMergeStatus represents merge status information for a PR (for calculating merge rate).
 type PRMergeStatus struct {
@@ -322,11 +325,21 @@ func ExtrapolateFromSamples(breakdowns []Breakdown, totalPRs, totalAuthors, actu
 	extCodeChurnCost := sumCodeChurnCost / samples * multiplier
 	extAutomatedUpdatesCost := sumAutomatedUpdatesCost / samples * multiplier
 	// Calculate Open PR Tracking cost based on actual open PRs (not from samples)
-	// Formula: actualOpenPRs × uniqueUsers × (tracking_minutes_per_day_per_person / 60) × daysInPeriod × hourlyRate
-	// This scales with team size: larger teams spend more total time tracking open PRs
+	// Formula: openPRs × log2(activeContributors + 1) × 0.005 × daysInPeriod × hourlyRate
+	// This represents planning/coordination overhead ONLY (excludes actual code review)
+	// - Linear with PR count: more PRs = more planning/triage overhead
+	// - Logarithmic with team size: larger teams have specialization/better processes
+	// - Constant 0.005: calibrated to ~20 seconds per PR per day of planning/coordination time
+	//   (excludes actual review time, which is counted separately in FutureReviewCost)
 	hourlyRate := cfg.AnnualSalary * cfg.BenefitsMultiplier / cfg.HoursPerYear
 	uniqueUserCount := len(uniqueNonBotUsers)
-	extPRTrackingHours := float64(actualOpenPRs) * float64(uniqueUserCount) * (cfg.PRTrackingMinutesPerDay / 60.0) * float64(daysInPeriod)
+	var extPRTrackingHours float64
+	if uniqueUserCount > 0 && actualOpenPRs > 0 {
+		// log2(n+1) to handle log(0) and provide smooth scaling
+		teamScaleFactor := math.Log2(float64(uniqueUserCount) + 1)
+		// 0.005 hours = 0.30 minutes per PR per day (organizational average for planning/coordination only)
+		extPRTrackingHours = float64(actualOpenPRs) * teamScaleFactor * 0.005 * float64(daysInPeriod)
+	}
 	extPRTrackingCost := extPRTrackingHours * hourlyRate
 	extFutureReviewCost := sumFutureReviewCost / samples * multiplier
 	extFutureMergeCost := sumFutureMergeCost / samples * multiplier
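
For a sense of how the new path turns tracking hours into a dollar figure over an analysis window, here is a standalone sketch that mirrors the logic above. The helper name and the concrete salary, benefits-multiplier, and hours-per-year values are assumptions for illustration; in the package they come from `cfg`.

```go
package main

import (
	"fmt"
	"math"
)

// prTrackingCost mirrors the extrapolation path added above: planning/coordination
// hours per day, multiplied over the analysis window and priced at the loaded
// hourly rate. It is a sketch, not the package's API.
func prTrackingCost(openPRs, contributors, daysInPeriod int, annualSalary, benefitsMultiplier, hoursPerYear float64) float64 {
	var hours float64
	if openPRs > 0 && contributors > 0 {
		teamScaleFactor := math.Log2(float64(contributors) + 1) // log2(n+1) avoids log(0)
		hours = float64(openPRs) * teamScaleFactor * 0.005 * float64(daysInPeriod)
	}
	hourlyRate := annualSalary * benefitsMultiplier / hoursPerYear
	return hours * hourlyRate
}

func main() {
	// Assumed inputs: 200 open PRs, 50 contributors, a 30-day window,
	// $180k salary, 1.3x benefits load, 2080 working hours per year.
	fmt.Printf("estimated tracking cost over 30 days: $%.0f\n",
		prTrackingCost(200, 50, 30, 180_000, 1.3, 2080))
}
```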

pkg/github/fetch.go

Lines changed: 19 additions & 62 deletions
@@ -39,7 +39,7 @@ func PRDataFromPRX(prData *prx.PullRequestData) cost.PRData {
 	// Fallback bot detection: if prx didn't mark it as a bot, check common bot names
 	authorBot := pr.AuthorBot
 	if !authorBot {
-		authorBot = isCommonBot(pr.Author)
+		authorBot = IsBot(pr.Author)
 		if authorBot {
 			slog.Info("Bot detected by name pattern (prx missed it)",
 				"author", pr.Author,
@@ -71,45 +71,6 @@ func PRDataFromPRX(prData *prx.PullRequestData) cost.PRData {
 	return data
 }

-// isCommonBot checks if a username matches common bot patterns.
-// This is a fallback in case prx doesn't correctly mark the AuthorBot field.
-func isCommonBot(username string) bool {
-	lowerName := strings.ToLower(username)
-
-	// Common bot account names
-	botPatterns := []string{
-		"dependabot",
-		"renovate",
-		"github-actions",
-		"codecov",
-		"greenkeeper",
-		"snyk-bot",
-		"allcontributors",
-		"imgbot",
-		"stalebot",
-		"mergify",
-		"netlify",
-		"vercel",
-		"codefactor-io",
-		"deepsource-autofix",
-		"pre-commit-ci",
-		"ready-to-review",
-	}
-
-	for _, pattern := range botPatterns {
-		if strings.Contains(lowerName, pattern) {
-			return true
-		}
-	}
-
-	// Check for [bot] suffix
-	if strings.HasSuffix(lowerName, "[bot]") {
-		return true
-	}
-
-	return false
-}
-
 // FetchPRData retrieves pull request information from GitHub and converts it
 // to the format needed for cost calculation.
 //
@@ -137,7 +98,7 @@ func FetchPRData(ctx context.Context, prURL string, token string, updatedAt time
 	slog.Debug("Parsed PR URL", "owner", owner, "repo", repo, "number", number)

 	// Get cache directory from user's cache directory
-	cacheDir, err := getCacheDir()
+	userCacheDir, err := os.UserCacheDir()
 	if err != nil {
 		slog.Warn("Failed to get cache directory, using non-cached client", "error", err)
 		// Fallback to non-cached client
@@ -151,6 +112,20 @@ func FetchPRData(ctx context.Context, prURL string, token string, updatedAt time
 		return result, nil
 	}

+	cacheDir := filepath.Join(userCacheDir, "prcost")
+	if err := os.MkdirAll(cacheDir, 0o700); err != nil {
+		slog.Warn("Failed to create cache directory, using non-cached client", "error", err)
+		// Fallback to non-cached client
+		client := prx.NewClient(token)
+		prData, err := client.PullRequest(ctx, owner, repo, number)
+		if err != nil {
+			slog.Error("GitHub API call failed", "owner", owner, "repo", repo, "pr", number, "error", err)
+			return cost.PRData{}, fmt.Errorf("failed to fetch PR data: %w", err)
+		}
+		result := PRDataFromPRX(prData)
+		return result, nil
+	}
+
 	// Create prx cache client for disk-based caching
 	client, err := prx.NewCacheClient(token, cacheDir)
 	if err != nil {
@@ -229,9 +204,9 @@ func extractParticipantEvents(events []prx.Event) []cost.ParticipantEvent {
 		}

 		// Skip bots: check both prx's Bot field and common bot patterns
-		isBot := event.Bot || event.Actor == "github" || isCommonBot(event.Actor)
-		if isBot {
-			if !event.Bot && isCommonBot(event.Actor) {
+		isBotEvent := event.Bot || event.Actor == "github" || IsBot(event.Actor)
+		if isBotEvent {
+			if !event.Bot && IsBot(event.Actor) {
 				slog.Debug("Bot event detected by name pattern (prx missed it)",
 					"actor", event.Actor,
 					"kind", event.Kind,
@@ -250,21 +225,3 @@ func extractParticipantEvents(events []prx.Event) []cost.ParticipantEvent {

 	return participantEvents
 }
-
-// getCacheDir returns the cache directory for prx client.
-// Uses OS-specific user cache directory with prcost subdirectory.
-func getCacheDir() (string, error) {
-	userCacheDir, err := os.UserCacheDir()
-	if err != nil {
-		return "", fmt.Errorf("get user cache dir: %w", err)
-	}
-
-	cacheDir := filepath.Join(userCacheDir, "prcost")
-
-	// Ensure cache directory exists
-	if err := os.MkdirAll(cacheDir, 0o700); err != nil {
-		return "", fmt.Errorf("create cache dir: %w", err)
-	}
-
-	return cacheDir, nil
-}
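
The calls above now go to an exported `IsBot`, whose definition is not included in this diff. A sketch that would preserve the removed `isCommonBot` behavior (case-insensitive substring match against known bot names, plus a `[bot]` suffix check) is shown below; it is an assumption, not the committed implementation.

```go
package github

import "strings"

// Known automation accounts; mirrors the list from the removed isCommonBot.
var botPatterns = []string{
	"dependabot", "renovate", "github-actions", "codecov", "greenkeeper",
	"snyk-bot", "allcontributors", "imgbot", "stalebot", "mergify",
	"netlify", "vercel", "codefactor-io", "deepsource-autofix",
	"pre-commit-ci", "ready-to-review",
}

// IsBot reports whether a username looks like a known automation account
// (case-insensitive substring match, or a trailing "[bot]").
func IsBot(username string) bool {
	lowerName := strings.ToLower(username)
	for _, pattern := range botPatterns {
		if strings.Contains(lowerName, pattern) {
			return true
		}
	}
	return strings.HasSuffix(lowerName, "[bot]")
}
```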

pkg/github/fetch_test.go

Lines changed: 0 additions & 142 deletions
@@ -3,7 +3,6 @@ package github
 import (
 	"encoding/json"
 	"os"
-	"strings"
 	"testing"
 	"time"

@@ -277,77 +276,6 @@ func TestPRDataFromPRXWithRealData(t *testing.T) {
 	t.Logf("PR 1891: %d human events out of %d total events", len(costData.Events), len(prxData.Events))
 }

-func TestGetCacheDir(t *testing.T) {
-	dir, err := getCacheDir()
-	if err != nil {
-		t.Fatalf("getCacheDir() error = %v", err)
-	}
-	if dir == "" {
-		t.Error("getCacheDir() returned empty string")
-	}
-
-	// Should contain prcost in the path
-	if !strings.Contains(dir, "prcost") {
-		t.Errorf("getCacheDir() = %q, expected to contain 'prcost'", dir)
-	}
-}
-
-func TestIsCommonBot(t *testing.T) {
-	tests := []struct {
-		name     string
-		username string
-		want     bool
-	}{
-		{"dependabot", "dependabot[bot]", true},
-		{"renovate", "renovate-bot", true},
-		{"github-actions", "github-actions", true},
-		{"codecov", "codecov-commenter", true},
-		{"greenkeeper", "greenkeeper[bot]", true},
-		{"snyk", "snyk-bot", true},
-		{"allcontributors", "allcontributors[bot]", true},
-		{"imgbot", "ImgBot", true}, // Case insensitive
-		{"stalebot", "stalebot", true},
-		{"mergify", "mergify[bot]", true},
-		{"netlify", "netlify[bot]", true},
-		{"vercel", "vercel[bot]", true},
-		{"codefactor", "codefactor-io", true},
-		{"deepsource", "deepsource-autofix[bot]", true},
-		{"pre-commit", "pre-commit-ci[bot]", true},
-		{"regular user", "john-doe", false},
-		{"bot in middle", "robot-person", false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := isCommonBot(tt.username)
-			if got != tt.want {
-				t.Errorf("isCommonBot(%q) = %v, want %v", tt.username, got, tt.want)
-			}
-		})
-	}
-}
-
-func TestIsCommonBotCaseSensitivity(t *testing.T) {
-	tests := []struct {
-		name     string
-		username string
-		want     bool
-	}{
-		{"uppercase BOT", "DEPENDABOT[bot]", true},
-		{"mixed case", "DePeNdAbOt[bot]", true},
-		{"lowercase all", "dependabot[bot]", true},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := isCommonBot(tt.username)
-			if got != tt.want {
-				t.Errorf("isCommonBot(%q) = %v, want %v", tt.username, got, tt.want)
-			}
-		})
-	}
-}
-
 func TestExtractParticipantEventsEdgeCases(t *testing.T) {
 	now := time.Now()

@@ -438,73 +366,3 @@ func TestPRDataFromPRXWithRealSprinklerData(t *testing.T) {

 	t.Logf("Sprinkler PR 37: %d human events out of %d total events", len(costData.Events), len(prxData.Events))
 }
-
-func TestGetCacheDirCreatesDirectory(t *testing.T) {
-	// This test actually calls getCacheDir to improve coverage
-	dir, err := getCacheDir()
-	if err != nil {
-		t.Fatalf("getCacheDir() error = %v", err)
-	}
-	if dir == "" {
-		t.Error("getCacheDir() returned empty string")
-	}
-
-	// Verify directory was created
-	info, err := os.Stat(dir)
-	if err != nil {
-		t.Errorf("Cache directory was not created: %v", err)
-	}
-	if !info.IsDir() {
-		t.Error("Cache path is not a directory")
-	}
-}
-
-func TestIsCommonBotVariations(t *testing.T) {
-	tests := []struct {
-		username string
-		want     bool
-	}{
-		{"dependabot", true},
-		{"dependabot[bot]", true},
-		{"renovate", true},
-		{"renovate-bot", true},
-		{"github-actions", true},
-		{"github-actions[bot]", true},
-		{"codecov", true},
-		{"codecov-commenter", true},
-		{"greenkeeper", true},
-		{"greenkeeper[bot]", true},
-		{"snyk-bot", true},
-		{"allcontributors", true},
-		{"allcontributors[bot]", true},
-		{"imgbot", true},
-		{"ImgBot", true}, // case insensitive
-		{"stalebot", true},
-		{"mergify", true},
-		{"mergify[bot]", true},
-		{"netlify", true},
-		{"netlify[bot]", true},
-		{"vercel", true},
-		{"vercel[bot]", true},
-		{"codefactor-io", true},
-		{"deepsource-autofix", true},
-		{"deepsource-autofix[bot]", true},
-		{"pre-commit-ci", true},
-		{"pre-commit-ci[bot]", true},
-		{"ready-to-review", true},
-		{"ready-to-review[bot]", true},
-		{"regular-user", false},
-		{"robot", false},
-		{"botman", false},
-		{"john-doe", false},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.username, func(t *testing.T) {
-			got := isCommonBot(tt.username)
-			if got != tt.want {
-				t.Errorf("isCommonBot(%q) = %v, want %v", tt.username, got, tt.want)
-			}
-		})
-	}
-}
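
The deleted tests covered the old unexported helper; equivalent coverage for the exported `IsBot` is not visible in this diff. A hypothetical table-driven replacement, assuming `IsBot` keeps the old matching behavior, might look like:

```go
package github

import "testing"

// Hypothetical coverage for the exported IsBot, mirroring the deleted cases.
func TestIsBot(t *testing.T) {
	tests := []struct {
		username string
		want     bool
	}{
		{"dependabot[bot]", true},
		{"Renovate-Bot", true}, // matching is expected to be case-insensitive
		{"github-actions", true},
		{"some-user[bot]", true}, // generic [bot] suffix
		{"john-doe", false},
		{"robot-person", false},
	}
	for _, tt := range tests {
		t.Run(tt.username, func(t *testing.T) {
			if got := IsBot(tt.username); got != tt.want {
				t.Errorf("IsBot(%q) = %v, want %v", tt.username, got, tt.want)
			}
		})
	}
}
```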
