Skip to content

Commit f108a37

Browse files
committed
Add per-owner PR caps, improve retry reliability
1 parent 9f24f4b commit f108a37

File tree

8 files changed

+178
-44
lines changed

8 files changed

+178
-44
lines changed

better-reviewers

17.4 KB
Binary file not shown.

bot_detection_test.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ func TestIsValidReviewer(t *testing.T) {
8989
// Create a mock GitHub client that returns specific user types
9090
mockClient := &GitHubClient{
9191
httpClient: &http.Client{},
92+
cache: &cache{entries: make(map[string]cacheEntry)},
9293
userCache: &userCache{users: make(map[string]*userInfo)},
9394
token: "test-token",
9495
}
@@ -101,11 +102,19 @@ func TestIsValidReviewer(t *testing.T) {
101102
mockClient.userCache.users["deploy-service"] = &userInfo{login: "deploy-service", userType: userTypeBot}
102103
mockClient.userCache.users["johndoe"] = &userInfo{login: "johndoe", userType: userTypeUser}
103104
mockClient.userCache.users["sergiodj"] = &userInfo{login: "sergiodj", userType: userTypeUser}
105+
mockClient.userCache.users["busyuser"] = &userInfo{login: "busyuser", userType: userTypeUser}
104106

105107
rf := &ReviewerFinder{
106-
client: mockClient,
108+
client: mockClient,
109+
maxPRs: 9,
110+
prCountCache: prCountCacheTTL,
107111
}
108112

113+
// Mock the PR count in cache to avoid API calls
114+
mockClient.cache.setWithTTL(makeCacheKey("pr-count", "test", "johndoe"), 5, prCountCacheTTL)
115+
mockClient.cache.setWithTTL(makeCacheKey("pr-count", "test", "sergiodj"), 3, prCountCacheTTL)
116+
mockClient.cache.setWithTTL(makeCacheKey("pr-count", "test", "busyuser"), 10, prCountCacheTTL) // Over limit
117+
109118
ctx := context.Background()
110119
pr := &PullRequest{Owner: "test", Repository: "repo"}
111120

@@ -120,6 +129,7 @@ func TestIsValidReviewer(t *testing.T) {
120129
{"Bot with API confirmation", "dependabot[bot]", false},
121130
{"Pattern-based bot", "github-actions", false},
122131
{"Service account", "deploy-service", false},
132+
{"User with too many PRs", "busyuser", false},
123133

124134
// Should be valid
125135
{"Regular user", "johndoe", true},

constants.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,15 @@ const (
2727
prCacheTTL = 20 * 24 * time.Hour // Cache PRs for 20 days (use updated_at to invalidate)
2828
searchCacheTTL = 15 * time.Minute // Cache search results for 15 minutes
2929

30-
// Specific cache TTLs for different data types
30+
// Specific cache TTLs for different data types.
3131
userTypeCacheTTL = 30 * 24 * time.Hour // User type never changes
3232
repoContributorsCacheTTL = 4 * time.Hour // 4 hours - catch people returning from vacation
3333
directoryOwnersCacheTTL = 3 * 24 * time.Hour // Directory ownership changes slowly
3434
recentPRsCacheTTL = 1 * time.Hour // Recent PRs for active repos
3535
fileHistoryCacheTTL = 3 * 24 * time.Hour // File history changes slowly
36+
prCountCacheTTL = 6 * time.Hour // PR count for workload balancing (default).
37+
prCountFailureCacheTTL = 10 * time.Minute // Cache failures to avoid repeated API calls.
38+
prStaleDaysThreshold = 90 // PRs older than this are considered stale.
3639

3740
// API and pagination limits.
3841
perPageLimit = 100 // GitHub API per_page limit
@@ -56,17 +59,20 @@ const (
5659
topCandidatesToLog = 5 // Number of top candidates to log
5760
maxContextScore = 100 // Maximum context score for candidates
5861

59-
// Scoring weights (must sum to 100)
62+
// Scoring weights (must sum to 100).
6063
fileOverlapWeight = 40.0 // Weight for file overlap score
6164
recencyWeight = 35.0 // Weight for recency score
6265
expertiseWeight = 25.0 // Weight for domain expertise score
6366

64-
// File significance multipliers
67+
// File significance multipliers.
6568
prodCodeMultiplier = 1.5 // Production code vs test code
6669
criticalFileMultiplier = 1.3 // Main.go, handlers, etc.
6770
refactoringMultiplier = 1.2 // More deletions than additions
6871

69-
// Retry parameters handled by external library
72+
// Retry parameters handled by external library.
73+
maxRetryAttempts = 25 // Maximum retry attempts for API calls.
74+
initialRetryDelay = 5 * time.Second // Initial delay for retry attempts.
75+
maxRetryDelay = 20 * time.Minute // Maximum delay for retry attempts.
7076

7177
// PR URL parsing.
7278
minURLParts = 4 // Minimum parts in PR URL

github_client.go

Lines changed: 115 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"io"
1010
"log"
1111
"net/http"
12+
"net/url"
1213
"os/exec"
1314
"strings"
1415
"time"
@@ -94,11 +95,11 @@ func newGitHubClient(ctx context.Context) (*GitHubClient, error) {
9495
}
9596

9697
// makeRequest makes an HTTP request to the GitHub API with retry logic.
97-
func (c *GitHubClient) makeRequest(ctx context.Context, method, url string, body any) (*http.Response, error) {
98-
log.Printf("[HTTP] %s %s", method, url)
98+
func (c *GitHubClient) makeRequest(ctx context.Context, method, apiURL string, body any) (*http.Response, error) {
99+
log.Printf("[HTTP] %s %s", method, apiURL)
99100

100101
var resp *http.Response
101-
err := retryWithBackoff(ctx, fmt.Sprintf("%s %s", method, url), func() error {
102+
err := retryWithBackoff(ctx, fmt.Sprintf("%s %s", method, apiURL), func() error {
102103
var bodyReader io.Reader
103104
if body != nil {
104105
bodyBytes, err := json.Marshal(body)
@@ -108,7 +109,7 @@ func (c *GitHubClient) makeRequest(ctx context.Context, method, url string, body
108109
bodyReader = bytes.NewReader(bodyBytes)
109110
}
110111

111-
req, err := http.NewRequestWithContext(ctx, method, url, bodyReader)
112+
req, err := http.NewRequestWithContext(ctx, method, apiURL, bodyReader)
112113
if err != nil {
113114
return fmt.Errorf("failed to create request: %w", err)
114115
}
@@ -149,7 +150,7 @@ func (c *GitHubClient) makeRequest(ctx context.Context, method, url string, body
149150
return nil, err
150151
}
151152

152-
log.Printf("[HTTP] %s %s - Status: %d", method, url, resp.StatusCode)
153+
log.Printf("[HTTP] %s %s - Status: %d", method, apiURL, resp.StatusCode)
153154
return resp, nil
154155
}
155156

@@ -168,8 +169,8 @@ func (c *GitHubClient) pullRequestWithUpdatedAt(
168169
}
169170

170171
log.Printf("[API] Fetching PR details for %s/%s#%d to get title, state, author, assignees, reviewers, and metadata", owner, repo, prNumber)
171-
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/pulls/%d", owner, repo, prNumber)
172-
resp, err := c.makeRequest(ctx, "GET", url, nil)
172+
apiURL := fmt.Sprintf("https://api.github.com/repos/%s/%s/pulls/%d", owner, repo, prNumber)
173+
resp, err := c.makeRequest(ctx, "GET", apiURL, nil)
173174
if err != nil {
174175
return nil, err
175176
}
@@ -282,11 +283,11 @@ func (c *GitHubClient) openPullRequests(ctx context.Context, owner, repo string)
282283

283284
for {
284285
log.Printf("[API] Requesting page %d of open PRs for %s/%s (pagination)", page, owner, repo)
285-
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/pulls?state=open&per_page=100&page=%d", owner, repo, page)
286+
apiURL := fmt.Sprintf("https://api.github.com/repos/%s/%s/pulls?state=open&per_page=100&page=%d", owner, repo, page)
286287

287288
// Extract API call to avoid defer in loop
288289
prs, shouldBreak, err := func() ([]json.RawMessage, bool, error) {
289-
resp, err := c.makeRequest(ctx, "GET", url, nil)
290+
resp, err := c.makeRequest(ctx, "GET", apiURL, nil)
290291
if err != nil {
291292
return nil, false, err
292293
}
@@ -344,8 +345,8 @@ func (c *GitHubClient) openPullRequests(ctx context.Context, owner, repo string)
344345
// changedFiles fetches the list of changed files in a PR.
345346
func (c *GitHubClient) changedFiles(ctx context.Context, owner, repo string, prNumber int) ([]ChangedFile, error) {
346347
log.Printf("[API] Fetching changed files for PR %s/%s#%d to determine modified files for reviewer expertise matching", owner, repo, prNumber)
347-
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/pulls/%d/files?per_page=100", owner, repo, prNumber)
348-
resp, err := c.makeRequest(ctx, "GET", url, nil)
348+
apiURL := fmt.Sprintf("https://api.github.com/repos/%s/%s/pulls/%d/files?per_page=100", owner, repo, prNumber)
349+
resp, err := c.makeRequest(ctx, "GET", apiURL, nil)
349350
if err != nil {
350351
return nil, err
351352
}
@@ -382,8 +383,8 @@ func (c *GitHubClient) changedFiles(ctx context.Context, owner, repo string, prN
382383
// lastCommitTime returns the timestamp of the last commit.
383384
func (c *GitHubClient) lastCommitTime(ctx context.Context, owner, repo, sha string) (time.Time, error) {
384385
log.Printf("[API] Fetching commit details for %s/%s@%s to get last commit timestamp for PR staleness analysis", owner, repo, sha)
385-
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/commits/%s", owner, repo, sha)
386-
resp, err := c.makeRequest(ctx, "GET", url, nil)
386+
apiURL := fmt.Sprintf("https://api.github.com/repos/%s/%s/commits/%s", owner, repo, sha)
387+
resp, err := c.makeRequest(ctx, "GET", apiURL, nil)
387388
if err != nil {
388389
return time.Time{}, err
389390
}
@@ -411,8 +412,8 @@ func (c *GitHubClient) lastCommitTime(ctx context.Context, owner, repo, sha stri
411412
// lastReviewTime returns the timestamp of the last review.
412413
func (c *GitHubClient) lastReviewTime(ctx context.Context, owner, repo string, prNumber int) (time.Time, error) {
413414
log.Printf("[API] Fetching review history for PR %s/%s#%d to determine last review timestamp for staleness detection", owner, repo, prNumber)
414-
url := fmt.Sprintf("https://api.github.com/repos/%s/%s/pulls/%d/reviews", owner, repo, prNumber)
415-
resp, err := c.makeRequest(ctx, "GET", url, nil)
415+
apiURL := fmt.Sprintf("https://api.github.com/repos/%s/%s/pulls/%d/reviews", owner, repo, prNumber)
416+
resp, err := c.makeRequest(ctx, "GET", apiURL, nil)
416417
if err != nil {
417418
return time.Time{}, err
418419
}
@@ -564,3 +565,102 @@ func (*ReviewerFinder) hasWriteAccess(ctx context.Context, _ string, _ string, _
564565
// In production, this would check collaborator status
565566
return true
566567
}
568+
569+
// openPRCount returns the number of open PRs assigned to or requested for review by a user in an organization.
570+
func (c *GitHubClient) openPRCount(ctx context.Context, org, user string, cacheTTL time.Duration) (int, error) {
571+
// Check cache first for successful results
572+
cacheKey := makeCacheKey("pr-count", org, user)
573+
if cached, found := c.cache.value(cacheKey); found {
574+
if count, ok := cached.(int); ok {
575+
log.Printf(" [CACHE] User %s has %d non-stale open PRs in org %s (cached)", user, count, org)
576+
return count, nil
577+
}
578+
}
579+
580+
// Check if we recently failed to get PR count for this user to avoid repeated failures
581+
failureKey := makeCacheKey("pr-count-failure", org, user)
582+
if _, found := c.cache.value(failureKey); found {
583+
return 0, errors.New("recently failed to get PR count (cached failure)")
584+
}
585+
586+
// Validate that the organization and user are not empty
587+
if org == "" || user == "" {
588+
return 0, fmt.Errorf("invalid organization (%s) or user (%s)", org, user)
589+
}
590+
591+
log.Printf(" [API] Fetching open PR count for user %s in org %s", user, org)
592+
593+
// Create a context with shorter timeout for PR count queries to avoid hanging
594+
timeoutCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
595+
defer cancel()
596+
597+
// Calculate the cutoff date for non-stale PRs (90 days ago)
598+
cutoffDate := time.Now().AddDate(0, 0, -prStaleDaysThreshold).Format("2006-01-02")
599+
600+
// Use two separate queries as they are simpler and more reliable
601+
// Only count PRs updated within the last 90 days to exclude stale PRs
602+
// First, search for PRs where user is assigned
603+
assignedQuery := fmt.Sprintf("is:pr is:open org:%s assignee:%s updated:>=%s", org, user, cutoffDate)
604+
log.Printf(" [DEBUG] Searching assigned PRs for %s (updated since %s)", user, cutoffDate)
605+
assignedCount, err := c.searchPRCount(timeoutCtx, assignedQuery)
606+
if err != nil {
607+
// Cache the failure to avoid repeated attempts
608+
c.cache.setWithTTL(failureKey, true, prCountFailureCacheTTL)
609+
return 0, fmt.Errorf("failed to get assigned PR count: %w", err)
610+
}
611+
log.Printf(" [DEBUG] Found %d non-stale assigned PRs for %s", assignedCount, user)
612+
613+
// Second, search for PRs where user is requested as reviewer
614+
reviewQuery := fmt.Sprintf("is:pr is:open org:%s review-requested:%s updated:>=%s", org, user, cutoffDate)
615+
log.Printf(" [DEBUG] Searching review-requested PRs for %s (updated since %s)", user, cutoffDate)
616+
reviewCount, err := c.searchPRCount(timeoutCtx, reviewQuery)
617+
if err != nil {
618+
// Cache the failure to avoid repeated attempts
619+
c.cache.setWithTTL(failureKey, true, prCountFailureCacheTTL)
620+
return 0, fmt.Errorf("failed to get review-requested PR count: %w", err)
621+
}
622+
log.Printf(" [DEBUG] Found %d non-stale review-requested PRs for %s", reviewCount, user)
623+
624+
total := assignedCount + reviewCount
625+
626+
log.Printf(" 📊 User %s has %d non-stale open PRs in org %s (%d assigned, %d for review)", user, total, org, assignedCount, reviewCount)
627+
628+
// Cache the successful result
629+
c.cache.setWithTTL(cacheKey, total, cacheTTL)
630+
631+
return total, nil
632+
}
633+
634+
// searchPRCount searches for PRs matching a query and returns the count.
635+
func (c *GitHubClient) searchPRCount(ctx context.Context, query string) (int, error) {
636+
encodedQuery := url.QueryEscape(query)
637+
apiURL := fmt.Sprintf("https://api.github.com/search/issues?q=%s&per_page=1", encodedQuery)
638+
log.Printf(" [DEBUG] Search query: %s", query)
639+
log.Printf(" [DEBUG] Full URL: %s", apiURL)
640+
resp, err := c.makeRequest(ctx, "GET", apiURL, nil)
641+
if err != nil {
642+
return 0, err
643+
}
644+
defer func() {
645+
if err := resp.Body.Close(); err != nil {
646+
log.Printf("[WARN] Failed to close response body: %v", err)
647+
}
648+
}()
649+
650+
if resp.StatusCode == http.StatusForbidden {
651+
return 0, fmt.Errorf("search API rate limit exceeded (status %d)", resp.StatusCode)
652+
}
653+
if resp.StatusCode != http.StatusOK {
654+
return 0, fmt.Errorf("search failed (status %d)", resp.StatusCode)
655+
}
656+
657+
var searchResult struct {
658+
TotalCount int `json:"total_count"`
659+
}
660+
661+
if err := json.NewDecoder(resp.Body).Decode(&searchResult); err != nil {
662+
return 0, fmt.Errorf("failed to decode search result: %w", err)
663+
}
664+
665+
return searchResult.TotalCount, nil
666+
}

graphql.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ func (c *GitHubClient) makeGraphQLRequest(ctx context.Context, query string, var
1818
// Extract query type for better debugging
1919
queryType := extractGraphQLQueryType(query)
2020
querySize := len(query)
21-
21+
2222
log.Printf("[API] Executing GraphQL query: %s (size: %d chars)", queryType, querySize)
2323
if len(variables) > 0 {
2424
log.Printf("[GRAPHQL] Variables: %+v", variables)
@@ -604,7 +604,7 @@ func extractReviewers(pr map[string]any) []string {
604604
func extractGraphQLQueryType(query string) string {
605605
query = strings.TrimSpace(query)
606606
lines := strings.Split(query, "\n")
607-
607+
608608
// Look for the main query pattern
609609
for _, line := range lines {
610610
line = strings.TrimSpace(line)
@@ -638,7 +638,7 @@ func extractGraphQLQueryType(query string) string {
638638
}
639639
}
640640
}
641-
641+
642642
// Fallback to detecting by content
643643
if strings.Contains(query, "organization") && strings.Contains(query, "repositories") {
644644
return "org-batch-prs"
@@ -649,6 +649,6 @@ func extractGraphQLQueryType(query string) string {
649649
if strings.Contains(query, "history") {
650650
return "commit-history"
651651
}
652-
652+
653653
return "unknown-graphql"
654654
}

main.go

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@ var (
1919
org = flag.String("org", "", "GitHub organization to monitor")
2020

2121
// Behavior flags.
22-
poll = flag.Duration("poll", 0, "Polling interval (e.g., 1h, 30m). If not set, runs once")
23-
dryRun = flag.Bool("dry-run", false, "Run in dry-run mode (no actual approvals)")
24-
minOpenTime = flag.Duration("min-age", 1*time.Hour, "Minimum time since last activity for PR assignment")
25-
maxOpenTime = flag.Duration("max-age", 180*24*time.Hour, "Maximum time since last activity for PR assignment")
22+
poll = flag.Duration("poll", 0, "Polling interval (e.g., 1h, 30m). If not set, runs once")
23+
dryRun = flag.Bool("dry-run", false, "Run in dry-run mode (no actual approvals)")
24+
minOpenTime = flag.Duration("min-age", 1*time.Hour, "Minimum time since last activity for PR assignment")
25+
maxOpenTime = flag.Duration("max-age", 180*24*time.Hour, "Maximum time since last activity for PR assignment")
26+
maxPRs = flag.Int("max-prs", 9, "Maximum number of non-stale open PRs a candidate can have before being filtered out")
27+
prCountCache = flag.Duration("pr-count-cache", prCountCacheTTL, "Cache duration for PR count queries (e.g., 6h, 12h)")
2628
)
2729

2830
func main() {
@@ -40,11 +42,13 @@ func main() {
4042
}
4143

4244
finder := &ReviewerFinder{
43-
client: client,
44-
dryRun: *dryRun,
45-
minOpenTime: *minOpenTime,
46-
maxOpenTime: *maxOpenTime,
47-
output: &outputFormatter{verbose: true},
45+
client: client,
46+
dryRun: *dryRun,
47+
minOpenTime: *minOpenTime,
48+
maxOpenTime: *maxOpenTime,
49+
maxPRs: *maxPRs,
50+
prCountCache: *prCountCache,
51+
output: &outputFormatter{verbose: true},
4852
}
4953

5054
if *poll > 0 {

retry.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package main
33
import (
44
"context"
55
"log"
6-
"time"
76

87
"github.com/codeGROOVE-dev/retry"
98
)
@@ -13,12 +12,12 @@ func retryWithBackoff(ctx context.Context, operation string, fn func() error) er
1312
return retry.Do(
1413
fn,
1514
retry.Context(ctx),
16-
retry.Attempts(5),
15+
retry.Attempts(maxRetryAttempts),
1716
retry.DelayType(retry.BackOffDelay),
18-
retry.Delay(time.Second),
19-
retry.MaxDelay(2*time.Minute),
17+
retry.Delay(initialRetryDelay),
18+
retry.MaxDelay(maxRetryDelay),
2019
retry.OnRetry(func(n uint, err error) {
21-
log.Printf("[RETRY] %s: attempt %d/5 failed: %v", operation, n+1, err)
20+
log.Printf("[RETRY] %s: attempt %d/%d failed: %v", operation, n+1, maxRetryAttempts, err)
2221
}),
2322
retry.LastErrorOnly(true),
2423
)

0 commit comments

Comments
 (0)