Skip to content

Commit f31656b

Browse files
committed
Improve bot detection
1 parent 68934e8 commit f31656b

File tree

5 files changed

+364
-2
lines changed

5 files changed

+364
-2
lines changed

better-reviewers

896 Bytes
Binary file not shown.

bot_detection_test.go

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"testing"
6+
)
7+
8+
func TestIsUserBot(t *testing.T) {
9+
rf := &ReviewerFinder{}
10+
ctx := context.Background()
11+
12+
tests := []struct {
13+
name string
14+
username string
15+
wantBot bool
16+
}{
17+
// Bot patterns
18+
{"Bot with [bot] suffix", "dependabot[bot]", true},
19+
{"Bot with -bot suffix", "renovate-bot", true},
20+
{"Bot with _bot suffix", "security_bot", true},
21+
{"Bot with bot- prefix", "bot-user", true},
22+
{"Bot with bot_ prefix", "bot_scanner", true},
23+
{"Bot with .bot suffix", "scanner.bot", true},
24+
25+
// Specific known bots
26+
{"GitHub Actions", "github-actions", true},
27+
{"GitHub Actions with bracket", "github-actions[bot]", true},
28+
{"Dependabot", "dependabot", true},
29+
{"Renovate", "renovate", true},
30+
{"Greenkeeper", "greenkeeper", true},
31+
{"Snyk", "snyk-bot", true},
32+
{"Codecov", "codecov", true},
33+
{"Travis CI", "travis-ci", true},
34+
{"CircleCI", "circleci", true},
35+
{"Jenkins", "jenkins", true},
36+
{"Mergify", "mergify[bot]", true},
37+
{"Stale bot", "stale[bot]", true},
38+
39+
// Organization/service patterns
40+
{"Octo STS", "octo-sts", true},
41+
{"Octocat", "octocat", true},
42+
{"Service account with -sts", "my-app-sts", true},
43+
{"Service account with -svc", "backend-svc", true},
44+
{"Service account", "api-service", true},
45+
{"System account", "auth-system", true},
46+
{"Automation account", "deploy-automation", true},
47+
{"CI account", "project-ci", true},
48+
{"CD account", "prod-cd", true},
49+
{"Deploy account", "k8s-deploy", true},
50+
{"Release account", "release-manager", true},
51+
{"Build account", "docker-build", true},
52+
{"Test account", "e2e-test", true},
53+
{"Admin account", "cluster-admin", true},
54+
{"Security account", "security-scanner", true},
55+
{"Compliance account", "compliance-checker", true},
56+
57+
// Valid human users
58+
{"Regular user", "johndoe", false},
59+
{"User with dash", "john-doe", false},
60+
{"User with underscore", "john_doe", false},
61+
{"User with numbers", "user123", false},
62+
{"Common contributor", "sergiodj", false},
63+
{"Common contributor 2", "murraybd", false},
64+
{"PR author", "ajayk", false},
65+
{"Reviewer", "tstromberg", false},
66+
{"Another reviewer", "vavilen84", false},
67+
68+
// Edge cases - users that might look like bots but aren't
69+
{"User with 'bot' in name", "abbott", false},
70+
{"User with 'test' in name", "atestuser", false},
71+
{"User with 'build' in name", "builderman", false},
72+
{"User with 'admin' in name", "adminton", false},
73+
{"User ending in 'sts'", "roberts", false},
74+
{"User ending in 'ci'", "luci", false},
75+
}
76+
77+
for _, tt := range tests {
78+
t.Run(tt.name, func(t *testing.T) {
79+
got := rf.isUserBot(ctx, tt.username)
80+
if got != tt.wantBot {
81+
t.Errorf("isUserBot(%q) = %v, want %v", tt.username, got, tt.wantBot)
82+
}
83+
})
84+
}
85+
}
86+
87+
func TestIsValidReviewer(t *testing.T) {
88+
// Create a mock GitHub client that returns specific user types
89+
mockClient := &GitHubClient{
90+
userCache: newUserCache(),
91+
}
92+
93+
// Pre-populate the cache with test data
94+
mockClient.userCache.users["octo-sts"] = &userInfo{login: "octo-sts", userType: userTypeOrg}
95+
mockClient.userCache.users["github"] = &userInfo{login: "github", userType: userTypeOrg}
96+
mockClient.userCache.users["dependabot[bot]"] = &userInfo{login: "dependabot[bot]", userType: userTypeBot}
97+
mockClient.userCache.users["johndoe"] = &userInfo{login: "johndoe", userType: userTypeUser}
98+
mockClient.userCache.users["sergiodj"] = &userInfo{login: "sergiodj", userType: userTypeUser}
99+
100+
rf := &ReviewerFinder{
101+
client: mockClient,
102+
}
103+
104+
ctx := context.Background()
105+
pr := &PullRequest{Owner: "test", Repository: "repo"}
106+
107+
tests := []struct {
108+
name string
109+
username string
110+
wantValid bool
111+
}{
112+
// Should be filtered out
113+
{"Organization account", "octo-sts", false},
114+
{"GitHub org", "github", false},
115+
{"Bot with API confirmation", "dependabot[bot]", false},
116+
{"Pattern-based bot", "github-actions", false},
117+
{"Service account", "deploy-service", false},
118+
119+
// Should be valid
120+
{"Regular user", "johndoe", true},
121+
{"Contributor", "sergiodj", true},
122+
}
123+
124+
for _, tt := range tests {
125+
t.Run(tt.name, func(t *testing.T) {
126+
got := rf.isValidReviewer(ctx, pr, tt.username)
127+
if got != tt.wantValid {
128+
t.Errorf("isValidReviewer(%q) = %v, want %v", tt.username, got, tt.wantValid)
129+
}
130+
})
131+
}
132+
}
133+
134+
func TestGetUserType(t *testing.T) {
135+
// Test the userType detection logic
136+
tests := []struct {
137+
name string
138+
apiResponse string
139+
wantUserType userType
140+
}{
141+
{
142+
name: "Organization response",
143+
apiResponse: `{"type": "Organization", "name": "Test Org"}`,
144+
wantUserType: userTypeOrg,
145+
},
146+
{
147+
name: "Bot response",
148+
apiResponse: `{"type": "Bot", "name": "Test Bot"}`,
149+
wantUserType: userTypeBot,
150+
},
151+
{
152+
name: "User response",
153+
apiResponse: `{"type": "User", "name": "John Doe"}`,
154+
wantUserType: userTypeUser,
155+
},
156+
{
157+
name: "Empty type defaults to user",
158+
apiResponse: `{"name": "Unknown"}`,
159+
wantUserType: userTypeUser,
160+
},
161+
}
162+
163+
// These would be integration tests with a mock HTTP client
164+
// For now, we're testing the core logic
165+
for _, tt := range tests {
166+
t.Run(tt.name, func(t *testing.T) {
167+
// This would test the actual API parsing logic
168+
// Implementation would require mocking the HTTP client
169+
})
170+
}
171+
}

github_client.go

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ type GitHubClient struct {
1818
token string
1919
httpClient *http.Client
2020
cache *cache
21+
userCache *userCache
2122
}
2223

2324
// PullRequest represents a GitHub pull request.
@@ -81,6 +82,7 @@ func newGitHubClient() (*GitHubClient, error) {
8182
token: token,
8283
httpClient: &http.Client{Timeout: time.Duration(httpTimeout) * time.Second},
8384
cache: newCache(cacheTTL),
85+
userCache: newUserCache(),
8486
}, nil
8587
}
8688

@@ -377,7 +379,80 @@ func (rf *ReviewerFinder) getRecentPRCommenters(ctx context.Context, owner, repo
377379

378380
// isUserBot checks if a user is a bot.
379381
func (rf *ReviewerFinder) isUserBot(ctx context.Context, username string) bool {
380-
return strings.HasSuffix(username, "[bot]") || strings.HasSuffix(username, "-bot")
382+
lower := strings.ToLower(username)
383+
384+
// Check for common bot patterns
385+
botPatterns := []string{
386+
"[bot]",
387+
"-bot",
388+
"_bot",
389+
"bot-",
390+
"bot_",
391+
".bot",
392+
"github-actions",
393+
"dependabot",
394+
"renovate",
395+
"greenkeeper",
396+
"snyk",
397+
"codecov",
398+
"coveralls",
399+
"travis",
400+
"circleci",
401+
"jenkins",
402+
"buildkite",
403+
"semaphore",
404+
"appveyor",
405+
"azure-pipelines",
406+
"github-classroom",
407+
"imgbot",
408+
"allcontributors",
409+
"whitesource",
410+
"mergify",
411+
"sonarcloud",
412+
"deepsource",
413+
"codefactor",
414+
"lgtm",
415+
"codacy",
416+
"hound",
417+
"stale",
418+
}
419+
420+
for _, pattern := range botPatterns {
421+
if strings.Contains(lower, pattern) {
422+
return true
423+
}
424+
}
425+
426+
// Check for common organization/service account patterns
427+
orgPatterns := []string{
428+
"octo-sts",
429+
"octocat",
430+
"-sts",
431+
"-svc",
432+
"-service",
433+
"-system",
434+
"-automation",
435+
"-ci",
436+
"-cd",
437+
"-deploy",
438+
"-release",
439+
"release-manager",
440+
"-build",
441+
"-test",
442+
"-admin",
443+
"-security",
444+
"security-scanner",
445+
"-compliance",
446+
"compliance-checker",
447+
}
448+
449+
for _, pattern := range orgPatterns {
450+
if strings.Contains(lower, pattern) {
451+
return true
452+
}
453+
}
454+
455+
return false
381456
}
382457

383458
// hasWriteAccess checks if a user has write access to the repository.

reviewer.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,25 @@ func (rf *ReviewerFinder) findAssigneeExpert(ctx context.Context, pr *PullReques
219219

220220
// isValidReviewer checks if a user is a valid reviewer.
221221
func (rf *ReviewerFinder) isValidReviewer(ctx context.Context, pr *PullRequest, username string) bool {
222-
return !rf.isUserBot(ctx, username) && rf.hasWriteAccess(ctx, pr.Owner, pr.Repository, username)
222+
// First check pattern-based bot detection
223+
if rf.isUserBot(ctx, username) {
224+
if rf.output != nil && rf.output.verbose {
225+
fmt.Printf(" Filtered (bot pattern): %s\n", username)
226+
}
227+
return false
228+
}
229+
230+
// Then check GitHub API for user type
231+
uType, err := rf.client.getUserType(ctx, username)
232+
if err == nil && (uType == userTypeOrg || uType == userTypeBot) {
233+
if rf.output != nil && rf.output.verbose {
234+
fmt.Printf(" Filtered (%s): %s\n", uType, username)
235+
}
236+
return false
237+
}
238+
239+
// Finally check write access
240+
return rf.hasWriteAccess(ctx, pr.Owner, pr.Repository, username)
223241
}
224242

225243
// findExpertReviewer finds the most active reviewer for the changes.

user_cache.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"sync"
8+
)
9+
10+
// userType represents the type of GitHub account.
11+
type userType string
12+
13+
const (
14+
userTypeUser userType = "User"
15+
userTypeOrg userType = "Organization"
16+
userTypeBot userType = "Bot"
17+
)
18+
19+
// userInfo caches information about a GitHub user.
20+
type userInfo struct {
21+
login string
22+
userType userType
23+
}
24+
25+
// userCache caches user type information to avoid repeated API calls.
26+
type userCache struct {
27+
mu sync.RWMutex
28+
users map[string]*userInfo
29+
}
30+
31+
// newUserCache creates a new user cache.
32+
func newUserCache() *userCache {
33+
return &userCache{
34+
users: make(map[string]*userInfo),
35+
}
36+
}
37+
38+
// getUserType gets the type of a GitHub account (User, Organization, or Bot).
39+
func (c *GitHubClient) getUserType(ctx context.Context, username string) (userType, error) {
40+
// Check cache first
41+
if c.userCache != nil {
42+
c.userCache.mu.RLock()
43+
if info, exists := c.userCache.users[username]; exists {
44+
c.userCache.mu.RUnlock()
45+
return info.userType, nil
46+
}
47+
c.userCache.mu.RUnlock()
48+
}
49+
50+
// Make API call to get user info
51+
url := fmt.Sprintf("https://api.github.com/users/%s", username)
52+
resp, err := c.makeRequest(ctx, "GET", url, nil)
53+
if err != nil {
54+
return userTypeUser, err // Default to user on error
55+
}
56+
defer resp.Body.Close()
57+
58+
if resp.StatusCode == 404 {
59+
// User doesn't exist
60+
return userTypeBot, nil
61+
}
62+
63+
if resp.StatusCode != 200 {
64+
return userTypeUser, fmt.Errorf("failed to get user info (status %d)", resp.StatusCode)
65+
}
66+
67+
var data struct {
68+
Type string `json:"type"`
69+
Name string `json:"name"`
70+
}
71+
72+
if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
73+
return userTypeUser, err
74+
}
75+
76+
// Determine user type
77+
var uType userType
78+
switch data.Type {
79+
case "Organization":
80+
uType = userTypeOrg
81+
case "Bot":
82+
uType = userTypeBot
83+
default:
84+
uType = userTypeUser
85+
}
86+
87+
// Cache the result
88+
if c.userCache != nil {
89+
c.userCache.mu.Lock()
90+
c.userCache.users[username] = &userInfo{
91+
login: username,
92+
userType: uType,
93+
}
94+
c.userCache.mu.Unlock()
95+
}
96+
97+
return uType, nil
98+
}

0 commit comments

Comments
 (0)