Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 145 additions & 2 deletions pkg/bot/bot.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,21 @@ type ThreadCache struct {
// ThreadInfo is an alias to state.ThreadInfo to avoid duplication.
type ThreadInfo = state.ThreadInfo

// CommitPREntry caches recent commit→PR mappings for fast lookup.
// Each entry pairs one commit SHA with one PR number, plus the time that
// pairing was last recorded.
type CommitPREntry struct {
// PRNumber is the pull request number the commit was recorded against.
PRNumber int
// HeadSHA is the commit SHA that lookups are matched on. Despite the name,
// the caller visible in this file records each SHA from a PR's commit list,
// not only the head commit.
HeadSHA string
// UpdatedAt is when RecordPR last added or refreshed this entry; RecordPR
// uses it to discard old entries.
UpdatedAt time.Time
}

// CommitPRCache provides in-memory caching of commit SHA → PR mappings.
// This allows quick lookup when check events arrive with just a commit SHA,
// avoiding expensive GitHub API calls for recently-seen PRs.
//
// The struct holds a mutex by value, so it must not be copied; its methods
// use pointer receivers.
type CommitPRCache struct {
// mu guards entries: RecordPR takes the write lock, the lookup methods take
// the read lock.
mu sync.RWMutex
entries map[string][]CommitPREntry // "owner/repo" -> recent PRs with commits
}

// Get retrieves thread info for a PR.
func (tc *ThreadCache) Get(prKey string) (ThreadInfo, bool) {
tc.mu.RLock()
Expand Down Expand Up @@ -81,6 +96,113 @@ func (tc *ThreadCache) Cleanup(maxAge time.Duration) {
}
}

// RecordPR remembers that commit headSHA belongs to pull request prNumber in
// owner/repo, so that a later check event carrying only a SHA can be mapped
// back to the PR.
//
// Recording a PR+SHA pair that is already present only refreshes its
// timestamp. Every call also discards that repo's entries whose timestamp is
// 10 minutes old or more. An empty headSHA is ignored.
func (cpc *CommitPRCache) RecordPR(owner, repo string, prNumber int, headSHA string) {
	if headSHA == "" {
		return // nothing to index
	}

	cpc.mu.Lock()
	defer cpc.mu.Unlock()

	// Tolerate a zero-value cache whose map was never allocated.
	if cpc.entries == nil {
		cpc.entries = make(map[string][]CommitPREntry)
	}

	key := owner + "/" + repo
	stamp := time.Now()
	known := cpc.entries[key]

	// Locate an existing entry for this exact PR+commit pair, if any.
	idx := -1
	for i := range known {
		if known[i].PRNumber == prNumber && known[i].HeadSHA == headSHA {
			idx = i
			break
		}
	}

	if idx >= 0 {
		// Already known: just mark it as seen now.
		known[idx].UpdatedAt = stamp
	} else {
		known = append(known, CommitPREntry{
			PRNumber:  prNumber,
			HeadSHA:   headSHA,
			UpdatedAt: stamp,
		})
	}

	// Rebuild the repo's list keeping only entries newer than the cutoff
	// (check events usually arrive within seconds of the PR event).
	oldest := stamp.Add(-10 * time.Minute)
	fresh := make([]CommitPREntry, 0, len(known))
	for _, e := range known {
		if e.UpdatedAt.After(oldest) {
			fresh = append(fresh, e)
		}
	}

	cpc.entries[key] = fresh
}

// FindPRsForCommit finds PRs in a repo that match the given commit SHA.
//
// It returns the PR numbers of cached entries for owner/repo whose SHA equals
// commitSHA and which were recorded or refreshed within the last 10 minutes.
// It returns nil when commitSHA is empty, the repo is unknown, or no
// unexpired entry matches.
//
// Expiry is enforced here as well as in RecordPR: RecordPR only prunes a
// repo's entries when that same repo receives a new record, so without this
// check a quiet repo would keep serving arbitrarily old mappings.
func (cpc *CommitPRCache) FindPRsForCommit(owner, repo, commitSHA string) []int {
	if commitSHA == "" {
		return nil
	}

	// Must match the retention window RecordPR applies when pruning.
	const retention = 10 * time.Minute
	cutoff := time.Now().Add(-retention)

	cpc.mu.RLock()
	defer cpc.mu.RUnlock()

	// Indexing a nil or missing map entry yields a nil slice, so no existence
	// check is needed before ranging.
	entries := cpc.entries[owner+"/"+repo]

	var prNumbers []int
	for i := range entries {
		if entries[i].HeadSHA == commitSHA && entries[i].UpdatedAt.After(cutoff) {
			prNumbers = append(prNumbers, entries[i].PRNumber)
		}
	}

	return prNumbers
}

// MostRecentPR returns the most recently updated PR number for a repo from the cache.
//
// Only entries recorded or refreshed within the last 10 minutes are
// considered. It returns 0 if the repo is unknown or has no unexpired
// entries. When several entries share the newest timestamp, the one stored
// first wins.
//
// Expiry is enforced here as well as in RecordPR: RecordPR only prunes a
// repo's entries when that same repo receives a new record, so without this
// check a quiet repo would keep reporting an arbitrarily old PR as "recent".
func (cpc *CommitPRCache) MostRecentPR(owner, repo string) int {
	// Must match the retention window RecordPR applies when pruning.
	const retention = 10 * time.Minute
	cutoff := time.Now().Add(-retention)

	cpc.mu.RLock()
	defer cpc.mu.RUnlock()

	var (
		newestPR int // stays 0 when nothing qualifies
		newestAt time.Time
		found    bool
	)
	// Ranging over a nil or missing map entry is a no-op.
	for _, e := range cpc.entries[owner+"/"+repo] {
		if !e.UpdatedAt.After(cutoff) {
			continue // expired
		}
		if !found || e.UpdatedAt.After(newestAt) {
			newestPR, newestAt, found = e.PRNumber, e.UpdatedAt, true
		}
	}

	return newestPR
}

// Coordinator coordinates between GitHub, Slack, and notifications for a single org.
//
//nolint:govet // Field order optimized for logical grouping over memory alignment
Expand All @@ -94,8 +216,9 @@ type Coordinator struct {
configManager *config.Manager
notifier *notify.Manager
userMapper *usermapping.Service
threadCache *ThreadCache // In-memory cache for fast lookups
eventSemaphore chan struct{} // Limits concurrent event processing (prevents overwhelming APIs)
threadCache *ThreadCache // In-memory cache for fast lookups
commitPRCache *CommitPRCache // Maps commit SHAs to PR numbers for check events
eventSemaphore chan struct{} // Limits concurrent event processing (prevents overwhelming APIs)
}

// StateStore interface for persistent state - allows dependency injection for testing.
Expand Down Expand Up @@ -134,6 +257,9 @@ func New(
prThreads: make(map[string]ThreadInfo),
creating: make(map[string]bool),
},
commitPRCache: &CommitPRCache{
entries: make(map[string][]CommitPREntry),
},
eventSemaphore: make(chan struct{}, 10), // Allow 10 concurrent events per org
}

Expand Down Expand Up @@ -1546,6 +1672,23 @@ func (c *Coordinator) handlePullRequestFromSprinkler(
// Use PR details from turnclient instead of making additional GitHub API call
pr := checkResult.PullRequest

// Populate commit→PR cache for all commits in this PR
// This allows us to quickly map check events (which only have commit SHA) back to PRs
// without making expensive GitHub API calls
if len(pr.Commits) > 0 {
slog.Debug("populating commit→PR cache from turnclient response",
logFieldOwner, owner,
logFieldRepo, repo,
"pr_number", prNumber,
"commit_count", len(pr.Commits))

for _, commitSHA := range pr.Commits {
if commitSHA != "" {
c.commitPRCache.RecordPR(owner, repo, prNumber, commitSHA)
}
}
}

// Create a synthetic webhook event to reuse existing logic with real PR data
event := struct {
Action string `json:"action"`
Expand Down
134 changes: 132 additions & 2 deletions pkg/bot/bot_sprinkler.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/codeGROOVE-dev/slacker/pkg/state"
"github.com/codeGROOVE-dev/sprinkler/pkg/client"
"github.com/codeGROOVE-dev/turnclient/pkg/turn"
)

// Constants for URL parsing.
Expand Down Expand Up @@ -84,13 +85,142 @@ func (c *Coordinator) lookupPRsForCheckEvent(ctx context.Context, event client.E
owner := parts[3]
repo := parts[4]

slog.Info("sprinkler cache miss - looking up PRs for commit via GitHub API",
// First, check our local commit→PR cache (populated from recent PR events)
// This is MUCH faster than GitHub API and usually works since check events
// arrive seconds after PR events
cachedPRs := c.commitPRCache.FindPRsForCommit(owner, repo, commitSHA)
if len(cachedPRs) > 0 {
slog.Info("found PRs for commit in cache - avoiding GitHub API call",
"organization", organization,
"owner", owner,
"repo", repo,
"commit_sha", commitSHA,
"pr_count", len(cachedPRs),
"pr_numbers", cachedPRs,
"type", event.Type,
"delivery_id", deliveryID,
"cache_hit", true)
return cachedPRs
}

// Cache miss - try turnclient lookup on most recent PR before falling back to GitHub API
slog.Info("commit→PR cache miss - will try turnclient on recent PR before GitHub API",
"organization", organization,
"owner", owner,
"repo", repo,
"commit_sha", commitSHA,
"type", event.Type,
"delivery_id", deliveryID)
"delivery_id", deliveryID,
"cache_hit", false,
"reason", "check event arrived before PR event or cache expired")

// Second attempt: Check if we recently saw a PR for this repo
// If yes, fetch it via turnclient to see if it contains this commit
// This is cheaper than searching all PRs via GitHub API
mostRecentPR := c.commitPRCache.MostRecentPR(owner, repo)
if mostRecentPR > 0 {
slog.Debug("attempting turnclient lookup on most recent PR for repo",
"organization", organization,
"owner", owner,
"repo", repo,
"pr_number", mostRecentPR,
"commit_sha", commitSHA,
"rationale", "check events usually arrive for recently updated PRs")

// Get GitHub token
githubToken := c.github.InstallationToken(ctx)
if githubToken != "" {
// Create turnclient
turnClient, tcErr := turn.NewDefaultClient()
if tcErr == nil {
turnClient.SetAuthToken(githubToken)

// Check the recent PR with current timestamp
checkCtx, checkCancel := context.WithTimeout(ctx, 30*time.Second)
prURL := fmt.Sprintf("https://github.com/%s/%s/pull/%d", owner, repo, mostRecentPR)
checkResult, checkErr := turnClient.Check(checkCtx, prURL, owner, time.Now())
checkCancel()

if checkErr == nil && checkResult != nil {
// Check if any commits match
for _, prCommit := range checkResult.PullRequest.Commits {
if prCommit == commitSHA {
slog.Info("found commit in most recent PR via turnclient - avoiding GitHub API search",
"organization", organization,
"owner", owner,
"repo", repo,
"pr_number", mostRecentPR,
"commit_sha", commitSHA,
"turnclient_hit", true,
"bonus", "got free PR status update")

// Populate cache with all commits from this PR
for _, commit := range checkResult.PullRequest.Commits {
if commit != "" {
c.commitPRCache.RecordPR(owner, repo, mostRecentPR, commit)
}
}

// Process the PR update since we have fresh data
go c.handlePullRequestEventWithData(context.Background(), owner, repo, struct {
Action string `json:"action"`
PullRequest struct {
HTMLURL string `json:"html_url"`
Title string `json:"title"`
CreatedAt time.Time `json:"created_at"`
User struct {
Login string `json:"login"`
} `json:"user"`
Number int `json:"number"`
} `json:"pull_request"`
Number int `json:"number"`
}{
Action: "synchronize",
PullRequest: struct {
HTMLURL string `json:"html_url"`
Title string `json:"title"`
CreatedAt time.Time `json:"created_at"`
User struct {
Login string `json:"login"`
} `json:"user"`
Number int `json:"number"`
}{
HTMLURL: prURL,
Title: checkResult.PullRequest.Title,
CreatedAt: checkResult.PullRequest.CreatedAt,
User: struct {
Login string `json:"login"`
}{
Login: checkResult.PullRequest.Author,
},
Number: mostRecentPR,
},
Number: mostRecentPR,
}, checkResult, nil)

return []int{mostRecentPR}
}
}

slog.Debug("commit not found in most recent PR - will fall back to GitHub API",
"organization", organization,
"owner", owner,
"repo", repo,
"pr_number", mostRecentPR,
"commit_sha", commitSHA,
"pr_commits_checked", len(checkResult.PullRequest.Commits))
}
}
}
}

// Third attempt: Fall back to GitHub API to search all PRs
slog.Info("falling back to GitHub API to search all PRs for commit",
"organization", organization,
"owner", owner,
"repo", repo,
"commit_sha", commitSHA,
"tried_turnclient", mostRecentPR > 0)

// Look up PRs for this commit using GitHub API
// Allow up to 3 minutes for retries (2 min max delay + buffer)
Expand Down
Loading
Loading