Skip to content

Commit b8802ca

Browse files
authored
fix: add search result caching, project health caching, and repo score TTL (#511)
- Cache GitHub search API results with 15-minute TTL to reduce rate limit consumption when oss-search is run multiple times in quick succession - Cache project health check results with 4-hour TTL to avoid redundant API calls when the same repo appears across multiple search phases - Enforce 30-day TTL on repo scores in getLowScoringRepos() so stale low scores don't permanently block repos from appearing in search results - Extract reusable cachedTimeBased() helper in http-cache.ts for time-based caching without ETag/conditional requests - Add warning log for corrupted lastEvaluatedAt dates in state Closes #487
1 parent de805ff commit b8802ca

File tree

7 files changed

+274
-50
lines changed

7 files changed

+274
-50
lines changed

packages/core/src/core/http-cache.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,32 @@ export async function cachedRequest<T>(
299299
}
300300
}
301301

302+
/**
303+
* Time-based cache wrapper (no ETag / conditional requests).
304+
*
305+
* If a cached result exists and is younger than `maxAgeMs`, returns it.
306+
* Otherwise calls `fetcher`, caches the result, and returns it.
307+
*
308+
* Use this for expensive operations whose results change slowly
309+
* (e.g. search queries, project health checks).
310+
*/
311+
export async function cachedTimeBased<T>(
312+
cache: HttpCache,
313+
key: string,
314+
maxAgeMs: number,
315+
fetcher: () => Promise<T>,
316+
): Promise<T> {
317+
const cached = cache.getIfFresh(key, maxAgeMs);
318+
if (cached) {
319+
debug(MODULE, `Time-based cache hit for ${key}`);
320+
return cached as T;
321+
}
322+
323+
const result = await fetcher();
324+
cache.set(key, '', result);
325+
return result;
326+
}
327+
302328
/**
303329
* Detect whether an error is a 304 Not Modified response.
304330
* Octokit throws a RequestError with status 304 for conditional requests.

packages/core/src/core/issue-discovery.test.ts

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,29 @@ vi.mock('./github.js', () => ({
1111
checkRateLimit: vi.fn().mockResolvedValue({ remaining: 30, limit: 30, resetAt: new Date().toISOString() }),
1212
}));
1313

14+
vi.mock('./http-cache.js', () => ({
15+
getHttpCache: vi.fn().mockReturnValue({
16+
getIfFresh: vi.fn().mockReturnValue(null),
17+
get: vi.fn().mockReturnValue(null),
18+
set: vi.fn(),
19+
hasInflight: vi.fn().mockReturnValue(false),
20+
getInflight: vi.fn().mockReturnValue(undefined),
21+
setInflight: vi.fn().mockReturnValue(() => {}),
22+
}),
23+
// Pass through to the fetcher so octokit mocks are still exercised
24+
cachedRequest: vi.fn().mockImplementation(async (_cache: unknown, _url: string, fetcher: (h: Record<string, string>) => Promise<{ data: unknown }>) => {
25+
const response = await fetcher({});
26+
return response.data;
27+
}),
28+
cachedTimeBased: vi.fn().mockImplementation(async (cache: any, _key: string, _maxAgeMs: number, fetcher: () => Promise<unknown>) => {
29+
const cached = cache.getIfFresh(_key, _maxAgeMs);
30+
if (cached) return cached;
31+
const result = await fetcher();
32+
cache.set(_key, '', result);
33+
return result;
34+
}),
35+
}));
36+
1437
vi.mock('./state.js', () => ({
1538
getStateManager: vi.fn(() => ({
1639
getState: () => ({
@@ -1457,6 +1480,55 @@ describe('Phase 3: actively maintained repos (#349)', () => {
14571480
});
14581481
});
14591482

1483+
describe('Search result caching (#487)', () => {
1484+
let discovery: InstanceType<typeof IssueDiscovery>;
1485+
let mockCache: any;
1486+
1487+
beforeEach(async () => {
1488+
const { getHttpCache } = await import('./http-cache.js');
1489+
mockCache = (getHttpCache as any)();
1490+
mockCache.getIfFresh.mockReturnValue(null);
1491+
mockCache.set.mockClear();
1492+
1493+
mockOctokitInstance = {
1494+
issues: { get: vi.fn(), listEventsForTimeline: vi.fn().mockResolvedValue({ data: [] }) },
1495+
search: { issuesAndPullRequests: vi.fn().mockResolvedValue({ data: { total_count: 0, items: [] } }) },
1496+
repos: {
1497+
get: vi.fn().mockResolvedValue({ data: { open_issues_count: 0, pushed_at: new Date().toISOString(), stargazers_count: 100, forks_count: 10 } }),
1498+
listCommits: vi.fn().mockResolvedValue({ data: [{ commit: { author: { date: new Date().toISOString() } } }] }),
1499+
getContent: vi.fn().mockRejectedValue(new Error('404 Not Found')),
1500+
},
1501+
actions: { listRepoWorkflows: vi.fn().mockResolvedValue({ data: { total_count: 0 } }) },
1502+
paginate: { iterator: vi.fn() },
1503+
activity: { listReposStarredByAuthenticatedUser: vi.fn() },
1504+
};
1505+
discovery = new IssueDiscovery('fake-token');
1506+
});
1507+
1508+
it('should call cache.set after a fresh search API call', async () => {
1509+
mockOctokitInstance.search.issuesAndPullRequests.mockResolvedValue({
1510+
data: { total_count: 0, items: [] },
1511+
});
1512+
1513+
await expect(discovery.searchIssues({ maxResults: 1 })).rejects.toThrow();
1514+
1515+
// cache.set should have been called for the Phase 2 search
1516+
expect(mockCache.set).toHaveBeenCalled();
1517+
const setCall = mockCache.set.mock.calls[0];
1518+
expect(setCall[0]).toMatch(/^search:/); // cache key starts with search:
1519+
});
1520+
1521+
it('should return cached results when getIfFresh returns data', async () => {
1522+
const cachedData = { total_count: 0, items: [] };
1523+
mockCache.getIfFresh.mockReturnValue(cachedData);
1524+
1525+
await expect(discovery.searchIssues({ maxResults: 1 })).rejects.toThrow();
1526+
1527+
// Octokit search should NOT have been called (cached results used)
1528+
expect(mockOctokitInstance.search.issuesAndPullRequests).not.toHaveBeenCalled();
1529+
});
1530+
});
1531+
14601532
describe('DOC_ONLY_LABELS', () => {
14611533
it('should contain the expected documentation labels', () => {
14621534
expect(DOC_ONLY_LABELS.has('documentation')).toBe(true);

packages/core/src/core/issue-discovery.ts

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import { daysBetween, getDataDir } from './utils.js';
1616
import { DEFAULT_CONFIG, type SearchPriority, type IssueCandidate } from './types.js';
1717
import { ValidationError, errorMessage, getHttpStatusCode } from './errors.js';
1818
import { debug, info, warn } from './logger.js';
19+
import { getHttpCache, cachedTimeBased } from './http-cache.js';
1920
import { type GitHubSearchItem, isDocOnlyIssue, detectLabelFarmingRepos, applyPerRepoCap } from './issue-filtering.js';
2021
import { IssueVetter } from './issue-vetting.js';
2122
import { calculateViabilityScore as calcViabilityScore, type ViabilityScoreParams } from './issue-scoring.js';
@@ -39,6 +40,9 @@ export type { SearchPriority, IssueCandidate } from './types.js';
3940

4041
const MODULE = 'issue-discovery';
4142

43+
/** TTL for cached search API results (15 minutes). */
44+
const SEARCH_CACHE_TTL_MS = 15 * 60 * 1000;
45+
4246
export class IssueDiscovery {
4347
private octokit: Octokit;
4448
private stateManager: ReturnType<typeof getStateManager>;
@@ -55,6 +59,29 @@ export class IssueDiscovery {
5559
this.vetter = new IssueVetter(this.octokit, this.stateManager);
5660
}
5761

62+
/**
63+
* Wrap octokit.search.issuesAndPullRequests with time-based caching.
64+
* Repeated identical queries within SEARCH_CACHE_TTL_MS return cached results
65+
* without consuming GitHub API rate limit points.
66+
*/
67+
private async cachedSearch(params: {
68+
q: string;
69+
sort: string;
70+
order: string;
71+
per_page: number;
72+
}): Promise<{ total_count: number; items: GitHubSearchItem[] }> {
73+
const cacheKey = `search:${params.q}:${params.sort}:${params.order}:${params.per_page}`;
74+
return cachedTimeBased(
75+
getHttpCache(),
76+
cacheKey,
77+
SEARCH_CACHE_TTL_MS,
78+
async () => {
79+
const { data } = await this.octokit.search.issuesAndPullRequests(params);
80+
return data;
81+
},
82+
);
83+
}
84+
5885
/**
5986
* Fetch the authenticated user's starred repositories from GitHub.
6087
* Updates the state manager with the list and timestamp.
@@ -375,7 +402,7 @@ export class IssueDiscovery {
375402
info(MODULE, 'Phase 2: General issue search...');
376403
const remainingNeeded = maxResults - allCandidates.length;
377404
try {
378-
const { data } = await this.octokit.search.issuesAndPullRequests({
405+
const data = await this.cachedSearch({
379406
q: baseQuery,
380407
sort: 'created',
381408
order: 'desc',
@@ -433,7 +460,7 @@ export class IssueDiscovery {
433460
.trim();
434461

435462
try {
436-
const { data } = await this.octokit.search.issuesAndPullRequests({
463+
const data = await this.cachedSearch({
437464
q: phase3Query,
438465
sort: 'updated',
439466
order: 'desc',
@@ -568,7 +595,7 @@ export class IssueDiscovery {
568595
const repoFilter = batch.map((r) => `repo:${r}`).join(' OR ');
569596
const batchQuery = `${baseQuery} (${repoFilter})`;
570597

571-
const { data } = await this.octokit.search.issuesAndPullRequests({
598+
const data = await this.cachedSearch({
572599
q: batchQuery,
573600
sort: 'created',
574601
order: 'desc',

packages/core/src/core/issue-vetting.test.ts

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,22 @@ vi.mock('./pagination.js', () => ({
1010
}));
1111

1212
vi.mock('./http-cache.js', () => ({
13-
getHttpCache: vi.fn().mockReturnValue({}),
13+
getHttpCache: vi.fn().mockReturnValue({
14+
getIfFresh: vi.fn().mockReturnValue(null),
15+
get: vi.fn().mockReturnValue(null),
16+
set: vi.fn(),
17+
hasInflight: vi.fn().mockReturnValue(false),
18+
getInflight: vi.fn().mockReturnValue(undefined),
19+
setInflight: vi.fn().mockReturnValue(() => {}),
20+
}),
1421
cachedRequest: vi.fn(),
22+
cachedTimeBased: vi.fn().mockImplementation(async (cache: any, _key: string, _maxAgeMs: number, fetcher: () => Promise<unknown>) => {
23+
const cached = cache.getIfFresh(_key, _maxAgeMs);
24+
if (cached) return cached;
25+
const result = await fetcher();
26+
cache.set(_key, '', result);
27+
return result;
28+
}),
1529
}));
1630

1731
vi.mock('./logger.js', () => ({
@@ -505,6 +519,45 @@ describe('checkProjectHealth', () => {
505519
expect(health.stargazersCount).toBeUndefined();
506520
expect(health.forksCount).toBeUndefined();
507521
});
522+
523+
it('returns cached health when getIfFresh returns data (#487)', async () => {
524+
const { getHttpCache } = await import('./http-cache.js');
525+
const mockCache = (getHttpCache as any)();
526+
const cachedHealth = {
527+
repo: 'owner/repo',
528+
lastCommitAt: '2025-06-30T00:00:00Z',
529+
daysSinceLastCommit: 1,
530+
openIssuesCount: 10,
531+
avgIssueResponseDays: 0,
532+
ciStatus: 'passing',
533+
isActive: true,
534+
stargazersCount: 500,
535+
forksCount: 50,
536+
};
537+
mockCache.getIfFresh.mockReturnValueOnce(cachedHealth);
538+
vi.mocked(cachedRequest).mockClear();
539+
540+
const health = await vetter.checkProjectHealth('owner', 'repo');
541+
expect(health).toEqual(cachedHealth);
542+
// API should NOT have been called (cachedRequest is used for repo.get)
543+
expect(cachedRequest).not.toHaveBeenCalled();
544+
});
545+
546+
it('caches health result after fresh API call (#487)', async () => {
547+
const { getHttpCache } = await import('./http-cache.js');
548+
const mockCache = (getHttpCache as any)();
549+
mockCache.getIfFresh.mockReturnValue(null);
550+
mockCache.set.mockClear();
551+
552+
const health = await vetter.checkProjectHealth('owner', 'repo');
553+
expect(health.isActive).toBe(true);
554+
// cache.set should have been called with the health result
555+
expect(mockCache.set).toHaveBeenCalledWith(
556+
'health:owner/repo',
557+
'',
558+
expect.objectContaining({ repo: 'owner/repo', isActive: true }),
559+
);
560+
});
508561
});
509562

510563
describe('fetchContributionGuidelines', () => {

packages/core/src/core/issue-vetting.ts

Lines changed: 51 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import {
1818
} from './types.js';
1919
import { ValidationError, errorMessage, getHttpStatusCode } from './errors.js';
2020
import { warn } from './logger.js';
21-
import { getHttpCache, cachedRequest } from './http-cache.js';
21+
import { getHttpCache, cachedRequest, cachedTimeBased } from './http-cache.js';
2222
import { getStateManager } from './state.js';
2323
import { calculateRepoQualityBonus, calculateViabilityScore } from './issue-scoring.js';
2424

@@ -37,6 +37,8 @@ export interface CheckResult {
3737
// Cache for contribution guidelines (expires after 1 hour, max 100 entries)
3838
const guidelinesCache = new Map<string, { guidelines: ContributionGuidelines | undefined; fetchedAt: number }>();
3939
const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
40+
/** TTL for cached project health results (4 hours). Health data (stars, commits, CI) changes slowly. */
41+
const HEALTH_CACHE_TTL_MS = 4 * 60 * 60 * 1000;
4042
const CACHE_MAX_SIZE = 100;
4143

4244
function pruneCache(): void {
@@ -447,58 +449,62 @@ export class IssueVetter {
447449
}
448450

449451
async checkProjectHealth(owner: string, repo: string): Promise<ProjectHealth> {
450-
try {
451-
// Get repo info (with ETag caching — repo metadata changes infrequently)
452-
const cache = getHttpCache();
453-
const url = `/repos/${owner}/${repo}`;
454-
const repoData = await cachedRequest(
455-
cache,
456-
url,
457-
(headers) =>
458-
this.octokit.repos.get({ owner, repo, headers }) as Promise<{
459-
data: { open_issues_count: number; pushed_at: string; stargazers_count: number; forks_count: number };
460-
headers: Record<string, string>;
461-
}>,
462-
);
463-
464-
// Get recent commits
465-
const { data: commits } = await this.octokit.repos.listCommits({
466-
owner,
467-
repo,
468-
per_page: 1,
469-
});
452+
const cache = getHttpCache();
453+
const healthCacheKey = `health:${owner}/${repo}`;
470454

471-
const lastCommit = commits[0];
472-
const lastCommitAt = lastCommit?.commit?.author?.date || repoData.pushed_at;
473-
const daysSinceLastCommit = daysBetween(new Date(lastCommitAt));
455+
try {
456+
return await cachedTimeBased(cache, healthCacheKey, HEALTH_CACHE_TTL_MS, async () => {
457+
// Get repo info (with ETag caching — repo metadata changes infrequently)
458+
const url = `/repos/${owner}/${repo}`;
459+
const repoData = await cachedRequest(
460+
cache,
461+
url,
462+
(headers) =>
463+
this.octokit.repos.get({ owner, repo, headers }) as Promise<{
464+
data: { open_issues_count: number; pushed_at: string; stargazers_count: number; forks_count: number };
465+
headers: Record<string, string>;
466+
}>,
467+
);
474468

475-
// Check CI status (simplified - just check if workflows exist)
476-
let ciStatus: 'passing' | 'failing' | 'unknown' = 'unknown';
477-
try {
478-
const { data: workflows } = await this.octokit.actions.listRepoWorkflows({
469+
// Get recent commits
470+
const { data: commits } = await this.octokit.repos.listCommits({
479471
owner,
480472
repo,
481473
per_page: 1,
482474
});
483-
if (workflows.total_count > 0) {
484-
ciStatus = 'passing'; // Assume passing if workflows exist
475+
476+
const lastCommit = commits[0];
477+
const lastCommitAt = lastCommit?.commit?.author?.date || repoData.pushed_at;
478+
const daysSinceLastCommit = daysBetween(new Date(lastCommitAt));
479+
480+
// Check CI status (simplified - just check if workflows exist)
481+
let ciStatus: 'passing' | 'failing' | 'unknown' = 'unknown';
482+
try {
483+
const { data: workflows } = await this.octokit.actions.listRepoWorkflows({
484+
owner,
485+
repo,
486+
per_page: 1,
487+
});
488+
if (workflows.total_count > 0) {
489+
ciStatus = 'passing'; // Assume passing if workflows exist
490+
}
491+
} catch (error) {
492+
const errMsg = errorMessage(error);
493+
warn(MODULE, `Failed to check CI status for ${owner}/${repo}: ${errMsg}. Defaulting to unknown.`);
485494
}
486-
} catch (error) {
487-
const errMsg = errorMessage(error);
488-
warn(MODULE, `Failed to check CI status for ${owner}/${repo}: ${errMsg}. Defaulting to unknown.`);
489-
}
490495

491-
return {
492-
repo: `${owner}/${repo}`,
493-
lastCommitAt,
494-
daysSinceLastCommit,
495-
openIssuesCount: repoData.open_issues_count,
496-
avgIssueResponseDays: 0, // Would need more API calls to calculate
497-
ciStatus,
498-
isActive: daysSinceLastCommit < 30,
499-
stargazersCount: repoData.stargazers_count,
500-
forksCount: repoData.forks_count,
501-
};
496+
return {
497+
repo: `${owner}/${repo}`,
498+
lastCommitAt,
499+
daysSinceLastCommit,
500+
openIssuesCount: repoData.open_issues_count,
501+
avgIssueResponseDays: 0, // Would need more API calls to calculate
502+
ciStatus,
503+
isActive: daysSinceLastCommit < 30,
504+
stargazersCount: repoData.stargazers_count,
505+
forksCount: repoData.forks_count,
506+
};
507+
});
502508
} catch (error) {
503509
const errMsg = errorMessage(error);
504510
warn(MODULE, `Error checking project health for ${owner}/${repo}: ${errMsg}`);

0 commit comments

Comments
 (0)