Skip to content

Commit 6a1072d

Browse files
authored
Merge pull request #24 from BadlyDrawnBoy/codex/replace-map-with-lru-cache-in-cache.ts
Add configurable LRU caching with eviction metrics
2 parents ce04c00 + 509641c commit 6a1072d

File tree

2 files changed

+190
-11
lines changed

2 files changed

+190
-11
lines changed

src/utils/cache.ts

Lines changed: 127 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,93 @@ interface CacheEntry<T> {
1818
createdAt: number;
1919
}
2020

21-
const textCache = new Map<string, CacheEntry<PdfPageText>>();
22-
const ocrCache = new Map<string, CacheEntry<CachedOcrResult>>();
21+
/**
 * Limits applied to a single cache instance.
 *
 * - `maxEntries`: entry count above which the oldest entries are evicted.
 * - `ttlMs`: maximum entry age in milliseconds; when unset (or 0) entries
 *   never expire.
 */
interface CacheOptions {
22+
maxEntries: number;
23+
ttlMs?: number;
24+
}
25+
26+
/**
 * Bounded in-memory cache with least-recently-used eviction and optional
 * age-based expiry.
 *
 * Recency is tracked through the insertion order of the backing `Map`: the
 * first key is always the least recently used one, and every hit or write
 * moves its key to the end.
 */
class LruCache<T> {
  /** Entries ordered from least to most recently used. */
  private readonly store = new Map<string, CacheEntry<T>>();

  /** Running total of entries dropped because of capacity or expiry. */
  private evictions = 0;

  /**
   * @param options Capacity (`maxEntries`) and optional expiry (`ttlMs`)
   *   settings; the object is read on every operation, not copied.
   */
  constructor(private readonly options: CacheOptions) {}

  /**
   * Number of stored entries. Expiry is only checked on `get`, so this may
   * include entries that are already past their TTL.
   */
  get size(): number {
    return this.store.size;
  }

  /** Total entries removed so far by capacity trimming or TTL expiry. */
  get evictionCount(): number {
    return this.evictions;
  }

  /** Stored keys, ordered from least to most recently used. */
  getKeys(): string[] {
    return Array.from(this.store.keys());
  }

  /** Removes every entry. The eviction counter is left untouched. */
  clear(): void {
    this.store.clear();
  }

  /** True when a TTL is configured and the entry is older than it. */
  private isExpired(entry: CacheEntry<T>): boolean {
    // A missing (or zero) TTL disables expiry entirely.
    if (!this.options.ttlMs) return false;
    return Date.now() - entry.createdAt > this.options.ttlMs;
  }

  /** Moves `key` to the most-recently-used end of the map. */
  private markRecentlyUsed(key: string, entry: CacheEntry<T>): void {
    // Re-inserting is the only way to change a Map key's iteration position.
    this.store.delete(key);
    this.store.set(key, entry);
  }

  /** Evicts least recently used entries until the size limit is respected. */
  private trimToMaxEntries(): void {
    while (this.store.size > this.options.maxEntries) {
      const oldest = this.store.keys().next();
      // Check `done` rather than the key's truthiness: the empty string is a
      // valid key and must still be evictable.
      if (oldest.done) return;
      this.store.delete(oldest.value);
      this.evictions += 1;
    }
  }

  /**
   * Looks up `key` and marks it as most recently used.
   *
   * An expired entry is removed, counted as an eviction and reported as a miss.
   *
   * @returns The cached value, or `undefined` on a miss or expiry.
   */
  get(key: string): T | undefined {
    const entry = this.store.get(key);
    if (!entry) return undefined;

    if (this.isExpired(entry)) {
      this.store.delete(key);
      this.evictions += 1;
      return undefined;
    }

    this.markRecentlyUsed(key, entry);
    return entry.value;
  }

  /**
   * Stores `value` under `key` with a fresh timestamp, making it the most
   * recently used entry, then evicts old entries if the limit is exceeded.
   */
  set(key: string, value: T): void {
    const entry: CacheEntry<T> = { value, createdAt: Date.now() };

    // Deleting first guarantees an overwritten key moves to the newest
    // position; deleting an absent key is a no-op.
    this.store.delete(key);
    this.store.set(key, entry);
    this.trimToMaxEntries();
  }
}
91+
92+
/** Names of the two independently configured caches. */
type CacheScope = 'text' | 'ocr';
93+
94+
/** Baseline settings per scope: 500 entries each, no TTL. */
const DEFAULT_CACHE_OPTIONS: Record<CacheScope, CacheOptions> = {
95+
text: { maxEntries: 500 },
96+
ocr: { maxEntries: 500 },
97+
};
98+
99+
// Active settings; initialised with copies so the defaults are never aliased.
let cacheOptions: Record<CacheScope, CacheOptions> = {
100+
text: { ...DEFAULT_CACHE_OPTIONS.text },
101+
ocr: { ...DEFAULT_CACHE_OPTIONS.ocr },
102+
};
103+
104+
// Creates an empty cache bound to the current options object of `scope`.
const buildCache = <T>(scope: CacheScope): LruCache<T> => new LruCache<T>(cacheOptions[scope]);
105+
106+
// Live cache instances; declared with `let` because they are replaced when the
// configuration changes.
let textCache = buildCache<PdfPageText>('text');
107+
let ocrCache = buildCache<CachedOcrResult>('ocr');
23108

24109
const buildPageKey = (fingerprint: string, page: number, options: PageCacheOptions): string => {
25110
const serializedOptions = JSON.stringify({
@@ -52,7 +137,7 @@ export const getCachedPageText = (
52137
options: PageCacheOptions
53138
): PdfPageText | undefined => {
54139
if (!fingerprint) return undefined;
55-
return textCache.get(buildPageKey(fingerprint, page, options))?.value;
140+
return textCache.get(buildPageKey(fingerprint, page, options));
56141
};
57142

58143
export const setCachedPageText = (
@@ -62,15 +147,15 @@ export const setCachedPageText = (
62147
value: PdfPageText
63148
): void => {
64149
if (!fingerprint) return;
65-
textCache.set(buildPageKey(fingerprint, page, options), { value, createdAt: Date.now() });
150+
textCache.set(buildPageKey(fingerprint, page, options), value);
66151
};
67152

68153
/**
 * Looks up a cached OCR result in `ocrCache`, using the key that
 * `buildOcrKey` derives from `fingerprint` and `target`.
 *
 * @returns The cached result, or `undefined` when `fingerprint` is missing
 *   or nothing is cached under that key.
 */
export const getCachedOcrText = (
69154
fingerprint: string | undefined,
70155
target: string
71156
): CachedOcrResult | undefined => {
72157
if (!fingerprint) return undefined;
73-
return ocrCache.get(buildOcrKey(fingerprint, target))?.value;
158+
return ocrCache.get(buildOcrKey(fingerprint, target));
74159
};
75160

76161
export const setCachedOcrText = (
@@ -79,19 +164,28 @@ export const setCachedOcrText = (
79164
value: CachedOcrResult
80165
): void => {
81166
if (!fingerprint) return;
82-
ocrCache.set(buildOcrKey(fingerprint, target), { value, createdAt: Date.now() });
167+
ocrCache.set(buildOcrKey(fingerprint, target), value);
83168
};
84169

85170
/**
 * Reports the current state of the text and OCR caches: entry counts, stored
 * keys, eviction counters, and a copy of the active per-scope configuration.
 */
export const getCacheStats = (): {
86171
text_entries: number;
87172
ocr_entries: number;
88173
text_keys: string[];
89174
ocr_keys: string[];
175+
text_evictions: number;
176+
ocr_evictions: number;
177+
config: typeof cacheOptions;
90178
} => ({
91179
text_entries: textCache.size,
92180
ocr_entries: ocrCache.size,
93-
text_keys: Array.from(textCache.keys()),
94-
ocr_keys: Array.from(ocrCache.keys()),
181+
text_keys: textCache.getKeys(),
182+
ocr_keys: ocrCache.getKeys(),
183+
text_evictions: textCache.evictionCount,
184+
ocr_evictions: ocrCache.evictionCount,
185+
config: {
186+
text: { ...cacheOptions.text },
187+
ocr: { ...cacheOptions.ocr },
188+
},
95189
});
96190

97191
export const clearCache = (
@@ -110,3 +204,28 @@ export const clearCache = (
110204

111205
return { cleared_text: clearText, cleared_ocr: clearOcr };
112206
};
207+
208+
/**
 * Overrides settings for one cache scope and replaces that scope's cache with
 * a new, empty instance built from the merged settings. Entries and the
 * eviction count of the previous instance are discarded.
 *
 * @param scope Which cache to reconfigure.
 * @param options Settings to override; omitted fields keep their current
 *   value. An explicit `maxEntries: undefined` is ignored, while an explicit
 *   `ttlMs: undefined` clears the TTL.
 */
export const configureCache = (scope: CacheScope, options: Partial<CacheOptions>): void => {
  const { maxEntries, ...optionalOverrides } = options;
  const merged: CacheOptions = { ...cacheOptions[scope], ...optionalOverrides };

  // `maxEntries` is required: letting `undefined` through would disable the
  // size limit, because `size > undefined` is always false.
  if (maxEntries !== undefined) {
    merged.maxEntries = maxEntries;
  }

  // Assign through a typed copy instead of a computed-key literal so no type
  // assertion is needed.
  const nextOptions: Record<CacheScope, CacheOptions> = { ...cacheOptions };
  nextOptions[scope] = merged;
  cacheOptions = nextOptions;

  if (scope === 'text') {
    textCache = buildCache<PdfPageText>('text');
  } else {
    ocrCache = buildCache<CachedOcrResult>('ocr');
  }
};
220+
221+
/**
 * Restores the default settings for both scopes and replaces both caches with
 * new, empty instances.
 */
export const resetCacheConfig = (): void => {
  // Copy each default so later configuration changes never touch the defaults.
  const { text: defaultText, ocr: defaultOcr } = DEFAULT_CACHE_OPTIONS;
  cacheOptions = { text: { ...defaultText }, ocr: { ...defaultOcr } };

  textCache = buildCache<PdfPageText>('text');
  ocrCache = buildCache<CachedOcrResult>('ocr');
};
230+
231+
/**
 * Returns a snapshot of the active configuration for both cache scopes.
 *
 * Each scope's options are copied: the live option objects are shared with
 * the cache instances, so exposing them would let callers change cache limits
 * by mutating the returned value.
 */
export const getCacheConfig = (): Record<CacheScope, CacheOptions> => ({
  text: { ...cacheOptions.text },
  ocr: { ...cacheOptions.ocr },
});

test/utils/cache.test.ts

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,24 @@
1-
import { afterEach, describe, expect, it } from 'vitest';
2-
import { buildOcrProviderKey, clearCache, getCachedOcrText, setCachedOcrText } from '../../src/utils/cache.js';
1+
import { afterEach, describe, expect, it, vi } from 'vitest';
2+
import {
3+
buildOcrProviderKey,
4+
clearCache,
5+
configureCache,
6+
getCachedOcrText,
7+
getCachedPageText,
8+
getCacheStats,
9+
resetCacheConfig,
10+
setCachedOcrText,
11+
setCachedPageText,
12+
} from '../../src/utils/cache.js';
313

414
const fingerprint = 'fingerprint-123';
515
const targetBase = 'image-1-0';
616

717
describe('OCR cache provider awareness', () => {
818
afterEach(() => {
9-
clearCache('ocr');
19+
clearCache('all');
20+
resetCacheConfig();
21+
vi.useRealTimers();
1022
});
1123

1224
it('creates distinct cache entries for different providers', () => {
@@ -35,4 +47,52 @@ describe('OCR cache provider awareness', () => {
3547
expect(getCachedOcrText(fingerprint, keyWithDifferentScale)).toBeUndefined();
3648
expect(getCachedOcrText(fingerprint, keyWithScale)?.text).toBe('scaled');
3749
});
50+
51+
it('evicts least recently used entries when exceeding max size', () => {
52+
configureCache('ocr', { maxEntries: 2 });
53+
54+
const firstKey = `${targetBase}-a`;
55+
const secondKey = `${targetBase}-b`;
56+
const thirdKey = `${targetBase}-c`;
57+
58+
setCachedOcrText(fingerprint, firstKey, { text: 'first' });
59+
setCachedOcrText(fingerprint, secondKey, { text: 'second' });
60+
61+
// Access the first key to make it most recently used
62+
expect(getCachedOcrText(fingerprint, firstKey)?.text).toBe('first');
63+
64+
setCachedOcrText(fingerprint, thirdKey, { text: 'third' });
65+
66+
expect(getCachedOcrText(fingerprint, secondKey)).toBeUndefined();
67+
expect(getCachedOcrText(fingerprint, firstKey)?.text).toBe('first');
68+
expect(getCachedOcrText(fingerprint, thirdKey)?.text).toBe('third');
69+
70+
const stats = getCacheStats();
71+
expect(stats.ocr_entries).toBe(2);
72+
expect(stats.ocr_evictions).toBe(1);
73+
});
74+
75+
it('expires entries after TTL duration', () => {
76+
vi.useFakeTimers();
77+
configureCache('text', { maxEntries: 3, ttlMs: 1000 });
78+
79+
const pageOptions = {
80+
includeImageIndexes: false,
81+
preserveWhitespace: false,
82+
trimLines: true,
83+
} as const;
84+
85+
setCachedPageText(fingerprint, 1, pageOptions, {
86+
page_number: 1,
87+
page_index: 0,
88+
page_label: null,
89+
lines: [],
90+
text: 'temporary',
91+
});
92+
93+
vi.advanceTimersByTime(1500);
94+
95+
expect(getCachedPageText(fingerprint, 1, pageOptions)).toBeUndefined();
96+
expect(getCacheStats().text_evictions).toBe(1);
97+
});
3898
});

0 commit comments

Comments
 (0)