/**
 * A simple Least Recently Used (LRU) cache.
 *
 * Entries are kept in a `Map`, whose iteration order is insertion order. Every
 * read or write of a key removes and re-inserts it, so the first key of the map
 * is always the least recently used one and the last key is the most recently
 * used one. When an insertion pushes the size above `capacity`, the first key
 * is evicted.
 */
export class LRUCache {
    /**
     * Creates an LRUCache instance.
     * @param {number} capacity The maximum number of items the cache can hold.
     * A capacity of `0` (or less) yields a cache that never retains anything.
     * @throws {TypeError} If `capacity` is not a number or is `NaN`. Such values
     * would make the `size > capacity` eviction check always false, silently
     * turning the cache into an unbounded map.
     */
    constructor(capacity) {
        if (typeof capacity !== 'number' || Number.isNaN(capacity)) {
            throw new TypeError(`LRUCache capacity must be a number, but got: ${String(capacity)}`);
        }
        this.capacity = capacity;
        this.cache = new Map();
    }

    /**
     * Retrieves the value associated with the given key and marks the key as recently used.
     * @param {any} key The key to retrieve.
     * @returns {any} The value associated with the key, or undefined if the key does not exist.
     */
    get(key) {
        // `has` (rather than comparing the value to undefined) so that a key
        // explicitly stored with an `undefined` value is still refreshed.
        if (!this.cache.has(key)) return undefined;

        const value = this.cache.get(key);

        // Re-insert to move the key to the most-recently-used end of the map.
        this.cache.delete(key);
        this.cache.set(key, value);
        return value;
    }

    /**
     * Inserts or updates the key-value pair in the cache.
     * If the key already exists, it is updated and marked as recently used.
     * If the cache exceeds its capacity, the least recently used item is evicted.
     * @param {any} key The key to add or update.
     * @param {any} value The value to associate with the key.
     */
    put(key, value) {
        // Deleting first guarantees the key ends up last (most recently used),
        // since `Map.prototype.set` on an existing key keeps its old position.
        this.cache.delete(key);
        this.cache.set(key, value);

        if (this.cache.size > this.capacity) {
            // The first key in iteration order is the least recently used.
            const oldestKey = this.cache.keys().next().value;
            this.cache.delete(oldestKey);
        }
    }

    /**
     * Clears the cache.
     */
    clear() {
        this.cache.clear();
    }
}
import { PriorityQueue, DictionarySplitter, LRUCache } from "../../src/utils/data-structures.js";

// Behavioral tests for LRUCache: lookups, updates, eviction order and clearing.
describe("LRUCache", () => {
  it("should return undefined for non-existent keys", () => {
    const lru = new LRUCache(2);
    const missing = lru.get("nonexistent");
    expect(missing).toEqual(undefined);
  });

  it("should store and retrieve values correctly", () => {
    const lru = new LRUCache(2);
    lru.put("a", 1);
    lru.put("b", 2);

    const first = lru.get("a");
    expect(first).toEqual(1);
    const second = lru.get("b");
    expect(second).toEqual(2);
  });

  it("should update the value and refresh the usage", () => {
    const lru = new LRUCache(2);
    lru.put("a", 1);
    lru.put("b", 2);

    // Overwrite the value stored under "a".
    lru.put("a", 10);
    expect(lru.get("a")).toEqual(10);

    // Touch "a" again, leaving "b" as the least recently used entry.
    lru.get("a");
    lru.put("c", 3);

    // Adding "c" went over capacity, so "b" must be gone.
    expect(lru.get("b")).toEqual(undefined);
    expect(lru.get("c")).toEqual(3);
  });

  it("should evict the least recently used item when capacity is exceeded", () => {
    const lru = new LRUCache(3);
    lru.put("a", 1);
    lru.put("b", 2);
    lru.put("c", 3);

    // Reading "a" makes it the most recently used entry.
    lru.get("a");

    // A fourth key exceeds the capacity; "b" is now the oldest and is dropped.
    lru.put("d", 4);

    expect(lru.get("b")).toEqual(undefined);
    expect(lru.get("a")).toEqual(1);
    expect(lru.get("c")).toEqual(3);
    expect(lru.get("d")).toEqual(4);
  });

  it("should update the usage order on get", () => {
    const lru = new LRUCache(3);
    lru.put("a", "apple");
    lru.put("b", "banana");
    lru.put("c", "cherry");

    // Reading "a" moves it to the most-recently-used position.
    expect(lru.get("a")).toEqual("apple");

    // The next insertion therefore evicts "b" instead of "a".
    lru.put("d", "date");
    expect(lru.get("b")).toEqual(undefined);

    // The three remaining entries are still retrievable.
    expect(lru.get("a")).toEqual("apple");
    expect(lru.get("c")).toEqual("cherry");
    expect(lru.get("d")).toEqual("date");
  });

  it("should clear the cache", () => {
    const lru = new LRUCache(2);
    lru.put("a", 1);
    lru.put("b", 2);

    lru.clear();

    expect(lru.get("a")).toEqual(undefined);
    expect(lru.get("b")).toEqual(undefined);
  });
});