/**
 * Unit tests for the file token-budget helpers.
 *
 * `validateFileTokenBudget` is exercised with mocked `fs/promises` and
 * `countTokens`, so no real file is read and no real tokenizer runs.
 * `truncateFileContent` is a synchronous helper and is tested directly.
 */
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"
import {
	validateFileTokenBudget,
	truncateFileContent,
	FILE_SIZE_THRESHOLD,
	MAX_FILE_SIZE_FOR_TOKENIZATION,
	PREVIEW_SIZE_FOR_LARGE_FILES,
} from "../fileTokenBudget"

// Mock dependencies
vi.mock("fs/promises", () => ({
	stat: vi.fn(),
	readFile: vi.fn(),
	open: vi.fn(),
}))

vi.mock("../../../../utils/countTokens", () => ({
	countTokens: vi.fn(),
}))

// Import after mocking so the handles below refer to the mocked modules
const fs = await import("fs/promises")
const { countTokens } = await import("../../../../utils/countTokens")

const mockStat = vi.mocked(fs.stat)
const mockReadFile = vi.mocked(fs.readFile)
const mockOpen = vi.mocked(fs.open)
const mockCountTokens = vi.mocked(countTokens)

describe("fileTokenBudget", () => {
	beforeEach(() => {
		// Reset call history AND implementations on every mock, so a value
		// configured by one test (e.g. a resolved/rejected countTokens) can
		// never leak into the next one.
		vi.resetAllMocks()
	})

	afterEach(() => {
		vi.restoreAllMocks()
	})

	describe("validateFileTokenBudget", () => {
		it("should not truncate files smaller than FILE_SIZE_THRESHOLD", async () => {
			const filePath = "/test/small-file.txt"
			const contextWindow = 200000
			const currentTokens = 10000

			// Mock file stats - small file (50KB)
			mockStat.mockResolvedValue({
				size: 50000,
			} as any)

			const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)

			expect(result.shouldTruncate).toBe(false)
			expect(mockReadFile).not.toHaveBeenCalled()
			expect(mockCountTokens).not.toHaveBeenCalled()
		})

		it("should validate and not truncate large files that fit within budget", async () => {
			const filePath = "/test/large-file.txt"
			const contextWindow = 200000
			const currentTokens = 10000
			const fileContent = "x".repeat(150000) // 150KB file

			// Mock file stats - large file (150KB)
			mockStat.mockResolvedValue({
				size: 150000,
			} as any)

			// Mock file read
			mockReadFile.mockResolvedValue(fileContent)

			// Mock token counting - file uses 30k tokens (within 60% of 190k remaining = 114k budget)
			mockCountTokens.mockResolvedValue(30000)

			const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)

			expect(result.shouldTruncate).toBe(false)
			expect(mockReadFile).toHaveBeenCalledWith(filePath, "utf-8")
			expect(mockCountTokens).toHaveBeenCalled()
		})

		it("should truncate large files that exceed token budget", async () => {
			const filePath = "/test/huge-file.txt"
			const contextWindow = 200000
			const currentTokens = 10000
			const fileContent = "x".repeat(500000) // 500KB file

			// Mock file stats - huge file (500KB)
			mockStat.mockResolvedValue({
				size: 500000,
			} as any)

			// Mock file read
			mockReadFile.mockResolvedValue(fileContent)

			// Mock token counting - file uses 150k tokens (exceeds 60% of 190k remaining = 114k budget)
			mockCountTokens.mockResolvedValue(150000)

			const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)

			expect(result.shouldTruncate).toBe(true)
			expect(result.maxChars).toBeDefined()
			expect(result.maxChars).toBeGreaterThan(0)
			expect(result.reason).toContain("150000 tokens")
			expect(result.reason).toContain("114000 tokens available")
		})

		it("should handle case where no budget is available", async () => {
			const filePath = "/test/file.txt"
			const contextWindow = 200000
			const currentTokens = 200000 // Context is full

			// Mock file stats - large file
			mockStat.mockResolvedValue({
				size: 150000,
			} as any)

			const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)

			expect(result.shouldTruncate).toBe(true)
			expect(result.maxChars).toBe(0)
			expect(result.reason).toContain("No available context budget")
		})

		it("should handle errors gracefully and not truncate", async () => {
			const filePath = "/test/error-file.txt"
			const contextWindow = 200000
			const currentTokens = 10000

			// Mock file stats to throw an error
			mockStat.mockRejectedValue(new Error("File not found"))

			const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)

			expect(result.shouldTruncate).toBe(false)
		})

		it("should calculate correct token budget with 60/40 split", async () => {
			const filePath = "/test/file.txt"
			const contextWindow = 100000
			const currentTokens = 20000 // 80k remaining
			const fileContent = "test content"

			mockStat.mockResolvedValue({ size: 150000 } as any)
			mockReadFile.mockResolvedValue(fileContent)

			// Available budget should be: (100000 - 20000) * 0.6 = 48000
			// File uses 50k tokens, should be truncated
			mockCountTokens.mockResolvedValue(50000)

			const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)

			expect(result.shouldTruncate).toBe(true)
			// maxChars is expected to be exactly 48000 * 3 = 144000
			expect(result.maxChars).toBe(144000)
		})

		it("should validate files at the FILE_SIZE_THRESHOLD boundary", async () => {
			const filePath = "/test/boundary-file.txt"
			const contextWindow = 200000
			const currentTokens = 10000
			const fileContent = "x".repeat(1000)

			// Mock file stats - exactly at threshold (should trigger validation)
			mockStat.mockResolvedValue({
				size: FILE_SIZE_THRESHOLD,
			} as any)

			mockReadFile.mockResolvedValue(fileContent)
			mockCountTokens.mockResolvedValue(30000) // Within budget

			const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)

			// At exactly the threshold, it should validate
			expect(mockReadFile).toHaveBeenCalled()
			expect(mockCountTokens).toHaveBeenCalled()
			expect(result.shouldTruncate).toBe(false)
		})

		it("should provide preview for files exceeding MAX_FILE_SIZE_FOR_TOKENIZATION", async () => {
			const filePath = "/test/huge-file.txt"
			const contextWindow = 200000
			const currentTokens = 10000

			// Mock file stats - file is 1,000,000 bytes over the tokenization limit
			mockStat.mockResolvedValue({
				size: MAX_FILE_SIZE_FOR_TOKENIZATION + 1000000,
			} as any)

			// Mock the file handle returned by fs.open (read + close) used for the preview
			const mockRead = vi.fn().mockResolvedValue({
				bytesRead: PREVIEW_SIZE_FOR_LARGE_FILES,
			})
			const mockClose = vi.fn().mockResolvedValue(undefined)
			mockOpen.mockResolvedValue({
				read: mockRead,
				close: mockClose,
			} as any)

			// Mock token counting for the preview
			mockCountTokens.mockResolvedValue(30000) // Preview fits within budget

			const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)

			expect(result.shouldTruncate).toBe(true)
			expect(result.isPreview).toBe(true)
			expect(result.reason).toContain("too large")
			expect(result.reason).toContain("preview")
			// Should read preview and count tokens
			expect(mockOpen).toHaveBeenCalled()
			expect(mockCountTokens).toHaveBeenCalled()
		})

		it("should handle files exactly at MAX_FILE_SIZE_FOR_TOKENIZATION boundary", async () => {
			const filePath = "/test/boundary-file.txt"
			const contextWindow = 200000
			const currentTokens = 10000
			const fileContent = "x".repeat(1000)

			// Mock file stats - exactly at max size
			mockStat.mockResolvedValue({
				size: MAX_FILE_SIZE_FOR_TOKENIZATION,
			} as any)

			mockReadFile.mockResolvedValue(fileContent)
			mockCountTokens.mockResolvedValue(30000) // Within budget

			// Only the side effects are checked here, so the result is not captured
			await validateFileTokenBudget(filePath, contextWindow, currentTokens)

			// At exactly the limit, should still attempt to tokenize
			expect(mockReadFile).toHaveBeenCalled()
			expect(mockCountTokens).toHaveBeenCalled()
		})

		it("should handle tokenizer unreachable errors gracefully", async () => {
			const filePath = "/test/problematic-file.txt"
			const contextWindow = 200000
			const currentTokens = 10000
			const fileContent = "x".repeat(200000) // Content that might cause issues

			// Mock file stats - within size limits but content causes tokenizer crash
			mockStat.mockResolvedValue({
				size: 200000,
			} as any)

			mockReadFile.mockResolvedValue(fileContent)
			// Simulate tokenizer "unreachable" error
			mockCountTokens.mockRejectedValue(new Error("unreachable"))

			const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)

			// Should fallback with conservative estimation.
			// For reference: 60% of the remaining (200000 - 10000) tokens is 114000.
			expect(result.shouldTruncate).toBe(true)
			expect(result.isPreview).toBe(true)
			expect(result.reason).toContain("tokenizer error")

			// The actual maxChars depends on conservative estimation
			// content.length (200000) is used as estimate since tokenizer failed
			expect(result.maxChars).toBeDefined()
			expect(typeof result.maxChars).toBe("number")
		})

		it("should handle other tokenizer errors conservatively", async () => {
			const filePath = "/test/error-file.txt"
			const contextWindow = 200000
			const currentTokens = 10000
			const fileContent = "test content"

			mockStat.mockResolvedValue({ size: 150000 } as any)
			mockReadFile.mockResolvedValue(fileContent)
			// Simulate a different error
			mockCountTokens.mockRejectedValue(new Error("Network error"))

			const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)

			// Should return safe fallback (don't truncate, let normal error handling take over)
			expect(result.shouldTruncate).toBe(false)
		})
	})

	describe("truncateFileContent", () => {
		it("should truncate content to specified character limit", () => {
			const content = "a".repeat(1000)
			const maxChars = 500
			const totalChars = 1000

			const result = truncateFileContent(content, maxChars, totalChars, false)

			expect(result.content).toHaveLength(500)
			expect(result.content).toBe("a".repeat(500))
			expect(result.notice).toContain("500 of 1000 characters")
			expect(result.notice).toContain("context limitations")
		})

		it("should show preview message for large files", () => {
			const content = "x".repeat(10000000) // ~10MB (9.54MB in binary)
			const maxChars = 100000 // 100KB preview
			const totalChars = 10000000

			const result = truncateFileContent(content, maxChars, totalChars, true)

			expect(result.content).toHaveLength(maxChars)
			expect(result.notice).toContain("Preview")
			expect(result.notice).toContain("0.1MB") // 100KB = 0.1MB
			expect(result.notice).toContain("9.54MB") // Binary MB calculation
			expect(result.notice).toContain("line_range")
		})

		it("should include helpful notice about using line_range", () => {
			const content = "test content that is very long"
			const maxChars = 10
			// Derive the total from the string itself so it cannot drift from the fixture
			const totalChars = content.length

			const result = truncateFileContent(content, maxChars, totalChars)

			expect(result.notice).toContain("line_range")
			expect(result.notice).toContain("specific sections")
		})

		it("should handle empty content", () => {
			const content = ""
			const maxChars = 100
			const totalChars = 0

			const result = truncateFileContent(content, maxChars, totalChars)

			expect(result.content).toBe("")
			expect(result.notice).toContain("0 of 0 characters")
		})

		it("should truncate multi-line content correctly", () => {
			const content = "line1\nline2\nline3\nline4\nline5"
			const maxChars = 15
			const totalChars = content.length

			const result = truncateFileContent(content, maxChars, totalChars)

			expect(result.content).toBe("line1\nline2\nlin")
			expect(result.content).toHaveLength(15)
		})

		it("should work with unicode characters", () => {
			const content = "Hello 😀 World 🌍 Test 🎉"
			const maxChars = 10
			const totalChars = content.length

			const result = truncateFileContent(content, maxChars, totalChars)

			expect(result.content).toHaveLength(10)
			expect(result.notice).toBeDefined()
		})
	})
})