|
| 1 | +import { fc, test as fcTest } from '@fast-check/vitest'; |
1 | 2 | import { TfidfIndex } from './tfidf-index'; |
2 | 3 |
|
3 | 4 | describe('TF-IDF Index - Core Functionality', () => { |
@@ -247,3 +248,114 @@ describe('TF-IDF Index - IDF Calculation', () => { |
247 | 248 | expect(rareResults[0]?.score ?? 0).toBeGreaterThan(0); |
248 | 249 | }); |
249 | 250 | }); |
| 251 | + |
describe('TF-IDF Index - Property-Based Tests', () => {
  /** Returns a fresh fast-check parameters object requesting `numRuns` runs. */
  const runs = (numRuns) => ({ numRuns });

  // Search limit larger than the biggest generated corpus (20 documents),
  // used by the properties that are not about the limit itself.
  const WIDE_LIMIT = 100;

  /** Creates a new TfidfIndex and builds it from the given documents. */
  const buildIndex = (documents) => {
    const built = new TfidfIndex();
    built.build(documents);
    return built;
  };

  // One { id, text } document: the id has length 1-20 and is not
  // whitespace-only; the text is an arbitrary string of length 1-200.
  const documentArbitrary = fc.record({
    id: fc
      .string({ minLength: 1, maxLength: 20 })
      .filter((candidate) => candidate.trim().length > 0),
    text: fc.string({ minLength: 1, maxLength: 200 }),
  });

  // A corpus is a list of 1-20 generated documents.
  const corpusArbitrary = fc.array(documentArbitrary, { minLength: 1, maxLength: 20 });

  // A query is 1-5 words joined by single spaces; each word starts with an
  // ASCII letter and continues with ASCII letters or digits.
  const wordArbitrary = fc.stringMatching(/^[a-zA-Z][a-zA-Z0-9]*$/);
  const queryArbitrary = fc
    .array(wordArbitrary, { minLength: 1, maxLength: 5 })
    .map((tokens) => tokens.join(' '));

  // Every returned score must lie in the closed interval [0, 1].
  fcTest.prop([corpusArbitrary, queryArbitrary], runs(100))(
    'scores are always within [0, 1] range',
    (corpus, query) => {
      const hits = buildIndex(corpus).search(query, WIDE_LIMIT);

      for (const { score } of hits) {
        expect(score).toBeGreaterThanOrEqual(0);
        expect(score).toBeLessThanOrEqual(1);
      }
    },
  );

  // Each result's score must be >= the score of the result that follows it.
  fcTest.prop([corpusArbitrary, queryArbitrary], runs(100))(
    'results are always sorted by score in descending order',
    (corpus, query) => {
      const hits = buildIndex(corpus).search(query, WIDE_LIMIT);

      for (let next = 1; next < hits.length; next += 1) {
        const earlierScore = hits[next - 1]?.score ?? 0;
        const laterScore = hits[next]?.score ?? 0;
        expect(earlierScore).toBeGreaterThanOrEqual(laterScore);
      }
    },
  );

  // The requested limit k (1-50) is an upper bound on the result count.
  fcTest.prop([corpusArbitrary, queryArbitrary, fc.integer({ min: 1, max: 50 })], runs(100))(
    'search returns at most k results',
    (corpus, query, k) => {
      const hits = buildIndex(corpus).search(query, k);

      expect(hits.length).toBeLessThanOrEqual(k);
    },
  );

  // Lower-casing or upper-casing the query must yield the same ids in the
  // same order, with scores equal to 10 decimal places.
  fcTest.prop([corpusArbitrary, queryArbitrary], runs(100))(
    'search is case-insensitive (same results for different cases)',
    (corpus, query) => {
      const searcher = buildIndex(corpus);

      const fromLower = searcher.search(query.toLowerCase());
      const fromUpper = searcher.search(query.toUpperCase());

      expect(fromLower.length).toBe(fromUpper.length);
      for (let pos = 0; pos < fromLower.length; pos += 1) {
        const lowerHit = fromLower[pos];
        const upperHit = fromUpper[pos];
        expect(lowerHit?.id).toBe(upperHit?.id);
        expect(lowerHit?.score).toBeCloseTo(upperHit?.score ?? 0, 10);
      }
    },
  );

  // An index built from no documents has nothing to return for any query.
  fcTest.prop([queryArbitrary], runs(50))(
    'empty corpus always returns empty results',
    (query) => {
      const hits = buildIndex([]).search(query);

      expect(hits).toHaveLength(0);
    },
  );

  // Search must never return an id that was not among the indexed documents.
  fcTest.prop([corpusArbitrary, queryArbitrary], runs(100))(
    'result IDs are always from the indexed corpus',
    (corpus, query) => {
      const hits = buildIndex(corpus).search(query, WIDE_LIMIT);

      const knownIds = new Set(corpus.map(({ id }) => id));
      for (const { id } of hits) {
        expect(knownIds.has(id)).toBe(true);
      }
    },
  );

  // Two independent indexes built from the same corpus must answer the same
  // query with deeply equal results.
  fcTest.prop([corpusArbitrary, queryArbitrary], runs(50))(
    'search is deterministic (same input produces same output)',
    (corpus, query) => {
      const first = new TfidfIndex();
      const second = new TfidfIndex();

      first.build(corpus);
      second.build(corpus);

      const firstHits = first.search(query, 10);
      const secondHits = second.search(query, 10);

      expect(firstHits).toEqual(secondHits);
    },
  );
});
0 commit comments