@@ -32,115 +32,20 @@ describe('TF-IDF Index - Core Functionality', () => {
3232 expect ( result ?. score ?? 0 ) . toBeGreaterThan ( 0 ) ;
3333 } ) ;
3434
35- test ( 'returns no matches when query shares no terms with the corpus' , ( ) => {
36- const index = new TfidfIndex ( ) ;
37- index . build ( [
38- { id : 'doc1' , text : 'generate billing statement' } ,
39- { id : 'doc2' , text : 'update user profile' } ,
40- ] ) ;
41-
42- const results = index . search ( 'predict weather forecast' ) ;
43-
44- expect ( results ) . toHaveLength ( 0 ) ;
45- } ) ;
46- } ) ;
47-
48- describe ( 'TF-IDF Index - Score Validation' , ( ) => {
49- test ( 'returns scores within [0, 1] range' , ( ) => {
50- const index = new TfidfIndex ( ) ;
51- index . build ( [
52- { id : 'doc1' , text : 'employee management system' } ,
53- { id : 'doc2' , text : 'employee database records' } ,
54- { id : 'doc3' , text : 'candidate tracking application' } ,
55- ] ) ;
56-
57- const results = index . search ( 'employee' , 10 ) ;
58-
59- expect ( results . length ) . toBeGreaterThan ( 0 ) ;
60- for ( const result of results ) {
61- expect ( result . score ) . toBeGreaterThanOrEqual ( 0 ) ;
62- expect ( result . score ) . toBeLessThanOrEqual ( 1 ) ;
63- }
64- } ) ;
65-
66- test ( 'sorts results by score in descending order' , ( ) => {
67- const index = new TfidfIndex ( ) ;
68- index . build ( [
69- { id : 'doc1' , text : 'create employee' } ,
70- { id : 'doc2' , text : 'employee employee' } ,
71- { id : 'doc3' , text : 'list employee data' } ,
72- ] ) ;
73-
74- const results = index . search ( 'employee' , 10 ) ;
75-
76- expect ( results . length ) . toBeGreaterThan ( 1 ) ;
77- for ( let i = 0 ; i < results . length - 1 ; i ++ ) {
78- expect ( results [ i ] ?. score ?? 0 ) . toBeGreaterThanOrEqual ( results [ i + 1 ] ?. score ?? 0 ) ;
79- }
80- } ) ;
81- } ) ;
82-
83- describe ( 'TF-IDF Index - Edge Cases' , ( ) => {
84- test ( 'handles empty query' , ( ) => {
85- const index = new TfidfIndex ( ) ;
86- index . build ( [ { id : 'doc1' , text : 'some text' } ] ) ;
87-
88- const results = index . search ( '' ) ;
89-
90- expect ( results ) . toHaveLength ( 0 ) ;
91- } ) ;
92-
93- test ( 'handles empty corpus' , ( ) => {
94- const index = new TfidfIndex ( ) ;
95- index . build ( [ ] ) ;
96-
97- const results = index . search ( 'test query' ) ;
98-
99- expect ( results ) . toHaveLength ( 0 ) ;
100- } ) ;
101-
102- test ( 'handles single document corpus' , ( ) => {
103- const index = new TfidfIndex ( ) ;
104- index . build ( [ { id : 'doc1' , text : 'unique document' } ] ) ;
105-
106- const results = index . search ( 'document' ) ;
107-
108- expect ( results ) . toHaveLength ( 1 ) ;
109- expect ( results [ 0 ] ?. id ) . toBe ( 'doc1' ) ;
110- expect ( results [ 0 ] ?. score ?? 0 ) . toBeGreaterThan ( 0 ) ;
111- } ) ;
112-
113- test ( 'handles query with only stopwords' , ( ) => {
114- const index = new TfidfIndex ( ) ;
115- index . build ( [
116- { id : 'doc1' , text : 'important content here' } ,
117- { id : 'doc2' , text : 'another document' } ,
118- ] ) ;
119-
120- const results = index . search ( 'the and or but' ) ;
121-
122- expect ( results ) . toHaveLength ( 0 ) ;
123- } ) ;
124- } ) ;
125-
126- describe ( 'TF-IDF Index - Case Sensitivity' , ( ) => {
127- test ( 'performs case-insensitive search' , ( ) => {
35+ test ( 'assigns higher IDF to rare terms' , ( ) => {
12836 const index = new TfidfIndex ( ) ;
12937 index . build ( [
130- { id : 'doc1' , text : 'EMPLOYEE record' } ,
131- { id : 'doc2' , text : 'candidate profile' } ,
38+ { id : 'doc1' , text : 'common term appears everywhere' } ,
39+ { id : 'doc2' , text : 'common term appears here' } ,
40+ { id : 'doc3' , text : 'common term and rare word' } ,
13241 ] ) ;
13342
134- const resultsLower = index . search ( 'employee' ) ;
135- const resultsUpper = index . search ( 'EMPLOYEE' ) ;
136- const resultsMixed = index . search ( 'EmPlOyEe' ) ;
43+ const rareResults = index . search ( 'rare' ) ;
44+ const commonResults = index . search ( 'common' ) ;
13745
138- expect ( resultsLower . length ) . toBeGreaterThan ( 0 ) ;
139- expect ( resultsUpper . length ) . toBe ( resultsLower . length ) ;
140- expect ( resultsMixed . length ) . toBe ( resultsLower . length ) ;
141- expect ( resultsLower [ 0 ] ?. id ) . toBe ( 'doc1' ) ;
142- expect ( resultsUpper [ 0 ] ?. id ) . toBe ( 'doc1' ) ;
143- expect ( resultsMixed [ 0 ] ?. id ) . toBe ( 'doc1' ) ;
46+ expect ( rareResults . length ) . toBeGreaterThan ( 0 ) ;
47+ expect ( commonResults . length ) . toBeGreaterThan ( 0 ) ;
48+ expect ( rareResults [ 0 ] ?. score ?? 0 ) . toBeGreaterThan ( 0 ) ;
14449 } ) ;
14550} ) ;
14651
@@ -153,32 +58,13 @@ describe('TF-IDF Index - Tool Name Scenarios', () => {
15358 { id : 'workday_create_candidate' , text : 'workday_create_candidate create candidate workday' } ,
15459 ] ) ;
15560
156- // Search for terms that appear in tool names
15761 const results = index . search ( 'bamboohr create employee' ) ;
15862
15963 expect ( results . length ) . toBeGreaterThan ( 0 ) ;
160- // The BambooHR create employee tool should be highly ranked
16164 const topIds = results . slice ( 0 , 2 ) . map ( ( r ) => r . id ) ;
16265 expect ( topIds ) . toContain ( 'bamboohr_create_employee' ) ;
16366 } ) ;
16467
165- test ( 'finds relevant tools with multiple query terms' , ( ) => {
166- const index = new TfidfIndex ( ) ;
167- index . build ( [
168- { id : 'bamboohr_create_employee' , text : 'create employee bamboohr system' } ,
169- { id : 'bamboohr_list_employees' , text : 'list employees bamboohr system' } ,
170- { id : 'workday_create_candidate' , text : 'create candidate workday system' } ,
171- { id : 'salesforce_list_contacts' , text : 'list contacts salesforce system' } ,
172- ] ) ;
173-
174- const results = index . search ( 'employee bamboohr' ) ;
175-
176- expect ( results . length ) . toBeGreaterThan ( 0 ) ;
177- // BambooHR employee tools should be top ranked
178- const topIds = results . slice ( 0 , 2 ) . map ( ( r ) => r . id ) ;
179- expect ( topIds . some ( ( id ) => id . includes ( 'bamboohr' ) && id . includes ( 'employee' ) ) ) . toBe ( true ) ;
180- } ) ;
181-
18268 test ( 'ranks by action type (create, list, etc)' , ( ) => {
18369 const index = new TfidfIndex ( ) ;
18470 index . build ( [
@@ -191,74 +77,18 @@ describe('TF-IDF Index - Tool Name Scenarios', () => {
19177 const results = index . search ( 'create employee' ) ;
19278
19379 expect ( results . length ) . toBeGreaterThan ( 0 ) ;
194- // create_employee should be top result
19580 expect ( results [ 0 ] ?. id ) . toBe ( 'bamboohr_create_employee' ) ;
19681 } ) ;
19782} ) ;
19883
199- describe ( 'TF-IDF Index - Search Limits' , ( ) => {
200- test ( 'respects k parameter limit' , ( ) => {
201- const index = new TfidfIndex ( ) ;
202- index . build ( [
203- { id : 'doc1' , text : 'alpha' } ,
204- { id : 'doc2' , text : 'alpha beta' } ,
205- { id : 'doc3' , text : 'alpha gamma' } ,
206- { id : 'doc4' , text : 'alpha delta' } ,
207- { id : 'doc5' , text : 'alpha epsilon' } ,
208- ] ) ;
209-
210- const results = index . search ( 'alpha' , 2 ) ;
211-
212- expect ( results . length ) . toBeLessThanOrEqual ( 2 ) ;
213- } ) ;
214-
215- test ( 'returns all matches when k exceeds corpus size' , ( ) => {
216- const index = new TfidfIndex ( ) ;
217- index . build ( [
218- { id : 'doc1' , text : 'test document' } ,
219- { id : 'doc2' , text : 'test file' } ,
220- ] ) ;
221-
222- const results = index . search ( 'test' , 100 ) ;
223-
224- // Should return at most 2 results (corpus size)
225- expect ( results . length ) . toBeLessThanOrEqual ( 2 ) ;
226- } ) ;
227- } ) ;
228-
229- describe ( 'TF-IDF Index - IDF Calculation' , ( ) => {
230- test ( 'assigns higher IDF to rare terms' , ( ) => {
231- const index = new TfidfIndex ( ) ;
232- index . build ( [
233- { id : 'doc1' , text : 'common term appears everywhere' } ,
234- { id : 'doc2' , text : 'common term appears here' } ,
235- { id : 'doc3' , text : 'common term and rare word' } ,
236- ] ) ;
237-
238- // Search for the rare term
239- const rareResults = index . search ( 'rare' ) ;
240- // Search for the common term
241- const commonResults = index . search ( 'common' ) ;
242-
243- // Both should return results
244- expect ( rareResults . length ) . toBeGreaterThan ( 0 ) ;
245- expect ( commonResults . length ) . toBeGreaterThan ( 0 ) ;
246-
247- // The document with "rare" should have a good score because it's unique
248- expect ( rareResults [ 0 ] ?. score ?? 0 ) . toBeGreaterThan ( 0 ) ;
249- } ) ;
250- } ) ;
251-
25284describe ( 'TF-IDF Index - Property-Based Tests' , ( ) => {
253- // Arbitrary for generating document corpora
25485 const documentArbitrary = fc . record ( {
25586 id : fc . string ( { minLength : 1 , maxLength : 20 } ) . filter ( ( s ) => s . trim ( ) . length > 0 ) ,
25687 text : fc . string ( { minLength : 1 , maxLength : 200 } ) ,
25788 } ) ;
25889
25990 const corpusArbitrary = fc . array ( documentArbitrary , { minLength : 1 , maxLength : 20 } ) ;
26091
261- // Arbitrary for generating non-empty queries with alphanumeric content
26292 const queryArbitrary = fc
26393 . array ( fc . stringMatching ( / ^ [ a - z A - Z ] [ a - z A - Z 0 - 9 ] * $ / ) , { minLength : 1 , maxLength : 5 } )
26494 . map ( ( words ) => words . join ( ' ' ) ) ;
@@ -302,7 +132,7 @@ describe('TF-IDF Index - Property-Based Tests', () => {
302132 ) ;
303133
304134 fcTest . prop ( [ corpusArbitrary , queryArbitrary ] , { numRuns : 100 } ) (
305- 'search is case-insensitive (same results for different cases) ' ,
135+ 'search is case-insensitive' ,
306136 ( corpus , query ) => {
307137 const index = new TfidfIndex ( ) ;
308138 index . build ( corpus ) ;
@@ -318,19 +148,16 @@ describe('TF-IDF Index - Property-Based Tests', () => {
318148 } ,
319149 ) ;
320150
321- fcTest . prop ( [ queryArbitrary ] , { numRuns : 50 } ) (
322- 'empty corpus always returns empty results' ,
323- ( query ) => {
324- const index = new TfidfIndex ( ) ;
325- index . build ( [ ] ) ;
326- const results = index . search ( query ) ;
151+ fcTest . prop ( [ queryArbitrary ] , { numRuns : 50 } ) ( 'empty corpus returns empty results' , ( query ) => {
152+ const index = new TfidfIndex ( ) ;
153+ index . build ( [ ] ) ;
154+ const results = index . search ( query ) ;
327155
328- expect ( results ) . toHaveLength ( 0 ) ;
329- } ,
330- ) ;
156+ expect ( results ) . toHaveLength ( 0 ) ;
157+ } ) ;
331158
332159 fcTest . prop ( [ corpusArbitrary , queryArbitrary ] , { numRuns : 100 } ) (
333- 'result IDs are always from the indexed corpus' ,
160+ 'result IDs are from the indexed corpus' ,
334161 ( corpus , query ) => {
335162 const index = new TfidfIndex ( ) ;
336163 index . build ( corpus ) ;
@@ -344,7 +171,7 @@ describe('TF-IDF Index - Property-Based Tests', () => {
344171 ) ;
345172
346173 fcTest . prop ( [ corpusArbitrary , queryArbitrary ] , { numRuns : 50 } ) (
347- 'search is deterministic (same input produces same output) ' ,
174+ 'search is deterministic' ,
348175 ( corpus , query ) => {
349176 const index1 = new TfidfIndex ( ) ;
350177 const index2 = new TfidfIndex ( ) ;
0 commit comments