@@ -112,7 +112,7 @@ export class BrowserTaskClassifier {
112112 ] ;
113113
114114 for ( const keyword of allKeywords ) {
115- const normalizedKeyword = this . normalizeKeyword ( keyword ) ;
115+ const normalizedKeyword = this . normalizeText ( keyword ) ;
116116 if ( ! index [ normalizedKeyword ] ) {
117117 index [ normalizedKeyword ] = [ ] ;
118118 }
@@ -165,7 +165,7 @@ export class BrowserTaskClassifier {
165165 classifyWithEnhancedSemantics ( taskDescription ) {
166166 console . log ( '🔄 Using enhanced semantic similarity classification...' ) ;
167167
168- const text = this . preprocessText ( taskDescription ) ;
168+ const text = this . normalizeText ( taskDescription ) ;
169169 const categoryScores = { } ;
170170 const subcategoryScores = { } ;
171171
@@ -218,19 +218,16 @@ export class BrowserTaskClassifier {
218218 classifyWithEnhancedKeywords ( taskDescription ) {
219219 console . log ( '🔄 Using enhanced keyword classification...' ) ;
220220
221- const text = this . preprocessText ( taskDescription ) ;
221+ const text = this . normalizeText ( taskDescription ) ;
222222 const words = text . split ( / \s + / ) ;
223- const bigrams = this . generateBigrams ( words ) ;
224- const trigrams = this . generateTrigrams ( words ) ;
225-
226223 const categoryScores = { } ;
227224 const subcategoryScores = { } ;
228-
225+
229226 // Match unigrams, bigrams, and trigrams
230- const allNgrams = [ ...words , ...bigrams , ...trigrams ] ;
227+ const allNgrams = [ ...words , ...this . generateNgrams ( words , 2 ) , ...this . generateNgrams ( words , 3 ) ] ;
231228
232229 for ( const ngram of allNgrams ) {
233- const normalizedNgram = this . normalizeKeyword ( ngram ) ;
230+ const normalizedNgram = this . normalizeText ( ngram ) ;
234231 const matches = this . keywordIndex [ normalizedNgram ] || [ ] ;
235232
236233 for ( const match of matches ) {
@@ -308,7 +305,7 @@ export class BrowserTaskClassifier {
308305 const subcategoryPredictions = [ ] ;
309306
310307 // Use enhanced keyword matching for subcategories
311- const text = this . preprocessText ( taskDescription ) ;
308+ const text = this . normalizeText ( taskDescription ) ;
312309
313310 for ( const prediction of categoryPredictions . slice ( 0 , 2 ) ) {
314311 const categoryData = this . taskTaxonomy [ prediction . category ] ;
@@ -352,44 +349,45 @@ export class BrowserTaskClassifier {
352349 }
353350
354351 /**
355- * Normalize category predictions
352+ * Normalize and rank score entries
353+ * @param {Object } scores - Map of key -> { score, matches, label, ... }
354+ * @param {Function } mapFn - Transforms (key, data) to prediction object
355+ * @param {Object } options - { limit = 3, normalizeScoresToOne = false }
356356 */
357- normalizePredictions ( categoryScores ) {
358- const predictions = Object . entries ( categoryScores )
359- . map ( ( [ category , data ] ) => ( {
360- category,
361- label : data . label ,
362- score : data . score / Math . max ( 1 , data . matches ) , // Normalize by match count
357+ normalizeScores ( scores , mapFn , { limit = 3 , normalizeScoresToOne = false } = { } ) {
358+ const predictions = Object . entries ( scores )
359+ . map ( ( [ key , data ] ) => ( {
360+ ...mapFn ( key , data ) ,
361+ score : data . score / Math . max ( 1 , data . matches ) ,
363362 matchCount : data . matches
364363 } ) )
365364 . sort ( ( a , b ) => b . score - a . score )
366- . slice ( 0 , 3 ) ;
367-
368- // Normalize scores to sum to 1
369- const totalScore = predictions . reduce ( ( sum , p ) => sum + p . score , 0 ) ;
370- if ( totalScore > 0 ) {
371- predictions . forEach ( p => p . score = Math . min ( 1 , p . score / totalScore ) ) ;
365+ . slice ( 0 , limit ) ;
366+
367+ if ( normalizeScoresToOne ) {
368+ const totalScore = predictions . reduce ( ( sum , p ) => sum + p . score , 0 ) ;
369+ if ( totalScore > 0 ) {
370+ predictions . forEach ( p => p . score = Math . min ( 1 , p . score / totalScore ) ) ;
371+ }
372372 }
373-
373+
374374 return predictions ;
375375 }
376376
377- /**
378- * Normalize subcategory predictions
379- */
377+ normalizePredictions ( categoryScores ) {
378+ return this . normalizeScores (
379+ categoryScores ,
380+ ( category , data ) => ( { category, label : data . label } ) ,
381+ { limit : 3 , normalizeScoresToOne : true }
382+ ) ;
383+ }
384+
380385 normalizeSubcategoryPredictions ( subcategoryScores ) {
381- const predictions = Object . entries ( subcategoryScores )
382- . map ( ( [ key , data ] ) => ( {
383- category : data . category ,
384- subcategory : data . subcategory ,
385- label : data . label ,
386- score : data . score / Math . max ( 1 , data . matches ) ,
387- matchCount : data . matches
388- } ) )
389- . sort ( ( a , b ) => b . score - a . score )
390- . slice ( 0 , 5 ) ;
391-
392- return predictions ;
386+ return this . normalizeScores (
387+ subcategoryScores ,
388+ ( _key , data ) => ( { category : data . category , subcategory : data . subcategory , label : data . label } ) ,
389+ { limit : 5 }
390+ ) ;
393391 }
394392
395393 /**
@@ -420,9 +418,10 @@ export class BrowserTaskClassifier {
420418 }
421419
422420 /**
423- * Preprocess text for classification
421+ * Normalize text: lowercase, strip punctuation, collapse whitespace
422+ * Used for both preprocessing input text and normalizing keywords
424423 */
425- preprocessText ( text ) {
424+ normalizeText ( text ) {
426425 return text
427426 . toLowerCase ( )
428427 . replace ( / [ ^ \w \s ] / g, ' ' )
@@ -431,36 +430,14 @@ export class BrowserTaskClassifier {
431430 }
432431
433432 /**
434- * Normalize keyword for indexing
435- */
436- normalizeKeyword ( keyword ) {
437- return keyword
438- . toLowerCase ( )
439- . replace ( / [ ^ \w \s ] / g, ' ' )
440- . replace ( / \s + / g, ' ' )
441- . trim ( ) ;
442- }
443-
444- /**
445- * Generate bigrams from words
446- */
447- generateBigrams ( words ) {
448- const bigrams = [ ] ;
449- for ( let i = 0 ; i < words . length - 1 ; i ++ ) {
450- bigrams . push ( `${ words [ i ] } ${ words [ i + 1 ] } ` ) ;
451- }
452- return bigrams ;
453- }
454-
455- /**
456- * Generate trigrams from words
433+ * Generate n-grams (bigrams, trigrams, etc.) from words
457434 */
458- generateTrigrams ( words ) {
459- const trigrams = [ ] ;
460- for ( let i = 0 ; i < words . length - 2 ; i ++ ) {
461- trigrams . push ( ` ${ words [ i ] } ${ words [ i + 1 ] } ${ words [ i + 2 ] } ` ) ;
435+ generateNgrams ( words , n ) {
436+ const ngrams = [ ] ;
437+ for ( let i = 0 ; i <= words . length - n ; i ++ ) {
438+ ngrams . push ( words . slice ( i , i + n ) . join ( ' ' ) ) ;
462439 }
463- return trigrams ;
440+ return ngrams ;
464441 }
465442
466443 /**
@@ -551,7 +528,4 @@ export class BrowserTaskClassifier {
551528
552529 return suggestions ;
553530 }
554- }
555-
556- // Export browser-compatible classifier
557- export const browserTaskClassifier = new BrowserTaskClassifier ( ) ;
531+ }
0 commit comments