@@ -13,6 +13,77 @@ function normalizeText(text: string): string {
1313 return text . toLowerCase ( ) . trim ( )
1414}
1515
/**
 * Drop stack-trace noise from a block of text so that identifiers inside
 * traces do not produce false keyword matches.
 *
 * A line is discarded when, after trimming, it either
 *  - starts with `at ` (a stack frame), or
 *  - mentions `node_modules` or a `.js:` / `.ts:` / `.tsx:` / `.jsx:`
 *    file-position reference.
 *
 * Lines that are kept are returned exactly as they were (untrimmed) and are
 * re-joined with `\n`.
 *
 * @param text - Raw text, possibly containing stack traces.
 * @returns The text with stack-trace lines removed.
 */
function removeStackTraces(text: string): string {
  const fileReference = /node_modules|\.js:|\.ts:|\.tsx:|\.jsx:/
  const keptLines: string[] = []

  for (const rawLine of text.split('\n')) {
    // Classification uses the trimmed form; the untrimmed line is what is kept.
    const candidate = rawLine.trim()
    const looksLikeFrame = candidate.startsWith('at ')
    if (looksLikeFrame || fileReference.test(candidate)) {
      continue
    }
    keptLines.push(rawLine)
  }

  return keptLines.join('\n')
}
32+
/**
 * High-value phrases, grouped by the category whose score they boost.
 * All entries are lower-case; matching is done against lower-cased text.
 */
const PHRASE_PATTERNS: Readonly<Record<string, readonly string[]>> = {
  'rtk-query': [
    'rtk query',
    'rtk-query',
    'createapi',
    'fetch base query',
    'optimistic update',
    'cache invalidation',
  ],
  codegen: [
    'code generation',
    'openapi',
    'swagger',
    'output files',
    'codegen',
    'rtk-query codegen',
    '@rtk-query/codegen-openapi',
    'generate api',
    'generate endpoints',
  ],
  'build-tooling': ['webpack', 'vite', 'rollup', 'bundler'],
  publishing: [
    'npm publish',
    'trusted publishing',
    'provenance',
    'package registry',
  ],
  migration: ['migration guide', 'breaking change', 'upgrade path'],
  architecture: ['api design', 'architecture decision', 'design pattern'],
}

/** Score added for each distinct phrase found in the text. */
const PHRASE_MATCH_SCORE = 3.0

/**
 * Compile a phrase into a matcher that only fires on a standalone term.
 *
 * - The phrase must not be preceded by a letter or digit, so `vite` does not
 *   match inside "invite".
 * - It may carry a plural suffix (`s` / `es`) but must not be followed by any
 *   other letter, so "breaking changes" matches while "vitest" does not.
 *   Trailing digits are allowed ("webpack5").
 * - Spaces inside the phrase match any run of whitespace, so a phrase that is
 *   wrapped across lines is still detected.
 */
function buildPhraseMatcher(phrase: string): RegExp {
  const escaped = phrase
    .split(' ')
    .map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('\\s+')
  // No `g` flag: `test()` must stay stateless because matchers are shared.
  return new RegExp(`(?<![a-z0-9])${escaped}(?:e?s)?(?![a-z])`)
}

/** Matchers compiled once per module load, in category declaration order. */
const PHRASE_MATCHERS: ReadonlyArray<readonly [string, readonly RegExp[]]> =
  Object.entries(PHRASE_PATTERNS).map(
    ([category, categoryPhrases]) =>
      [category, categoryPhrases.map((p) => buildPhraseMatcher(p))] as const,
  )

/**
 * Detect high-value phrases and report a per-category boost.
 *
 * Each distinct phrase of a category that occurs in `text` (case-insensitive,
 * as a standalone term) adds a fixed score of 3.0 to that category. Repeated
 * occurrences of the same phrase count once.
 *
 * @param text - Text to scan (title and/or body).
 * @returns Map from category name to its accumulated phrase score. Categories
 *   with no matching phrase are omitted.
 */
function detectPhrases(text: string): Map<string, number> {
  const phrases = new Map<string, number>()
  const lowerText = text.toLowerCase()

  for (const [category, matchers] of PHRASE_MATCHERS) {
    let score = 0
    for (const matcher of matchers) {
      if (matcher.test(lowerText)) {
        score += PHRASE_MATCH_SCORE
      }
    }
    if (score > 0) {
      phrases.set(category, score)
    }
  }

  return phrases
}
86+
1687/**
1788 * Check if issue has any matching labels (Tier 1)
1889 */
@@ -45,30 +116,69 @@ function checkLabels(issue: Issue): Categorization | null {
45116
46117/**
47118 * Calculate keyword scores for each category (Tier 2)
119+ * Implements context-aware matching with title prioritization
48120 */
49121function calculateKeywordScores ( issue : Issue ) : Map < string , number > {
50122 const scores = new Map < string , number > ( )
51- const text = normalizeText ( `${ issue . title } ${ issue . body } ` )
123+ const title = normalizeText ( issue . title )
124+ const body = normalizeText ( issue . body || '' )
125+
126+ // Remove stack traces from body for matching
127+ const bodyWithoutStackTraces = removeStackTraces ( body )
128+
129+ // Add phrase detection before keyword matching
130+ const phraseScores = detectPhrases ( `${ title } ${ bodyWithoutStackTraces } ` )
131+ for ( const [ category , phraseScore ] of phraseScores ) {
132+ const currentScore = scores . get ( category ) || 0
133+ scores . set ( category , currentScore + phraseScore )
134+ }
52135
53136 for ( const category of CATEGORIES ) {
54137 if ( category . name === 'uncategorized' ) continue
55138
56139 let score = 0
57140
58- // Check keywords
141+ // Check keywords with title prioritization (2x weight for title)
59142 for ( const keyword of category . keywords ) {
60- if ( text . includes ( normalizeText ( keyword ) ) ) {
61- score += category . weight
62- }
143+ const normalizedKeyword = normalizeText ( keyword )
144+
145+ // Title matches (2x weight)
146+ const titleMatches = (
147+ title . match (
148+ new RegExp (
149+ normalizedKeyword . replace ( / [ . * + ? ^ $ { } ( ) | [ \] \\ ] / g, '\\$&' ) ,
150+ 'gi' ,
151+ ) ,
152+ ) || [ ]
153+ ) . length
154+ score += titleMatches * category . weight * 2.0
155+
156+ // Body matches (1x weight, excluding stack traces)
157+ const bodyMatches = (
158+ bodyWithoutStackTraces . match (
159+ new RegExp (
160+ normalizedKeyword . replace ( / [ . * + ? ^ $ { } ( ) | [ \] \\ ] / g, '\\$&' ) ,
161+ 'gi' ,
162+ ) ,
163+ ) || [ ]
164+ ) . length
165+ score += bodyMatches * category . weight
63166 }
64167
65- // Check patterns (weighted higher )
168+ // Check patterns (1.5x weight for title, 1x for body )
66169 for ( const pattern of category . patterns ) {
67- if ( pattern . test ( issue . title ) || pattern . test ( issue . body ) ) {
170+ if ( pattern . test ( title ) ) {
68171 score += category . weight * 1.5
172+ } else if ( pattern . test ( bodyWithoutStackTraces ) ) {
173+ score += category . weight
69174 }
70175 }
71176
177+ // Penalty for documentation category on bugs
178+ if ( category . name === 'documentation' && detectIssueType ( issue ) === 'bug' ) {
179+ score *= 0.5
180+ }
181+
72182 if ( score > 0 ) {
73183 scores . set ( category . name , score )
74184 }
@@ -228,15 +338,16 @@ function detectIssueType(
228338
229339/**
230340 * Main categorization function using multi-tier approach
341+ * Implements confidence scoring based on match location
231342 */
232343export function categorizeIssue ( issue : Issue ) : Categorization {
233- // Tier 1: Label-based (highest confidence)
344+ // Tier 1: Label-based (highest confidence - 95% )
234345 const labelResult = checkLabels ( issue )
235346 if ( labelResult ) {
236347 return labelResult
237348 }
238349
239- // Tier 2: Keyword matching
350+ // Tier 2: Keyword matching with context-aware scoring
240351 const scores = calculateKeywordScores ( issue )
241352 if ( scores . size > 0 ) {
242353 let maxScore = 0
@@ -254,7 +365,19 @@ export function categorizeIssue(issue: Issue): Categorization {
254365 if ( categoryConfig ) {
255366 const type = detectIssueType ( issue )
256367 const subcategory = findSubcategory ( issue , categoryConfig )
257- const confidence = Math . min ( 0.8 , 0.5 + maxScore * 0.08 )
368+
369+ // Adjust confidence based on match strength and location
370+ let confidence : number
371+ if ( maxScore > 6.0 ) {
372+ // Strong title match (score > 3 * 2.0)
373+ confidence = 0.9
374+ } else if ( maxScore > 4.0 ) {
375+ // Good body match (score > 2 * 2.0)
376+ confidence = 0.8
377+ } else {
378+ // Pattern detection level
379+ confidence = 0.7
380+ }
258381
259382 return {
260383 primary : topCategory ,
@@ -267,17 +390,17 @@ export function categorizeIssue(issue: Issue): Categorization {
267390 }
268391 }
269392
270- // Tier 3: Pattern detection
393+ // Tier 3: Pattern detection (70% confidence)
271394 const patternResult = detectPatterns ( issue )
272395 if ( patternResult ) {
273396 return patternResult
274397 }
275398
276- // Fallback: Uncategorized
399+ // Fallback: Uncategorized (60% confidence)
277400 return {
278401 primary : 'uncategorized' ,
279402 type : detectIssueType ( issue ) ,
280- confidence : 0.3 ,
403+ confidence : 0.6 ,
281404 method : 'manual' ,
282405 }
283406}
0 commit comments