Skip to content

Commit 1ce2fd1

Browse files
committed
Add initial categorization logic
1 parent c0d3e56 commit 1ce2fd1

File tree

7 files changed

+1194
-35
lines changed

7 files changed

+1194
-35
lines changed
Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
/**
2+
* Issue categorization logic
3+
*/
4+
5+
import type { Issue } from '../github/types.js'
6+
import type { Categorization } from './types.js'
7+
import { CATEGORIES, type CategoryConfig } from './config.js'
8+
9+
/**
 * Canonical form used for every text comparison in this module:
 * lower-cased, with leading and trailing whitespace removed.
 */
function normalizeText(text: string): string {
  const lowered = text.toLowerCase()
  return lowered.trim()
}
15+
16+
/**
 * Tier 1: categorize an issue from its labels alone.
 *
 * Walks CATEGORIES in order (skipping 'uncategorized') and picks the first
 * category that lists one of the issue's labels; both sides are compared
 * after normalization. Returns null when no category matches.
 */
function checkLabels(issue: Issue): Categorization | null {
  const normalizedIssueLabels = issue.labels.map(normalizeText)

  const matched = CATEGORIES.find(
    (candidate) =>
      candidate.name !== 'uncategorized' &&
      candidate.labels.some((configured) =>
        normalizedIssueLabels.includes(normalizeText(configured)),
      ),
  )

  if (!matched) {
    return null
  }

  const type = detectIssueType(issue)
  const secondary = findSubcategory(issue, matched)

  // A label match is reported with a fixed confidence of 0.95.
  return {
    primary: matched.name,
    secondary,
    type,
    confidence: 0.95,
    method: 'label',
  }
}
45+
46+
/**
 * Score every category against the issue's title and body (Tier 2).
 *
 * For each entry of CATEGORIES except 'uncategorized':
 * - every keyword found as a substring of the normalized "title body" text
 *   adds `category.weight` to the score;
 * - every pattern that matches the raw title or the raw body adds
 *   `category.weight * 1.5`.
 *
 * @param issue - Issue whose title and body are scanned.
 * @returns Map from category name to its score; categories whose score is
 *   not greater than zero are omitted.
 */
function calculateKeywordScores(issue: Issue): Map<string, number> {
  const scores = new Map<string, number>()
  const text = normalizeText(`${issue.title} ${issue.body}`)

  for (const category of CATEGORIES) {
    if (category.name === 'uncategorized') continue

    let score = 0

    // Check keywords
    for (const keyword of category.keywords) {
      if (text.includes(normalizeText(keyword))) {
        score += category.weight
      }
    }

    // Check patterns (weighted higher)
    for (const pattern of category.patterns) {
      // The pattern objects come from shared config and are reused for every
      // issue. A RegExp with the `g` or `y` flag remembers `lastIndex`
      // between `test` calls, which would make a match depend on whatever
      // string was tested previously. Resetting it is a no-op for patterns
      // without those flags and keeps each test starting at index 0.
      pattern.lastIndex = 0
      let matched = pattern.test(issue.title)
      if (!matched) {
        pattern.lastIndex = 0
        matched = pattern.test(issue.body)
      }
      // Leave the shared pattern in a clean state for its next user.
      pattern.lastIndex = 0

      if (matched) {
        score += category.weight * 1.5
      }
    }

    if (score > 0) {
      scores.set(category.name, score)
    }
  }

  return scores
}
79+
80+
/**
 * Tier 3: derive a categorization from the scores produced by
 * calculateKeywordScores.
 *
 * The highest-scoring category wins (the first one encountered is kept on a
 * tie). Returns null when nothing scored, when the best score is below 2.0,
 * or when the winning name has no entry in CATEGORIES.
 */
function detectPatterns(issue: Issue): Categorization | null {
  const scoreByCategory = calculateKeywordScores(issue)
  if (scoreByCategory.size === 0) return null

  // Track the best-scoring category seen so far.
  let bestScore = 0
  let bestName = ''
  scoreByCategory.forEach((score, name) => {
    if (score > bestScore) {
      bestScore = score
      bestName = name
    }
  })

  // Scores under the threshold are treated as no match.
  if (bestScore < 2.0) return null

  const config = CATEGORIES.find((entry) => entry.name === bestName)
  if (!config) return null

  const type = detectIssueType(issue)
  const secondary = findSubcategory(issue, config)

  // Confidence grows linearly with the score and is capped at 0.85.
  const scaled = 0.5 + bestScore * 0.1
  const confidence = Math.min(0.85, scaled)

  return {
    primary: bestName,
    secondary,
    type,
    confidence,
    method: 'pattern',
  }
}
123+
124+
/**
 * Pick the best-matching subcategory of `category` for an issue.
 *
 * Each subcategory earns 1 point per keyword found as a substring of the
 * normalized "title body" text and 2 points per pattern that matches the raw
 * title or the raw body. The highest scorer wins; on a tie the subcategory
 * listed first is kept.
 *
 * @param issue - Issue whose title and body are scanned.
 * @param category - Category whose `subcategories` are considered.
 * @returns The winning subcategory name, or undefined when the category has
 *   no subcategories or none of them scores at least 1.
 */
function findSubcategory(
  issue: Issue,
  category: CategoryConfig,
): string | undefined {
  if (!category.subcategories) return undefined

  const text = normalizeText(`${issue.title} ${issue.body}`)
  let maxScore = 0
  let topSubcategory = ''

  for (const subcategory of category.subcategories) {
    let score = 0

    // Check keywords
    for (const keyword of subcategory.keywords) {
      if (text.includes(normalizeText(keyword))) {
        score += 1
      }
    }

    // Check patterns
    for (const pattern of subcategory.patterns) {
      // Pattern objects are shared config reused across issues. With the `g`
      // or `y` flag a RegExp keeps `lastIndex` between `test` calls, so the
      // result would depend on the previously tested string. Resetting is a
      // no-op for patterns without those flags.
      pattern.lastIndex = 0
      let matched = pattern.test(issue.title)
      if (!matched) {
        pattern.lastIndex = 0
        matched = pattern.test(issue.body)
      }
      // Leave the shared pattern in a clean state for its next user.
      pattern.lastIndex = 0

      if (matched) {
        score += 2
      }
    }

    if (score > maxScore) {
      maxScore = score
      topSubcategory = subcategory.name
    }
  }

  // Require minimum score for subcategory
  return maxScore >= 1 ? topSubcategory : undefined
}
163+
164+
/**
 * Classify an issue as 'bug', 'feature', 'question' or 'docs'.
 *
 * Labels are consulted first, in the order bug, feature, docs, question.
 * If no label decides, the content is checked in the order bug, feature
 * (normalized title + body), question (raw title only), docs (normalized
 * title + body). Anything still undecided is reported as 'question'.
 */
function detectIssueType(
  issue: Issue,
): 'bug' | 'feature' | 'question' | 'docs' {
  type IssueType = 'bug' | 'feature' | 'question' | 'docs'

  // Label aliases per type; the array order is the precedence order.
  const labelRules: ReadonlyArray<readonly [IssueType, readonly string[]]> = [
    ['bug', ['bug', 'type: bug', 'regression', 'defect']],
    ['feature', ['enhancement', 'feature', 'feature request', 'type: feature']],
    ['docs', ['documentation', 'docs', 'type: docs']],
    ['question', ['question', 'help wanted', 'type: question']],
  ]

  const normalizedLabels = issue.labels.map(normalizeText)
  for (const [kind, aliases] of labelRules) {
    if (normalizedLabels.some((label) => aliases.includes(label))) {
      return kind
    }
  }

  // No label decided: fall back to wording in the title and body.
  const content = normalizeText(`${issue.title} ${issue.body}`)

  const bugWording =
    /\b(bug|error|issue|broken|not working|doesn't work|crash|fail)/i
  if (bugWording.test(content)) return 'bug'

  const featureWording =
    /\b(feature|enhancement|add|support|would be nice|could we|suggestion)/i
  if (featureWording.test(content)) return 'feature'

  // The question check looks at the title only, not the body.
  const questionWording = /\b(how|what|why|when|where|question|\?)/i
  if (questionWording.test(issue.title)) return 'question'

  const docsWording = /\b(docs|documentation|readme|guide|tutorial|example)/i
  if (docsWording.test(content)) return 'docs'

  // Unclear cases default to 'question'.
  return 'question'
}
228+
229+
/**
 * Categorize an issue by trying progressively weaker signals.
 *
 * 1. Labels (checkLabels) - returned as-is when a category matches.
 * 2. Keyword/pattern scores (calculateKeywordScores) - the top-scoring
 *    category is used when its score is at least 2.0 and it exists in
 *    CATEGORIES; confidence is 0.5 + score * 0.08, capped at 0.8.
 * 3. detectPatterns - used when it yields a result.
 * 4. Otherwise the issue is reported as 'uncategorized' with confidence 0.3.
 */
export function categorizeIssue(issue: Issue): Categorization {
  // Tier 1: labels carry the highest confidence.
  const fromLabels = checkLabels(issue)
  if (fromLabels) return fromLabels

  // Tier 2: pick the best-scoring category (first one wins on a tie).
  // An empty score map leaves bestScore at 0, which fails the threshold.
  let bestScore = 0
  let bestName = ''
  for (const [name, score] of calculateKeywordScores(issue).entries()) {
    if (score > bestScore) {
      bestScore = score
      bestName = name
    }
  }

  if (bestScore >= 2.0) {
    const config = CATEGORIES.find((entry) => entry.name === bestName)
    if (config) {
      const type = detectIssueType(issue)
      const secondary = findSubcategory(issue, config)
      const confidence = Math.min(0.8, 0.5 + bestScore * 0.08)

      return {
        primary: bestName,
        secondary,
        type,
        confidence,
        method: 'keyword',
      }
    }
  }

  // Tier 3: pattern detection.
  // NOTE(review): detectPatterns re-runs calculateKeywordScores and applies
  // the same 2.0 threshold, so whenever Tier 2 declines this pass appears to
  // decline as well - confirm whether a separate pattern-only scorer was
  // intended here.
  const fromPatterns = detectPatterns(issue)
  if (fromPatterns) return fromPatterns

  // Fallback: nothing matched.
  return {
    primary: 'uncategorized',
    type: detectIssueType(issue),
    confidence: 0.3,
    method: 'manual',
  }
}

0 commit comments

Comments
 (0)