Skip to content

Commit c806555

Browse files
authored
Add non-English issue detection using GitHub AI models (home-assistant#146547)
1 parent 4836930 commit c806555

File tree

1 file changed

+184
-0
lines changed

1 file changed

+184
-0
lines changed
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
name: Auto-detect non-English issues
2+
3+
# yamllint disable-line rule:truthy
4+
on:
5+
issues:
6+
types: [opened]
7+
8+
permissions:
9+
issues: write
10+
models: read
11+
12+
jobs:
13+
detect-language:
14+
runs-on: ubuntu-latest
15+
16+
steps:
17+
- name: Check issue language
18+
id: detect_language
19+
uses: actions/github-script@v7.0.1
20+
env:
21+
ISSUE_NUMBER: ${{ github.event.issue.number }}
22+
ISSUE_TITLE: ${{ github.event.issue.title }}
23+
ISSUE_BODY: ${{ github.event.issue.body }}
24+
ISSUE_USER_TYPE: ${{ github.event.issue.user.type }}
25+
with:
26+
script: |
27+
// Get the issue details from environment variables
28+
const issueNumber = process.env.ISSUE_NUMBER;
29+
const issueTitle = process.env.ISSUE_TITLE || '';
30+
const issueBody = process.env.ISSUE_BODY || '';
31+
const userType = process.env.ISSUE_USER_TYPE;
32+
33+
// Skip language detection for bot users
34+
if (userType === 'Bot') {
35+
console.log('Skipping language detection for bot user');
36+
core.setOutput('should_continue', 'false');
37+
return;
38+
}
39+
40+
console.log(`Checking language for issue #${issueNumber}`);
41+
console.log(`Title: ${issueTitle}`);
42+
43+
// Combine title and body for language detection
44+
const fullText = `${issueTitle}\n\n${issueBody}`;
45+
46+
// Check if the text is too short to reliably detect language
47+
if (fullText.trim().length < 20) {
48+
console.log('Text too short for reliable language detection');
49+
core.setOutput('should_continue', 'false'); // Skip processing for very short text
50+
return;
51+
}
52+
53+
core.setOutput('issue_number', issueNumber);
54+
core.setOutput('issue_text', fullText);
55+
core.setOutput('should_continue', 'true');
56+
57+
- name: Detect language using AI
58+
id: ai_language_detection
59+
if: steps.detect_language.outputs.should_continue == 'true'
60+
uses: actions/ai-inference@v1.1.0
61+
with:
62+
model: openai/gpt-4o-mini
63+
system-prompt: |
64+
You are a language detection system. Your task is to determine if the provided text is written in English or another language.
65+
66+
Rules:
67+
1. Analyze the text and determine the primary language
68+
2. IGNORE markdown headers (lines starting with #, ##, ###, etc.) as these are from issue templates, not user input
69+
3. IGNORE all code blocks (text between ``` or ` markers) as they may contain system-generated error messages in other languages
70+
4. Consider technical terms, code snippets, and URLs as neutral (they don't indicate non-English)
71+
5. Focus on the actual sentences and descriptions written by the user
72+
6. Return ONLY a JSON object with two fields:
73+
- "is_english": boolean (true if the text is primarily in English, false otherwise)
74+
- "detected_language": string (the name of the detected language, e.g., "English", "Spanish", "Chinese", etc.)
75+
7. Be lenient - if the text is mostly English with minor non-English elements, consider it English
76+
8. Common programming terms, error messages, and technical jargon should not be considered as non-English
77+
78+
Example response:
79+
{"is_english": false, "detected_language": "Spanish"}
80+
81+
prompt: |
82+
Please analyze the following issue text and determine if it is written in English:
83+
84+
${{ steps.detect_language.outputs.issue_text }}
85+
86+
max-tokens: 50
87+
88+
- name: Process non-English issues
89+
if: steps.detect_language.outputs.should_continue == 'true'
90+
uses: actions/github-script@v7.0.1
91+
env:
92+
AI_RESPONSE: ${{ steps.ai_language_detection.outputs.response }}
93+
ISSUE_NUMBER: ${{ steps.detect_language.outputs.issue_number }}
94+
with:
95+
script: |
96+
const issueNumber = parseInt(process.env.ISSUE_NUMBER);
97+
const aiResponse = process.env.AI_RESPONSE;
98+
99+
console.log('AI language detection response:', aiResponse);
100+
101+
let languageResult;
102+
try {
103+
languageResult = JSON.parse(aiResponse.trim());
104+
105+
// Validate the response structure
106+
if (!languageResult || typeof languageResult.is_english !== 'boolean') {
107+
throw new Error('Invalid response structure');
108+
}
109+
} catch (error) {
110+
core.error(`Failed to parse AI response: ${error.message}`);
111+
console.log('Raw AI response:', aiResponse);
112+
113+
// Log more details for debugging
114+
core.warning('Defaulting to English due to parsing error');
115+
116+
// Default to English if we can't parse the response
117+
return;
118+
}
119+
120+
if (languageResult.is_english) {
121+
console.log('Issue is in English, no action needed');
122+
return;
123+
}
124+
125+
console.log(`Issue detected as non-English: ${languageResult.detected_language}`);
126+
127+
// Post comment explaining the language requirement
128+
const commentBody = [
129+
'<!-- workflow: detect-non-english-issues -->',
130+
'### 🌐 Non-English issue detected',
131+
'',
132+
`This issue appears to be written in **${languageResult.detected_language}** rather than English.`,
133+
'',
134+
'The Home Assistant project uses English as the primary language for issues to ensure that everyone in our international community can participate and help resolve issues. This allows any of our thousands of contributors to jump in and provide assistance.',
135+
'',
136+
'**What to do:**',
137+
'1. Re-create the issue using the English language',
138+
'2. If you need help with translation, consider using:',
139+
' - Translation tools like Google Translate',
140+
' - AI assistants like ChatGPT or Claude',
141+
'',
142+
'This helps our community provide the best possible support and ensures your issue gets the attention it deserves from our global contributor base.',
143+
'',
144+
'Thank you for your understanding! 🙏'
145+
].join('\n');
146+
147+
try {
148+
// Add comment
149+
await github.rest.issues.createComment({
150+
owner: context.repo.owner,
151+
repo: context.repo.repo,
152+
issue_number: issueNumber,
153+
body: commentBody
154+
});
155+
156+
console.log('Posted language requirement comment');
157+
158+
// Add non-english label
159+
await github.rest.issues.addLabels({
160+
owner: context.repo.owner,
161+
repo: context.repo.repo,
162+
issue_number: issueNumber,
163+
labels: ['non-english']
164+
});
165+
166+
console.log('Added non-english label');
167+
168+
// Close the issue
169+
await github.rest.issues.update({
170+
owner: context.repo.owner,
171+
repo: context.repo.repo,
172+
issue_number: issueNumber,
173+
state: 'closed',
174+
state_reason: 'not_planned'
175+
});
176+
177+
console.log('Closed the issue');
178+
179+
} catch (error) {
180+
core.error('Failed to process non-English issue:', error.message);
181+
if (error.status === 403) {
182+
core.error('Permission denied or rate limit exceeded');
183+
}
184+
}

0 commit comments

Comments
 (0)