forked from home-assistant/core
-
Notifications
You must be signed in to change notification settings - Fork 0
193 lines (161 loc) · 8.38 KB
/
detect-non-english-issues.yml
File metadata and controls
193 lines (161 loc) · 8.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
name: Auto-detect non-English issues
# yamllint disable-line rule:truthy
on:
issues:
types: [opened]
permissions:
issues: write
models: read
jobs:
detect-language:
runs-on: ubuntu-latest
steps:
- name: Check issue language
id: detect_language
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
env:
ISSUE_NUMBER: ${{ github.event.issue.number }}
ISSUE_TITLE: ${{ github.event.issue.title }}
ISSUE_BODY: ${{ github.event.issue.body }}
ISSUE_USER_TYPE: ${{ github.event.issue.user.type }}
with:
script: |
// Get the issue details from environment variables
const issueNumber = process.env.ISSUE_NUMBER;
const issueTitle = process.env.ISSUE_TITLE || '';
const issueBody = process.env.ISSUE_BODY || '';
const userType = process.env.ISSUE_USER_TYPE;
// Skip language detection for bot users
if (userType === 'Bot') {
console.log('Skipping language detection for bot user');
core.setOutput('should_continue', 'false');
return;
}
console.log(`Checking language for issue #${issueNumber}`);
console.log(`Title: ${issueTitle}`);
// Combine title and body for language detection
const fullText = `${issueTitle}\n\n${issueBody}`;
// Check if the text is too short to reliably detect language
if (fullText.trim().length < 20) {
console.log('Text too short for reliable language detection');
core.setOutput('should_continue', 'false'); // Skip processing for very short text
return;
}
core.setOutput('issue_number', issueNumber);
core.setOutput('issue_text', fullText);
core.setOutput('should_continue', 'true');
- name: Detect language using AI
id: ai_language_detection
if: steps.detect_language.outputs.should_continue == 'true'
uses: actions/ai-inference@a6101c89c6feaecc585efdd8d461f18bb7896f20 # v2.0.5
with:
model: openai/gpt-4o-mini
system-prompt: |
You are a language detection system. Your task is to determine if the provided text is written in English or another language.
Rules:
1. Analyze the text and determine the primary language of the USER'S DESCRIPTION only
2. IGNORE markdown headers (lines starting with #, ##, ###, etc.) as these are from issue templates, not user input
3. IGNORE all code blocks (text between ``` or ` markers) as they may contain system-generated error messages in other languages
4. IGNORE error messages, logs, and system output even if not in code blocks - these often appear in the user's system language
5. Consider technical terms, code snippets, URLs, and file paths as neutral (they don't indicate non-English)
6. Focus ONLY on the actual sentences and descriptions written by the user explaining their issue
7. If the user's explanation/description is in English but includes non-English error messages or logs, consider it ENGLISH
8. Return ONLY a JSON object with two fields:
- "is_english": boolean (true if the user's description is primarily in English, false otherwise)
- "detected_language": string (the name of the detected language, e.g., "English", "Spanish", "Chinese", etc.)
9. Be lenient - if the user's explanation is in English with non-English system output, it's still English
10. Common programming terms, error messages, and technical jargon should not be considered as non-English
11. If you cannot reliably determine the language, set detected_language to "undefined"
Example response:
{"is_english": false, "detected_language": "Spanish"}
prompt: |
Please analyze the following issue text and determine if it is written in English:
${{ steps.detect_language.outputs.issue_text }}
max-tokens: 50
- name: Process non-English issues
if: steps.detect_language.outputs.should_continue == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
env:
AI_RESPONSE: ${{ steps.ai_language_detection.outputs.response }}
ISSUE_NUMBER: ${{ steps.detect_language.outputs.issue_number }}
with:
script: |
const issueNumber = parseInt(process.env.ISSUE_NUMBER);
const aiResponse = process.env.AI_RESPONSE;
console.log('AI language detection response:', aiResponse);
let languageResult;
try {
languageResult = JSON.parse(aiResponse.trim());
// Validate the response structure
if (!languageResult || typeof languageResult.is_english !== 'boolean') {
throw new Error('Invalid response structure');
}
} catch (error) {
core.error(`Failed to parse AI response: ${error.message}`);
console.log('Raw AI response:', aiResponse);
// Log more details for debugging
core.warning('Defaulting to English due to parsing error');
// Default to English if we can't parse the response
return;
}
if (languageResult.is_english) {
console.log('Issue is in English, no action needed');
return;
}
// If language is undefined or not detected, skip processing
if (!languageResult.detected_language || languageResult.detected_language === 'undefined') {
console.log('Language could not be determined, skipping processing');
return;
}
console.log(`Issue detected as non-English: ${languageResult.detected_language}`);
// Post comment explaining the language requirement
const commentBody = [
'<!-- workflow: detect-non-english-issues -->',
'### 🌐 Non-English issue detected',
'',
`This issue appears to be written in **${languageResult.detected_language}** rather than English.`,
'',
'The Home Assistant project uses English as the primary language for issues to ensure that everyone in our international community can participate and help resolve issues. This allows any of our thousands of contributors to jump in and provide assistance.',
'',
'**What to do:**',
'1. Re-create the issue using the English language',
'2. If you need help with translation, consider using:',
' - Translation tools like Google Translate',
' - AI assistants like ChatGPT or Claude',
'',
'This helps our community provide the best possible support and ensures your issue gets the attention it deserves from our global contributor base.',
'',
'Thank you for your understanding! 🙏'
].join('\n');
try {
// Add comment
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
body: commentBody
});
console.log('Posted language requirement comment');
// Add non-english label
await github.rest.issues.addLabels({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
labels: ['non-english']
});
console.log('Added non-english label');
// Close the issue
await github.rest.issues.update({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issueNumber,
state: 'closed',
state_reason: 'not_planned'
});
console.log('Closed the issue');
} catch (error) {
core.error('Failed to process non-English issue:', error.message);
if (error.status === 403) {
core.error('Permission denied or rate limit exceeded');
}
}