Skip to content

Commit 2e752f8

Browse files
Update intent classification system with latest improvements
- Updated Swift IntentClassifier to match Kotlin implementation
- Added HuggingFace tokenizer support for end-to-end model
- Replaced two-step architecture with single TFLite model
- Updated to use label_encoder.json instead of model_metadata.json
- Added phone number detection to SlotExtractor contact extraction
- Copied required model files (intent_classifier.tflite, label_encoder.json, tokenizer files) to Swift project
- Removed emojis from logging statements
- Enhanced contact extraction to handle phone numbers like 'call 9978673889'
- Updated error handling and file references to match Android implementation
1 parent 7dbbb39 commit 2e752f8

File tree

10 files changed

+61552
-135
lines changed

10 files changed

+61552
-135
lines changed

examples/whisper.android/.idea/vcs.xml

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/whisper.android/app/src/main/java/com/whispercppdemo/intent/SlotExtractor.kt

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,8 @@ class SlotExtractor {
798798
"brightness" to "\\b(?:brightness|screen\\s+brightness|display\\s+brightness|luminosity|backlight|screen\\s+light|light\\s+level|dim|dimness|brighten|darken|auto\\s+brightness|adaptive\\s+brightness|brightness\\s+level|screen\\s+intensity|display\\s+intensity|illumination|glow|radiance)\\b",
799799

800800
"volume" to "\\b(?:volume|sound\\s+level|sound\\s+volume|audio\\s+level|audio\\s+volume|loudness|loud|quiet|soft|sound|audio|speaker\\s+volume|media\\s+volume|ringtone\\s+volume|notification\\s+volume|alarm\\s+volume|call\\s+volume|ringer|sound\\s+output|audio\\s+output|volume\\s+level)\\b",
801+
802+
"torch" to "\\b(?:torch|flashlight|flash\\s+light|led\\s+light|led\\s+torch|camera\\s+flash|light|lamp|lantern|beam|illumination|bright\\s+light|phone\\s+light|mobile\\s+light|emergency\\s+light|torch\\s+light|strobe|strobe\\s+light|spotlight|searchlight|headlight|flash\\s+lamp|portable\\s+light|hand\\s+light|led\\s+flash|camera\\s+light|phone\\s+torch|device\\s+light|built-in\\s+light|integrated\\s+light)\\b"
801803
)
802804

803805
// Try to match features in order of specificity (more specific patterns first)
@@ -965,7 +967,11 @@ class SlotExtractor {
965967

966968
private fun extractAppAction(text: String): String? {
967969
val appActions = mapOf(
968-
"open" to "\\b(?:open|opened|opening|launch|launched|launching|start|show|display|view|access|load|bring\\s+up|pull\\s+up|fire\\s+up|boot|go\\s+to|navigate\\s+to|switch\\s+to|take\\s+me\\s+to)\\b"
970+
"open" to "\\b(?:open|opened|opening|launch|launched|launching|start|show|display|view|access|load|bring\\s+up|pull\\s+up|fire\\s+up|boot|go\\s+to|navigate\\s+to|switch\\s+to|take\\s+me\\s+to|turn\\s+on|on|enable|enabled|activate|activated|power\\s+on|switch\\s+on)\\b",
971+
972+
"increase" to "\\b(?:increase|increased|increasing|up|higher|raise|raised|raising|boost|boosted|boosting|amplify|amplified|amplifying|enhance|enhanced|enhancing|elevate|elevated|elevating|pump\\s+up|turn\\s+up|crank\\s+up|ramp\\s+up|scale\\s+up|step\\s+up|jack\\s+up|bump\\s+up|push\\s+up|bring\\s+up|make\\s+it\\s+higher|louder|brighter|stronger|more|maximize|max\\s+out|intensify)\\b",
973+
974+
"decrease" to "\\b(?:decrease|decreased|decreasing|down|lower|lowered|lowering|reduce|reduced|reducing|diminish|diminished|diminishing|lessen|lessened|lessening|drop|dropped|dropping|cut|cutting|turn\\s+down|bring\\s+down|scale\\s+down|step\\s+down|tone\\s+down|dial\\s+down|wind\\s+down|ramp\\s+down|make\\s+it\\s+lower|quieter|dimmer|weaker|less|minimize|min\\s+out|soften)\\b"
969975
)
970976

971977
// Sort actions by pattern specificity (longer patterns first for better matching)
@@ -1053,7 +1059,9 @@ class SlotExtractor {
10531059

10541060
"blood oxygen" to "\\b(?:blood\\s+oxygen|oxygen|o2|spo2|sp\\s+o2|oxygen\\s+saturation|oxygen\\s+level|oxygen\\s+levels|blood\\s+o2|oxygen\\s+sat|o2\\s+sat|o2\\s+level|o2\\s+saturation|pulse\\s+ox|pulse\\s+oximetry|oximeter|oxygen\\s+reading|oxygen\\s+sensor|saturation|sat|blood\\s+oxygen\\s+level|arterial\\s+oxygen|respiratory|respiration|breathing|breath|lung\\s+function|oxygenation|hypoxia|oxygen\\s+content)\\b",
10551061

1056-
"stress" to "\\b(?:stress|stressed|stressful|stress\\s+level|stress\\s+score|stress\\s+index|anxiety|anxious|worried|worry|worrying|tension|tense|pressure|pressured|strain|strained|overwhelm|overwhelmed|nervous|nervousness|burnout|burnt\\s+out|mental\\s+stress|emotional\\s+stress|psychological\\s+stress|chronic\\s+stress|acute\\s+stress|relaxation|relax|calm|calmness|peace|peaceful|tranquil|serene|zen|mindfulness)\\b"
1062+
"stress" to "\\b(?:stress|stressed|stressful|stress\\s+level|stress\\s+score|stress\\s+index|anxiety|anxious|worried|worry|worrying|tension|tense|pressure|pressured|strain|strained|overwhelm|overwhelmed|nervous|nervousness|burnout|burnt\\s+out|mental\\s+stress|emotional\\s+stress|psychological\\s+stress|chronic\\s+stress|acute\\s+stress|relaxation|relax|calm|calmness|peace|peaceful|tranquil|serene|zen|mindfulness)\\b",
1063+
1064+
"brightness" to "\\b(?:brightness|bright|brighter|brighten|brightening|screen\\s+brightness|display\\s+brightness|luminosity|luminance|backlight|screen\\s+light|light\\s+level|dim|dimmer|dimming|dimness|darken|darker|darkening|auto\\s+brightness|adaptive\\s+brightness|brightness\\s+level|screen\\s+intensity|display\\s+intensity|illumination|illuminate|glow|glowing|radiance|light\\s+output|ambient\\s+light|screen\\s+glow|visibility|contrast|gamma|exposure|luminous)\\b"
10571065
)
10581066

10591067
// Sort by pattern length for more specific matching
@@ -1098,14 +1106,17 @@ class SlotExtractor {
10981106

10991107
// If no hardcoded contact found, try to extract any name after phone action keywords
11001108
val phoneActionPatterns = listOf(
1101-
// Pattern 1: Standard action + contact (handles "call suraj india")
1102-
"(?:call|calling|phone|dial|dialing|ring|ringing|contact|reach|reach\\s+out|give\\s+a\\s+call|make\\s+a\\s+call|place\\s+a\\s+call|telephone|buzz|video\\s+call|voice\\s+call|facetime)\\s+(.+?)(?:\\s*$|\\s+(?:now|please|right\\s+now|immediately|asap|urgently)\\s*$)",
1109+
// Pattern 1: Standard action + optional modifiers + contact (handles "call back rahul india")
1110+
"(?:call|calling|phone|dial|dialing|ring|ringing|contact|reach|reach\\s+out|give\\s+a\\s+call|make\\s+a\\s+call|place\\s+a\\s+call|telephone|buzz|video\\s+call|voice\\s+call|facetime)\\s+(?:back\\s+)?(.+?)(?:\\s*$|\\s+(?:now|please|right\\s+now|immediately|asap|urgently)\\s*$)",
11031111

11041112
// Pattern 2: Messaging actions + contact
11051113
"(?:message|messaging|text|texting|sms|send|sending|write|compose|type|drop\\s+a\\s+message|send\\s+a\\s+text|shoot\\s+a\\s+message|ping|dm|direct\\s+message|whatsapp|imessage|chat|msg)\\s+(.+?)(?:\\s*$|\\s+(?:now|please|right\\s+now|immediately|asap|urgently)\\s*$)",
11061114

11071115
// Pattern 3: "get in touch with X" format
1108-
"(?:get\\s+in\\s+touch\\s+with|reach\\s+out\\s+to|contact)\\s+(.+?)(?:\\s*$|\\s+(?:now|please|right\\s+now|immediately|asap|urgently)\\s*$)"
1116+
"(?:get\\s+in\\s+touch\\s+with|reach\\s+out\\s+to|contact)\\s+(.+?)(?:\\s*$|\\s+(?:now|please|right\\s+now|immediately|asap|urgently)\\s*$)",
1117+
1118+
// Pattern 4: "call back" specific pattern
1119+
"(?:call\\s+back|ring\\s+back|phone\\s+back)\\s+(.+?)(?:\\s*$|\\s+(?:now|please|right\\s+now|immediately|asap|urgently)\\s*$)"
11091120
)
11101121

11111122
for (pattern in phoneActionPatterns) {
@@ -1114,6 +1125,12 @@ class SlotExtractor {
11141125
if (match != null && match.groupValues.size > 1) {
11151126
val extractedName = match.groupValues[1].trim()
11161127

1128+
// Check if the extracted text is a phone number
1129+
val phoneNumberPattern = "\\b\\d{10,15}\\b".toRegex()
1130+
if (phoneNumberPattern.matches(extractedName)) {
1131+
return extractedName // Return the phone number directly
1132+
}
1133+
11171134
// Clean up the extracted name (remove common stop words and punctuation at the beginning)
11181135
val cleanedName = extractedName
11191136
.replace(Regex("^(?:to|my|the|a|an)\\s+", RegexOption.IGNORE_CASE), "")
@@ -1137,6 +1154,32 @@ class SlotExtractor {
11371154
}
11381155
}
11391156

1157+
// Additional pattern specifically for phone numbers with common formats
1158+
val phoneNumberPatterns = listOf(
1159+
// Pattern for standalone phone numbers (10-15 digits) - updated to handle "back" modifier
1160+
"(?:call|calling|phone|dial|dialing|ring|ringing|contact|reach|message|messaging|text|texting|sms)\\s+(?:back\\s+)?(\\d{10,15})\\b",
1161+
1162+
// Pattern for phone numbers with separators (spaces, hyphens, dots)
1163+
"(?:call|calling|phone|dial|dialing|ring|ringing|contact|reach|message|messaging|text|texting|sms)\\s+(?:back\\s+)?(\\d{3,4}[\\s\\-\\.]{0,1}\\d{3,4}[\\s\\-\\.]{0,1}\\d{4,6})\\b",
1164+
1165+
// Pattern for phone numbers with country codes (+91, +1, etc.)
1166+
"(?:call|calling|phone|dial|dialing|ring|ringing|contact|reach|message|messaging|text|texting|sms)\\s+(?:back\\s+)?(?:\\+\\d{1,3}[\\s\\-\\.]{0,1})?([\\d\\s\\-\\.]{10,20})\\b"
1167+
)
1168+
1169+
for (pattern in phoneNumberPatterns) {
1170+
val regex = pattern.toRegex(RegexOption.IGNORE_CASE)
1171+
val match = regex.find(text)
1172+
if (match != null && match.groupValues.size > 1) {
1173+
val extractedNumber = match.groupValues[1].trim()
1174+
// Clean the number by removing separators and keeping only digits
1175+
val cleanedNumber = extractedNumber.replace(Regex("[^\\d]"), "")
1176+
// Validate that it's a reasonable phone number length (10-15 digits)
1177+
if (cleanedNumber.length in 10..15) {
1178+
return cleanedNumber
1179+
}
1180+
}
1181+
}
1182+
11401183
return null
11411184
}
11421185

0 commit comments

Comments
 (0)