-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathemotion_detector.py
More file actions
101 lines (84 loc) · 3.67 KB
/
emotion_detector.py
File metadata and controls
101 lines (84 loc) · 3.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
Emotion Detector für Timus
Erkennt Emotionen aus Sprache mittels SpeechBrain
"""
import numpy as np
import torch
import soundfile as sf
import tempfile
import os
class EmotionDetector:
    """Speech emotion recognizer built on a SpeechBrain wav2vec2 classifier.

    The classifier is loaded once in the constructor. If loading fails,
    ``classifier`` stays ``None`` and the detection methods return an
    ``"unknown"`` result instead of raising.
    """

    # Response presets keyed by the emotion names produced via ``emotion_map``.
    _RESPONSE_STYLES = {
        "happy": {"tone": "enthusiastic", "prefix": "Das freut mich! 😊 ", "emoji": "😊"},
        "sad": {"tone": "empathetic", "prefix": "Oje, das klingt nicht gut. ", "emoji": "😔"},
        "angry": {"tone": "calm", "prefix": "Ganz ruhig, ich bin ja da. ", "emoji": "😤"},
        "neutral": {"tone": "professional", "prefix": "", "emoji": "🙂"},
    }

    def __init__(self):
        self.classifier = None
        # Maps the classifier's raw labels to the emotion names used by callers.
        self.emotion_map = {
            "NEU": "neutral",
            "HAP": "happy",
            "SAD": "sad",
            "ANG": "angry",
        }
        self._load_model()

    def _load_model(self):
        """Load the SpeechBrain classifier; on failure keep ``classifier`` as None."""
        try:
            # Imported inside the try block so that a missing SpeechBrain
            # install is reported below instead of raising.
            from speechbrain.inference.interfaces import foreign_class

            # Use the CPU because the GPU is fully occupied by Moondream.
            run_opts = {"device": "cpu"}
            self.classifier = foreign_class(
                source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
                pymodule_file="custom_interface.py",
                classname="CustomEncoderWav2vec2Classifier",
                run_opts=run_opts,
            )
            print("✅ Emotion Detector geladen (auf CPU)")
        except Exception as e:  # best effort: report and continue without a model
            print(f"⚠️ Emotion Detector nicht verfügbar: {e}")

    @staticmethod
    def _first_scalar(value) -> float:
        """Return the first element of *value* as a plain float.

        Accepts 0-dim and n-dim tensors/arrays, lists or tuples wrapping
        such values, and plain numbers.
        """
        if hasattr(value, "reshape"):
            # reshape(-1) turns a 0-dim tensor/array into a 1-element vector,
            # so indexing works regardless of the original rank.
            return float(value.reshape(-1)[0])
        if isinstance(value, (list, tuple)):
            return EmotionDetector._first_scalar(value[0])
        return float(value)

    @staticmethod
    def _to_mono(audio_array) -> np.ndarray:
        """Reduce *audio_array* to a one-dimensional signal.

        Arrays that are not 2-D, are empty, or have a singleton axis are
        simply flattened. A genuine multi-channel 2-D array is downmixed by
        averaging; the shorter axis is assumed to be the channel axis
        (NOTE(review): confirm against the callers' array layout). The
        input dtype is preserved.
        """
        samples = np.asarray(audio_array)
        if samples.ndim != 2 or samples.size == 0 or 1 in samples.shape:
            return samples.reshape(-1)
        channel_axis = int(np.argmin(samples.shape))
        # Average in float64, then cast back so integer PCM keeps its scale.
        mixed = samples.mean(axis=channel_axis, dtype=np.float64)
        return mixed.astype(samples.dtype)

    def detect_from_file(self, audio_path: str) -> dict:
        """Detect the emotion in an audio file.

        Args:
            audio_path: Path of the audio file passed to the classifier.

        Returns:
            ``{"emotion", "confidence", "raw_label"}`` on success,
            ``{"emotion": "unknown", "confidence": 0.0}`` when no classifier
            is loaded, or ``{"emotion": "error", "confidence": 0.0,
            "error": <message>}`` when classification raises.
        """
        if self.classifier is None:
            return {"emotion": "unknown", "confidence": 0.0}
        try:
            _out_prob, score, _index, text_lab = self.classifier.classify_file(audio_path)
            # The label arrives wrapped in a sequence such as ['NEU'].
            label_key = text_lab[0] if isinstance(text_lab, (list, tuple)) else text_lab
            confidence = self._first_scalar(score)
            # Labels missing from the map fall back to "neutral".
            emotion = self.emotion_map.get(label_key, "neutral")
            return {
                "emotion": emotion,
                "confidence": confidence,
                "raw_label": label_key,
            }
        except Exception as e:
            print(f"Fehler bei Emotion Detection: {e}")
            return {"emotion": "error", "confidence": 0.0, "error": str(e)}

    def detect_from_array(self, audio_array: np.ndarray, sample_rate: int = 16000) -> dict:
        """Detect the emotion in an in-memory audio array (e.g. from a microphone).

        The samples are written to a temporary WAV file, classified with
        ``detect_from_file`` and the file is removed afterwards.

        Args:
            audio_array: Audio samples; multi-channel input is downmixed to mono.
            sample_rate: Sample rate used when writing the temporary file.

        Returns:
            The same dictionary shapes as ``detect_from_file``.
        """
        if self.classifier is None:
            # Nothing can be classified, so skip the temporary-file round trip.
            return {"emotion": "unknown", "confidence": 0.0}
        mono = self._to_mono(audio_array)
        # Only reserve a file name here; the handle is closed before writing.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            temp_name = f.name
        try:
            sf.write(temp_name, mono, sample_rate)
            return self.detect_from_file(temp_name)
        finally:
            if os.path.exists(temp_name):
                os.remove(temp_name)

    def get_response_style(self, emotion: str) -> dict:
        """Return the response style for *emotion*; unknown names get "neutral".

        A fresh dict is returned on every call, so callers may modify it.
        """
        style = self._RESPONSE_STYLES.get(emotion, self._RESPONSE_STYLES["neutral"])
        return dict(style)
# Module-wide EmotionDetector instance, created on first request.
_detector = None


def get_emotion_detector() -> EmotionDetector:
    """Return the shared EmotionDetector, constructing it on the first call."""
    global _detector
    if _detector is not None:
        return _detector
    _detector = EmotionDetector()
    return _detector