Skip to content

Commit 0c2de54

Browse files
committed
feat: add emotion timeline to /process_video
1 parent 32331fd commit 0c2de54

File tree

5 files changed

+318
-56
lines changed

5 files changed

+318
-56
lines changed

routes/video_routes.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,17 @@
2222

2323
logger = logging.getLogger(__name__)
2424

def analyze_clip(emotion_analysis_service, video_path, interval_s=10):
    """Run the emotion-analysis service on a local video file.

    Args:
        emotion_analysis_service: Object exposing get_emotion_percentages().
        video_path: Path of the video to analyze.
        interval_s: Timeline window size (seconds) forwarded to the service.

    Returns:
        The service's analysis result, or None if the analysis raised.
    """
    logger.info(f"Analyzing video: {video_path}")
    try:
        analysis = emotion_analysis_service.get_emotion_percentages(
            video_path, interval_s=interval_s
        )
        logger.info(f"Emotion analysis result: {analysis}")
        return analysis
    except Exception as e:
        # Swallow and signal failure to the caller; the route layer decides the HTTP response.
        logger.error(f"Failed to analyze video: {e}")
        return None

36-
def download_and_analyze_video(video_name):
35+
def download_and_analyze_video(video_name, interval_s=10):
3736
logger.info(f"Attempting to download video: {video_name} from storage.")
3837
try:
3938
local_path = f"static/videos/{video_name}"
@@ -61,11 +60,11 @@ def download_and_analyze_video(video_name):
6160
logger.info("Initializing emotion analysis.")
6261
emotion_analysis_service = EmotionsAnalysisImp(model_path="models/model2/model2.h5")
6362
start_analysis = time.time()
64-
result = analyze_clip(emotion_analysis_service, video_path)
63+
result = analyze_clip(emotion_analysis_service, video_path, interval_s=interval_s)
6564
end_analysis = time.time()
6665
logger.info(f"Time taken for analysis: {end_analysis - start_analysis} seconds")
6766

68-
return result # retorna o objeto de emoções diretamente
67+
return result
6968

7069
@video_routes.route("/process_video", methods=["POST", "OPTIONS"])
7170
def process_video():
@@ -77,14 +76,19 @@ def process_video():
7776
if not video_name:
7877
return jsonify({"error": "Video name missing"}), 400
7978

79+
interval_s = request.json.get("interval_s", 10)
80+
8081
try:
81-
result = download_and_analyze_video(video_name)
82+
result = download_and_analyze_video(video_name, interval_s=interval_s)
8283
delete_video()
8384
except Exception as e:
8485
logger.exception("Video processing failed")
8586
return jsonify({"error": "Video processing failed"}), 500
8687

87-
return jsonify({"emotions": result}), 200
88+
if result is None:
89+
return jsonify({"error": "No result from analysis"}), 500
90+
91+
return jsonify(result.model_dump()), 200
8892

8993

9094
@video_routes.route("/test", methods=["GET"])

schemas/emotion_schema.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from pydantic import BaseModel
2+
from typing import List
3+
24

35
class GetEmotionPercentagesResponse(BaseModel):
46
Angry: float
@@ -7,4 +9,17 @@ class GetEmotionPercentagesResponse(BaseModel):
79
Happy: float
810
Neutral: float
911
Sad: float
10-
Surprised: float
12+
Surprised: float
class TimelineEntry(BaseModel):
    """One segment of the emotion timeline returned by /process_video."""

    id: int              # 1-based sequence number within the timeline
    starting_time: str   # "MM:SS" offset where the segment starts
    ending_time: str     # "MM:SS" offset where the segment ends
    emotion: str         # dominant emotion label for the segment, upper-cased
    value: float         # average confidence for the dominant emotion, in percent
class EmotionAnalysisResponse(BaseModel):
    """Full /process_video payload: overall percentages plus a segmented timeline."""

    emotions: GetEmotionPercentagesResponse  # percentage per emotion over the whole video
    timeline: List[TimelineEntry]            # chronological dominant-emotion segments
Lines changed: 103 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,80 @@
11
import os
2-
from schemas.emotion_schema import GetEmotionPercentagesResponse
2+
from collections import defaultdict
3+
from schemas.emotion_schema import GetEmotionPercentagesResponse, TimelineEntry, EmotionAnalysisResponse
34
from services.emotion_analysis.emotion_analysis_service import EmotionsAnalysisService
45
import logging
56
import coloredlogs
67
from utils.utils import load_model, load_face_cascade, extract_features, predict_emotion, getPercentages
78
import cv2
9+
import numpy as np
10+
# Maps a softmax output index to a human-readable emotion label.
# NOTE(review): presumably matches the Keras model's 7-class training order — confirm.
EMOTION_LABELS = {
    0: 'Angry',
    1: 'Disgusted',
    2: 'Fearful',
    3: 'Happy',
    4: 'Neutral',
    5: 'Sad',
    6: 'Surprised',
}
12+
13+
14+
def _format_time(seconds):
15+
return f"{int(seconds)//60:02d}:{int(seconds)%60:02d}"
16+
17+
18+
def _get_dominant(preds):
19+
"""Return (dominant_label, avg_confidence_%) from list of (label, confidence) tuples."""
20+
if not preds:
21+
return None, 0.0
22+
counts, confs = defaultdict(int), defaultdict(list)
23+
for label, conf in preds:
24+
counts[label] += 1
25+
confs[label].append(conf)
26+
dominant = max(counts, key=counts.get)
27+
return dominant, round(np.mean(confs[dominant]) * 100, 2)
def _build_timeline_fixed(timed_preds, interval_s, duration):
    """Group (timestamp_s, label, confidence) triples into fixed windows of
    *interval_s* seconds and emit one TimelineEntry per non-empty window.

    Returns [] when there are no predictions or the duration is non-positive.
    """
    if not timed_preds or duration <= 0:
        return []

    # Round the window count up so a short trailing window is kept.
    num_windows = max(1, int(duration / interval_s) + (1 if duration % interval_s else 0))

    buckets = defaultdict(list)
    for ts, label, conf in timed_preds:
        # Clamp samples at/after the nominal end into the last window.
        idx = min(int(ts / interval_s), num_windows - 1)
        buckets[idx].append((label, conf))

    entries = []
    for idx in sorted(buckets):  # ascending time order; empty windows are skipped
        dominant, avg_conf = _get_dominant(buckets[idx])
        entries.append(TimelineEntry(
            id=len(entries) + 1,
            starting_time=_format_time(idx * interval_s),
            ending_time=_format_time(min((idx + 1) * interval_s, duration)),
            emotion=dominant.upper(),
            value=avg_conf,
        ))
    return entries
def _build_timeline_dynamic(timed_preds, duration):
    """Emit one TimelineEntry per run of consecutive identical emotion labels.

    A new segment starts whenever the predicted label changes; the final
    segment is closed at *duration*. Returns [] for empty input or
    non-positive duration.
    """
    if not timed_preds or duration <= 0:
        return []

    entries = []
    run_preds = []
    run_label = timed_preds[0][1]
    run_start = timed_preds[0][0]

    def close_run(end_ts):
        # Value is the mean confidence over the run, in percent.
        _, avg_conf = _get_dominant(run_preds)
        entries.append(TimelineEntry(
            id=len(entries) + 1,
            starting_time=_format_time(run_start),
            ending_time=_format_time(end_ts),
            emotion=run_label.upper(),
            value=avg_conf,
        ))

    for ts, label, conf in timed_preds:
        if label != run_label:
            close_run(ts)
            run_label, run_start, run_preds = label, ts, []
        run_preds.append((label, conf))

    if run_preds:
        close_run(duration)
    return entries
77+
878

979
class EmotionsAnalysisImp(EmotionsAnalysisService):
1080
def __init__(self, model_path: str):
@@ -13,82 +83,71 @@ def __init__(self, model_path: str):
1383
coloredlogs.install(level="INFO", fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
1484
self.logger = logging.getLogger(__name__)
1585

16-
def get_emotion_percentages(self, video_path: str) -> GetEmotionPercentagesResponse:
17-
predictions = []
18-
labels = {0: 'Angry', 1: 'Disgusted', 2: 'Fearful', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprised'}
86+
def get_emotion_percentages(self, video_path: str, interval_s: int = 10) -> EmotionAnalysisResponse:
87+
predictions, timed_predictions = [], []
1988
self.logger.info(f"Loading video from path: {video_path}")
2089

90+
empty = EmotionAnalysisResponse(
91+
emotions=GetEmotionPercentagesResponse(
92+
Angry=0, Disgusted=0, Fearful=0, Happy=0, Neutral=0, Sad=0, Surprised=0),
93+
timeline=[],
94+
)
95+
2196
if not os.path.exists(video_path):
2297
self.logger.error(f"Video file does not exist: {video_path}")
2398
directory = os.path.dirname(video_path)
2499
if os.path.exists(directory):
25-
self.logger.info(f"Contents of the directory {directory}:")
26-
for item in os.listdir(directory):
27-
self.logger.info(f" - {item}")
28-
else:
29-
self.logger.error(f"Directory does not exist: {directory}")
30-
return GetEmotionPercentagesResponse(Angry=0, Disgusted=0, Fearful=0, Happy=0, Neutral=0, Sad=0, Surprised=0)
100+
self.logger.info(f"Contents of {directory}: {os.listdir(directory)}")
101+
return empty
31102

32103
video = cv2.VideoCapture(video_path)
33104
if not video.isOpened():
34105
self.logger.error(f"Failed to open video file: {video_path}")
35-
return GetEmotionPercentagesResponse(Angry=0, Disgusted=0, Fearful=0, Happy=0, Neutral=0, Sad=0, Surprised=0)
106+
return empty
36107

108+
fps = video.get(cv2.CAP_PROP_FPS)
109+
video_duration = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) / fps if fps > 0 else 0
37110
last_processed_second = -1
38-
39-
40-
frame_count = 0
41-
processed_frames = 0
42-
face_count = 0
111+
frame_count, face_count = 0, 0
43112

44113
while True:
45114
ret, im = video.read()
46115
if not ret:
47116
break
48-
49117
timestamp_ms = video.get(cv2.CAP_PROP_POS_MSEC)
50-
current_second = int(timestamp_ms / 500 ) # 2 frame per second
51-
118+
current_second = int(timestamp_ms / 500) # 2 frames per second
52119
if current_second == last_processed_second:
53120
continue
54121
last_processed_second = current_second
55-
56122
frame_count += 1
57-
58-
processed_frames += 1
59123
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
60124
faces = self.face_cascade.detectMultiScale(gray, 1.3, 5)
61125
try:
62126
for (p, q, r, s) in faces:
63127
face_count += 1
64-
image = gray[q:q + s, p:p + r]
65-
image = cv2.resize(image, (48, 48))
66-
img = extract_features(image)
67-
pred = predict_emotion(self.model, img)
68-
prediction_label = labels[pred.argmax()]
69-
self.logger.info(f"Prediction for frame {frame_count}: {prediction_label}")
70-
predictions.append(prediction_label)
128+
image = cv2.resize(gray[q:q+s, p:p+r], (48, 48))
129+
pred = predict_emotion(self.model, extract_features(image))
130+
pred_idx = pred.argmax()
131+
label = EMOTION_LABELS[pred_idx]
132+
conf = float(pred[0][pred_idx])
133+
predictions.append(label)
134+
timed_predictions.append((timestamp_ms / 1000.0, label, conf))
71135
except cv2.error as e:
72136
self.logger.error(f"OpenCV error: {e}")
73-
pass
74137

75138
video.release()
76-
77-
self.logger.info(f"Total frames in video: {frame_count}")
78-
self.logger.info(f"Frames actually processed: {processed_frames}")
79-
self.logger.info(f"Total faces detected: {face_count}")
139+
self.logger.info(f"Processed {frame_count} frames, {face_count} faces detected")
80140

81141
if not predictions:
82142
self.logger.warning("No faces detected or no predictions made.")
83143

84144
percentages = getPercentages(predictions)
85-
self.logger.info(f"Percentages of emotions detected: {percentages}")
86-
return GetEmotionPercentagesResponse(
87-
Angry=percentages['Angry'],
88-
Disgusted=percentages['Disgusted'],
89-
Fearful=percentages['Fearful'],
90-
Happy=percentages['Happy'],
91-
Neutral=percentages['Neutral'],
92-
Sad=percentages['Sad'],
93-
Surprised=percentages['Surprised']
94-
)
145+
emotions = GetEmotionPercentagesResponse(**percentages)
146+
147+
if interval_s == 0:
148+
timeline = _build_timeline_dynamic(timed_predictions, video_duration)
149+
else:
150+
timeline = _build_timeline_fixed(timed_predictions, interval_s, video_duration)
151+
self.logger.info(f"Timeline: {len(timeline)} entries")
152+
153+
return EmotionAnalysisResponse(emotions=emotions, timeline=timeline)
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from abc import ABC, abstractmethod
22

3-
from schemas.emotion_schema import GetEmotionPercentagesResponse
3+
from schemas.emotion_schema import EmotionAnalysisResponse
4+
45

56
class EmotionsAnalysisService(ABC):
    """Interface for video emotion-analysis backends."""

    @abstractmethod
    def get_emotion_percentages(self, video_path: str, interval_s: int = 10) -> EmotionAnalysisResponse:
        """Analyze the video at *video_path*.

        interval_s is the timeline window size in seconds; implementations in
        this project treat 0 as dynamic (change-driven) segmentation.
        """

0 commit comments

Comments
 (0)