From 60498137f27e8cbd3febfc3996dff73c59d24f52 Mon Sep 17 00:00:00 2001
From: cc-fuyu
Date: Fri, 27 Feb 2026 11:36:38 -0500
Subject: [PATCH] feat: add standardized time-series emotion output schema

Introduce a new StandardizedEmotionOutput schema that includes:

- AnalysisMetadata: video name, analysis timestamp, frame/face counts,
  duration
- EmotionEvent timeline: per-frame emotion label, timestamp, and
  confidence score
- EmotionSummary: aggregated emotion percentages (backward-compatible)

Add get_standardized_output() method to EmotionsAnalysisImp and a new
/process_video_standardized endpoint that returns the structured output.
The original /process_video endpoint remains unchanged for backward
compatibility.

This addresses the 'Standard Output Schemas' key feature of the GSoC
'Sentiment and Emotion Output Standardization' project.
---
 routes/video_routes.py                        |  73 +++++++++++
 schemas/standard_output_schema.py             |  81 ++++++++++++
 .../emotion_analysis/emotion_analysis_imp.py  | 122 ++++++++++++++++++
 .../emotion_analysis_service.py               |  13 +-
 4 files changed, 288 insertions(+), 1 deletion(-)
 create mode 100644 schemas/standard_output_schema.py

diff --git a/routes/video_routes.py b/routes/video_routes.py
index 6118f5a..2cd1e05 100644
--- a/routes/video_routes.py
+++ b/routes/video_routes.py
@@ -87,6 +87,79 @@ def process_video():
     return jsonify({"emotions": result}), 200
 
 
+def _download_and_prepare_video(video_name: str):
+    """Download a video from Firebase Storage and prepare it for analysis.
+
+    Returns the local file path to the (optionally trimmed) video,
+    or *None* when the download fails.
+    """
+    logger.info(f"Attempting to download video: {video_name} from storage.")
+    try:
+        os.makedirs("static/videos", exist_ok=True)
+        video_path = firebase_service.download_video_from_storage(video_name)
+        logger.info(f"Video downloaded successfully to: {video_path}")
+    except Exception as e:
+        logger.error(f"Failed to download video: {e}")
+        return None
+
+    # Reduce FPS when needed
+    try:
+        clip = VideoFileClip(video_path)
+        if clip.fps > 1:
+            logger.warning(f"High FPS detected ({clip.fps}). Reducing to 1fps.")
+            clip = clip.set_fps(1)
+            trimmed_path = video_path.replace(".webm", "_trimmed.mp4")
+            clip.write_videofile(trimmed_path, codec="libx264", audio=False, logger=None)
+            video_path = trimmed_path
+            logger.info(f"Trimmed video saved: {video_path}")
+    except Exception as e:
+        logger.warning(f"Failed to trim video, continuing anyway: {e}")
+
+    return video_path
+
+
+@video_routes.route("/process_video_standardized", methods=["POST", "OPTIONS"])
+def process_video_standardized():
+    """Analyze a video and return a **standardized** emotion output.
+
+    The response follows the ``StandardizedEmotionOutput`` schema which
+    includes analysis metadata, a chronological timeline of per-frame
+    emotion events (with confidence scores), and an aggregated summary.
+
+    Request JSON body:
+    ``{ "video_name": "<uploaded video name>" }``
+    """
+    if request.method == "OPTIONS":
+        return "", 204
+
+    video_name = request.json.get("video_name")
+    if not video_name:
+        return jsonify({"error": "Video name missing"}), 400
+
+    try:
+        video_path = _download_and_prepare_video(video_name)
+        if video_path is None:
+            return jsonify({"error": "Failed to download video"}), 500
+
+        emotion_analysis_service = EmotionsAnalysisImp(
+            model_path="models/model2/model2.h5"
+        )
+
+        start_analysis = time.time()
+        result = emotion_analysis_service.get_standardized_output(
+            video_path, video_name=video_name
+        )
+        elapsed = time.time() - start_analysis
+        logger.info(f"Standardized analysis completed in {elapsed:.2f}s")
+
+        delete_video()
+    except Exception:
+        logger.exception("Standardized video processing failed")
+        return jsonify({"error": "Video processing failed"}), 500
+
+    return jsonify(result.model_dump()), 200
+
+
 @video_routes.route("/test", methods=["GET"])
 def call_hello_world():
     logger.info("Attempting to call test firebase function.")
diff --git a/schemas/standard_output_schema.py b/schemas/standard_output_schema.py
new file mode 100644
index 0000000..960dae6
--- /dev/null
+++ b/schemas/standard_output_schema.py
@@ -0,0 +1,81 @@
+from pydantic import BaseModel, Field
+from typing import List, Optional
+from datetime import datetime, timezone
+
+
+class EmotionEvent(BaseModel):
+    """Represents a single emotion detection event at a specific point in time."""
+
+    timestamp_sec: float = Field(
+        ...,
+        description="Timestamp in seconds within the video when this emotion was detected.",
+    )
+    emotion: str = Field(
+        ...,
+        description="The predicted emotion label (e.g., 'Happy', 'Sad', 'Angry').",
+    )
+    confidence: float = Field(
+        ...,
+        ge=0.0,
+        le=1.0,
+        description="Model confidence score for the predicted emotion, ranging from 0.0 to 1.0.",
+    )
+
+
+class EmotionSummary(BaseModel):
+    """Aggregated emotion percentages across the entire video."""
+
+    Angry: float = Field(default=0.0, description="Percentage of frames classified as Angry.")
+    Disgusted: float = Field(default=0.0, description="Percentage of frames classified as Disgusted.")
+    Fearful: float = Field(default=0.0, description="Percentage of frames classified as Fearful.")
+    Happy: float = Field(default=0.0, description="Percentage of frames classified as Happy.")
+    Neutral: float = Field(default=0.0, description="Percentage of frames classified as Neutral.")
+    Sad: float = Field(default=0.0, description="Percentage of frames classified as Sad.")
+    Surprised: float = Field(default=0.0, description="Percentage of frames classified as Surprised.")
+
+
+class AnalysisMetadata(BaseModel):
+    """Metadata about the analysis run."""
+
+    video_name: str = Field(
+        ..., description="Name of the analyzed video file."
+    )
+    analysis_timestamp: str = Field(
+        default_factory=lambda: datetime.now(timezone.utc).isoformat(),
+        description="ISO 8601 timestamp of when the analysis was performed.",
+    )
+    total_frames_processed: int = Field(
+        default=0,
+        description="Total number of video frames that were processed.",
+    )
+    total_faces_detected: int = Field(
+        default=0,
+        description="Total number of face detections across all processed frames.",
+    )
+    video_duration_sec: Optional[float] = Field(
+        default=None,
+        description="Duration of the video in seconds, if available.",
+    )
+
+
+class StandardizedEmotionOutput(BaseModel):
+    """
+    Standardized output format for facial emotion analysis results.
+
+    This schema is designed to provide a structured, time-aware, and
+    metadata-rich output that can be consistently integrated into
+    RUXAILAB reports and dashboards, following the goals of the
+    'Sentiment and Emotion Output Standardization' GSoC project.
+    """
+
+    metadata: AnalysisMetadata = Field(
+        ..., description="Metadata about the analysis run."
+    )
+    timeline: List[EmotionEvent] = Field(
+        default_factory=list,
+        description="Chronologically ordered list of per-frame emotion detection events.",
+    )
+    summary: EmotionSummary = Field(
+        default_factory=EmotionSummary,
+        description="Aggregated emotion percentages across the entire video.",
+    )
diff --git a/services/emotion_analysis/emotion_analysis_imp.py b/services/emotion_analysis/emotion_analysis_imp.py
index 69f751b..ea7ea08 100644
--- a/services/emotion_analysis/emotion_analysis_imp.py
+++ b/services/emotion_analysis/emotion_analysis_imp.py
@@ -1,11 +1,18 @@
 import os
 from schemas.emotion_schema import GetEmotionPercentagesResponse
+from schemas.standard_output_schema import (
+    StandardizedEmotionOutput,
+    AnalysisMetadata,
+    EmotionEvent,
+    EmotionSummary,
+)
 from services.emotion_analysis.emotion_analysis_service import EmotionsAnalysisService
 import logging
 import coloredlogs
 from utils.utils import load_model, load_face_cascade, extract_features, predict_emotion, getPercentages
 import cv2
 
+
 class EmotionsAnalysisImp(EmotionsAnalysisService):
     def __init__(self, model_path: str):
         self.model = load_model(model_path)
@@ -92,3 +99,118 @@ def get_emotion_percentages(self, video_path: str) -> GetEmotionPercentagesRespo
             Sad=percentages['Sad'],
             Surprised=percentages['Surprised']
         )
+
+    def get_standardized_output(self, video_path: str, video_name: str = "") -> StandardizedEmotionOutput:
+        """
+        Analyze a video and return a fully standardized output including
+        metadata, a chronological timeline of per-frame emotion events
+        with confidence scores, and an aggregated summary.
+
+        This method extends the existing analysis pipeline to produce
+        structured, time-aware results suitable for integration into
+        RUXAILAB reports and dashboards.
+        """
+        labels = {
+            0: 'Angry', 1: 'Disgusted', 2: 'Fearful',
+            3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprised',
+        }
+        timeline: list[EmotionEvent] = []
+        predictions: list[str] = []
+
+        self.logger.info(f"[Standardized] Loading video from path: {video_path}")
+
+        # --- Handle missing / unopenable video ---
+        if not os.path.exists(video_path):
+            self.logger.error(f"Video file does not exist: {video_path}")
+            return StandardizedEmotionOutput(
+                metadata=AnalysisMetadata(video_name=video_name or os.path.basename(video_path)),
+                timeline=[],
+                summary=EmotionSummary(),
+            )
+
+        video = cv2.VideoCapture(video_path)
+        if not video.isOpened():
+            self.logger.error(f"Failed to open video file: {video_path}")
+            return StandardizedEmotionOutput(
+                metadata=AnalysisMetadata(video_name=video_name or os.path.basename(video_path)),
+                timeline=[],
+                summary=EmotionSummary(),
+            )
+
+        # Retrieve video duration from the capture object
+        fps = video.get(cv2.CAP_PROP_FPS) or 1.0
+        total_frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+        video_duration_sec = total_frame_count / fps if fps > 0 else None
+
+        last_processed_bucket = -1
+        frame_count = 0
+        processed_frames = 0
+        face_count = 0
+
+        while True:
+            ret, im = video.read()
+            if not ret:
+                break
+
+            timestamp_ms = video.get(cv2.CAP_PROP_POS_MSEC)
+            current_bucket = int(timestamp_ms / 500)  # sample ~2 frames per second
+
+            if current_bucket == last_processed_bucket:
+                continue
+            last_processed_bucket = current_bucket
+
+            frame_count += 1
+            processed_frames += 1
+            timestamp_sec = round(timestamp_ms / 1000.0, 3)
+
+            gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+            faces = self.face_cascade.detectMultiScale(gray, 1.3, 5)
+
+            try:
+                for (p, q, r, s) in faces:
+                    face_count += 1
+                    face_img = gray[q:q + s, p:p + r]
+                    face_img = cv2.resize(face_img, (48, 48))
+                    img = extract_features(face_img)
+                    pred = predict_emotion(self.model, img)
+
+                    prediction_label = labels[pred.argmax()]
+                    confidence = float(pred.max())
+
+                    predictions.append(prediction_label)
+                    timeline.append(
+                        EmotionEvent(
+                            timestamp_sec=timestamp_sec,
+                            emotion=prediction_label,
+                            confidence=round(confidence, 4),
+                        )
+                    )
+                    self.logger.info(
+                        f"[Standardized] t={timestamp_sec}s emotion={prediction_label} "
+                        f"confidence={confidence:.4f}"
+                    )
+            except cv2.error as e:
+                self.logger.error(f"OpenCV error at t={timestamp_sec}s: {e}")
+
+        video.release()
+
+        self.logger.info(f"[Standardized] Total frames: {frame_count}")
+        self.logger.info(f"[Standardized] Processed frames: {processed_frames}")
+        self.logger.info(f"[Standardized] Faces detected: {face_count}")
+
+        # Build aggregated summary
+        percentages = getPercentages(predictions)
+        summary = EmotionSummary(**percentages)
+
+        metadata = AnalysisMetadata(
+            video_name=video_name or os.path.basename(video_path),
+            total_frames_processed=processed_frames,
+            total_faces_detected=face_count,
+            video_duration_sec=video_duration_sec,
+        )
+
+        return StandardizedEmotionOutput(
+            metadata=metadata,
+            timeline=timeline,
+            summary=summary,
+        )
diff --git a/services/emotion_analysis/emotion_analysis_service.py b/services/emotion_analysis/emotion_analysis_service.py
index be557e2..19a1eb7 100644
--- a/services/emotion_analysis/emotion_analysis_service.py
+++ b/services/emotion_analysis/emotion_analysis_service.py
@@ -1,8 +1,19 @@
 from abc import ABC, abstractmethod
 from schemas.emotion_schema import GetEmotionPercentagesResponse
+from schemas.standard_output_schema import StandardizedEmotionOutput
+
 
 class EmotionsAnalysisService(ABC):
 
     @abstractmethod
     def get_emotion_percentages(self, video_path: str) -> GetEmotionPercentagesResponse:
-        pass
\ No newline at end of file
+        pass
+
+    @abstractmethod
+    def get_standardized_output(self, video_path: str, video_name: str = "") -> StandardizedEmotionOutput:
+        """
+        Analyze a video and return a standardized output that includes
+        metadata, a chronological timeline of emotion events with
+        confidence scores, and an aggregated summary.
+        """
+        pass