From 60498137f27e8cbd3febfc3996dff73c59d24f52 Mon Sep 17 00:00:00 2001
From: cc-fuyu
Date: Fri, 27 Feb 2026 11:36:38 -0500
Subject: [PATCH] feat: add standardized time-series emotion output schema

Introduce a new StandardizedEmotionOutput schema that includes:

- AnalysisMetadata: video name, analysis timestamp, frame/face counts,
  duration
- EmotionEvent timeline: per-frame emotion label, timestamp, and
  confidence score
- EmotionSummary: aggregated emotion percentages (backward-compatible)

Add get_standardized_output() method to EmotionsAnalysisImp and a new
/process_video_standardized endpoint that returns the structured output.
The original /process_video endpoint remains unchanged for backward
compatibility.

This addresses the 'Standard Output Schemas' key feature of the GSoC
'Sentiment and Emotion Output Standardization' project.
---
 routes/video_routes.py                        |  73 +++++++++++
 schemas/standard_output_schema.py             |  81 ++++++++++++
 .../emotion_analysis/emotion_analysis_imp.py  | 122 ++++++++++++++++++
 .../emotion_analysis_service.py               |  13 +-
 4 files changed, 288 insertions(+), 1 deletion(-)
 create mode 100644 schemas/standard_output_schema.py

diff --git a/routes/video_routes.py b/routes/video_routes.py
index 6118f5a..2cd1e05 100644
--- a/routes/video_routes.py
+++ b/routes/video_routes.py
@@ -87,6 +87,79 @@ def process_video():
     return jsonify({"emotions": result}), 200
 
 
+def _download_and_prepare_video(video_name: str):
+    """Download a video from Firebase Storage and prepare it for analysis.
+
+    Returns the local file path to the (optionally trimmed) video,
+    or *None* when the download fails.
+    """
+    logger.info(f"Attempting to download video: {video_name} from storage.")
+    try:
+        os.makedirs("static/videos", exist_ok=True)
+        video_path = firebase_service.download_video_from_storage(video_name)
+        logger.info(f"Video downloaded successfully to: {video_path}")
+    except Exception as e:
+        logger.error(f"Failed to download video: {e}")
+        return None
+
+    # Reduce FPS when needed
+    try:
+        clip = VideoFileClip(video_path)
+        if clip.fps > 1:
+            logger.warning(f"High FPS detected ({clip.fps}). Reducing to 1fps.")
+            clip = clip.set_fps(1)
+            trimmed_path = video_path.replace(".webm", "_trimmed.mp4")
+            clip.write_videofile(trimmed_path, codec="libx264", audio=False, logger=None)
+            video_path = trimmed_path
+            logger.info(f"Trimmed video saved: {video_path}")
+    except Exception as e:
+        logger.warning(f"Failed to trim video, continuing anyway: {e}")
+
+    return video_path
+
+
+@video_routes.route("/process_video_standardized", methods=["POST", "OPTIONS"])
+def process_video_standardized():
+    """Analyze a video and return a **standardized** emotion output.
+
+    The response follows the ``StandardizedEmotionOutput`` schema which
+    includes analysis metadata, a chronological timeline of per-frame
+    emotion events (with confidence scores), and an aggregated summary.
+
+    Request JSON body:
+    ``{ "video_name": "<uploaded video name>" }``
+    """
+    if request.method == "OPTIONS":
+        return "", 204
+
+    video_name = request.json.get("video_name")
+    if not video_name:
+        return jsonify({"error": "Video name missing"}), 400
+
+    try:
+        video_path = _download_and_prepare_video(video_name)
+        if video_path is None:
+            return jsonify({"error": "Failed to download video"}), 500
+
+        emotion_analysis_service = EmotionsAnalysisImp(
+            model_path="models/model2/model2.h5"
+        )
+
+        start_analysis = time.time()
+        result = emotion_analysis_service.get_standardized_output(
+            video_path, video_name=video_name
+        )
+        elapsed = time.time() - start_analysis
+        logger.info(f"Standardized analysis completed in {elapsed:.2f}s")
+
+        delete_video()
+    except Exception:
+        logger.exception("Standardized video processing failed")
+        return jsonify({"error": "Video processing failed"}), 500
+
+    return jsonify(result.model_dump()), 200
+
+
 @video_routes.route("/test", methods=["GET"])
 def call_hello_world():
     logger.info("Attempting to call test firebase function.")
diff --git a/schemas/standard_output_schema.py b/schemas/standard_output_schema.py
new file mode 100644
index 0000000..960dae6
--- /dev/null
+++ b/schemas/standard_output_schema.py
@@ -0,0 +1,81 @@
+from pydantic import BaseModel, Field
+from typing import List, Optional
+from datetime import datetime, timezone
+
+
+class EmotionEvent(BaseModel):
+    """Represents a single emotion detection event at a specific point in time."""
+
+    timestamp_sec: float = Field(
+        ...,
+        description="Timestamp in seconds within the video when this emotion was detected.",
+    )
+    emotion: str = Field(
+        ...,
+        description="The predicted emotion label (e.g., 'Happy', 'Sad', 'Angry').",
+    )
+    confidence: float = Field(
+        ...,
+        ge=0.0,
+        le=1.0,
+        description="Model confidence score for the predicted emotion, ranging from 0.0 to 1.0.",
+    )
+
+
+class EmotionSummary(BaseModel):
+    """Aggregated emotion percentages across the entire video."""
+
+    Angry: float = Field(default=0.0, description="Percentage of frames classified as Angry.")
+    Disgusted: float = Field(default=0.0, description="Percentage of frames classified as Disgusted.")
+    Fearful: float = Field(default=0.0, description="Percentage of frames classified as Fearful.")
+    Happy: float = Field(default=0.0, description="Percentage of frames classified as Happy.")
+    Neutral: float = Field(default=0.0, description="Percentage of frames classified as Neutral.")
+    Sad: float = Field(default=0.0, description="Percentage of frames classified as Sad.")
+    Surprised: float = Field(default=0.0, description="Percentage of frames classified as Surprised.")
+
+
+class AnalysisMetadata(BaseModel):
+    """Metadata about the analysis run."""
+
+    video_name: str = Field(
+        ..., description="Name of the analyzed video file."
+    )
+    analysis_timestamp: str = Field(
+        default_factory=lambda: datetime.now(timezone.utc).isoformat(),
+        description="ISO 8601 timestamp of when the analysis was performed.",
+    )
+    total_frames_processed: int = Field(
+        default=0,
+        description="Total number of video frames that were processed.",
+    )
+    total_faces_detected: int = Field(
+        default=0,
+        description="Total number of face detections across all processed frames.",
+    )
+    video_duration_sec: Optional[float] = Field(
+        default=None,
+        description="Duration of the video in seconds, if available.",
+    )
+
+
+class StandardizedEmotionOutput(BaseModel):
+    """
+    Standardized output format for facial emotion analysis results.
+
+    This schema is designed to provide a structured, time-aware, and
+    metadata-rich output that can be consistently integrated into
+    RUXAILAB reports and dashboards, following the goals of the
+    'Sentiment and Emotion Output Standardization' GSoC project.
+    """
+
+    metadata: AnalysisMetadata = Field(
+        ..., description="Metadata about the analysis run."
+    )
+    timeline: List[EmotionEvent] = Field(
+        default_factory=list,
+        description="Chronologically ordered list of per-frame emotion detection events.",
+    )
+    summary: EmotionSummary = Field(
+        default_factory=EmotionSummary,
+        description="Aggregated emotion percentages across the entire video.",
+    )
diff --git a/services/emotion_analysis/emotion_analysis_imp.py b/services/emotion_analysis/emotion_analysis_imp.py
index 69f751b..ea7ea08 100644
--- a/services/emotion_analysis/emotion_analysis_imp.py
+++ b/services/emotion_analysis/emotion_analysis_imp.py
@@ -1,11 +1,18 @@
 import os
 from schemas.emotion_schema import GetEmotionPercentagesResponse
+from schemas.standard_output_schema import (
+    StandardizedEmotionOutput,
+    AnalysisMetadata,
+    EmotionEvent,
+    EmotionSummary,
+)
 from services.emotion_analysis.emotion_analysis_service import EmotionsAnalysisService
 import logging
 import coloredlogs
 from utils.utils import load_model, load_face_cascade, extract_features, predict_emotion, getPercentages
 import cv2
 
+
 class EmotionsAnalysisImp(EmotionsAnalysisService):
     def __init__(self, model_path: str):
         self.model = load_model(model_path)
@@ -92,3 +99,118 @@ def get_emotion_percentages(self, video_path: str) -> GetEmotionPercentagesRespo
             Sad=percentages['Sad'],
             Surprised=percentages['Surprised']
         )
+
+    def get_standardized_output(self, video_path: str, video_name: str = "") -> StandardizedEmotionOutput:
+        """
+        Analyze a video and return a fully standardized output including
+        metadata, a chronological timeline of per-frame emotion events
+        with confidence scores, and an aggregated summary.
+
+        This method extends the existing analysis pipeline to produce
+        structured, time-aware results suitable for integration into
+        RUXAILAB reports and dashboards.
+        """
+        labels = {
+            0: 'Angry', 1: 'Disgusted', 2: 'Fearful',
+            3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprised',
+        }
+        timeline: list[EmotionEvent] = []
+        predictions: list[str] = []
+
+        self.logger.info(f"[Standardized] Loading video from path: {video_path}")
+
+        # --- Handle missing / unopenable video ---
+        if not os.path.exists(video_path):
+            self.logger.error(f"Video file does not exist: {video_path}")
+            return StandardizedEmotionOutput(
+                metadata=AnalysisMetadata(video_name=video_name or os.path.basename(video_path)),
+                timeline=[],
+                summary=EmotionSummary(),
+            )
+
+        video = cv2.VideoCapture(video_path)
+        if not video.isOpened():
+            self.logger.error(f"Failed to open video file: {video_path}")
+            return StandardizedEmotionOutput(
+                metadata=AnalysisMetadata(video_name=video_name or os.path.basename(video_path)),
+                timeline=[],
+                summary=EmotionSummary(),
+            )
+
+        # Retrieve video duration from the capture object
+        fps = video.get(cv2.CAP_PROP_FPS) or 1.0
+        total_frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+        video_duration_sec = total_frame_count / fps if fps > 0 else None
+
+        last_processed_bucket = -1
+        frame_count = 0
+        processed_frames = 0
+        face_count = 0
+
+        while True:
+            ret, im = video.read()
+            if not ret:
+                break
+
+            timestamp_ms = video.get(cv2.CAP_PROP_POS_MSEC)
+            current_bucket = int(timestamp_ms / 500)  # sample ~2 frames per second
+
+            if current_bucket == last_processed_bucket:
+                continue
+            last_processed_bucket = current_bucket
+
+            frame_count += 1
+            processed_frames += 1
+            timestamp_sec = round(timestamp_ms / 1000.0, 3)
+
+            gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+            faces = self.face_cascade.detectMultiScale(gray, 1.3, 5)
+
+            try:
+                for (p, q, r, s) in faces:
+                    face_count += 1
+                    face_img = gray[q:q + s, p:p + r]
+                    face_img = cv2.resize(face_img, (48, 48))
+                    img = extract_features(face_img)
+                    pred = predict_emotion(self.model, img)
+
+                    prediction_label = labels[pred.argmax()]
+                    confidence = float(pred.max())
+
+                    predictions.append(prediction_label)
+                    timeline.append(
+                        EmotionEvent(
+                            timestamp_sec=timestamp_sec,
+                            emotion=prediction_label,
+                            confidence=round(confidence, 4),
+                        )
+                    )
+                    self.logger.info(
+                        f"[Standardized] t={timestamp_sec}s emotion={prediction_label} "
+                        f"confidence={confidence:.4f}"
+                    )
+            except cv2.error as e:
+                self.logger.error(f"OpenCV error at t={timestamp_sec}s: {e}")
+
+        video.release()
+
+        self.logger.info(f"[Standardized] Total frames: {frame_count}")
+        self.logger.info(f"[Standardized] Processed frames: {processed_frames}")
+        self.logger.info(f"[Standardized] Faces detected: {face_count}")
+
+        # Build aggregated summary
+        percentages = getPercentages(predictions)
+        summary = EmotionSummary(**percentages)
+
+        metadata = AnalysisMetadata(
+            video_name=video_name or os.path.basename(video_path),
+            total_frames_processed=processed_frames,
+            total_faces_detected=face_count,
+            video_duration_sec=video_duration_sec,
+        )
+
+        return StandardizedEmotionOutput(
+            metadata=metadata,
+            timeline=timeline,
+            summary=summary,
+        )
diff --git a/services/emotion_analysis/emotion_analysis_service.py b/services/emotion_analysis/emotion_analysis_service.py
index be557e2..19a1eb7 100644
--- a/services/emotion_analysis/emotion_analysis_service.py
+++ b/services/emotion_analysis/emotion_analysis_service.py
@@ -1,8 +1,19 @@
 from abc import ABC, abstractmethod
 from schemas.emotion_schema import GetEmotionPercentagesResponse
+from schemas.standard_output_schema import StandardizedEmotionOutput
+
 
 class EmotionsAnalysisService(ABC):
 
     @abstractmethod
     def get_emotion_percentages(self, video_path: str) -> GetEmotionPercentagesResponse:
-        pass
\ No newline at end of file
+        pass
+
+    @abstractmethod
+    def get_standardized_output(self, video_path: str, video_name: str = "") -> StandardizedEmotionOutput:
+        """
+        Analyze a video and return a standardized output that includes
+        metadata, a chronological timeline of emotion events with
+        confidence scores, and an aggregated summary.
+        """
+        pass