openmpf · kburkewv · Jun 27, 2025 · Jul 15, 2025 · Jul 15, 2025 · Jul 16, 2025
diff --git a/python/GeminiVideoSummarization/Dockerfile b/python/GeminiVideoSummarization/Dockerfile
@@ -0,0 +1,67 @@
+# syntax=docker/dockerfile:1
+
+#############################################################################
+# NOTICE                                                                    #
+#                                                                           #
+# This software (or technical data) was produced for the U.S. Government    #
+# under contract, and is subject to the Rights in Data-General Clause       #
+# 52.227-14, Alt. IV (DEC 2007).                                            #
+#                                                                           #
+# Copyright 2024 The MITRE Corporation. All Rights Reserved.                #
+#############################################################################
+
+#############################################################################
+# Copyright 2024 The MITRE Corporation                                      #
+#                                                                           #
+# Licensed under the Apache License, Version 2.0 (the "License");           #
+# you may not use this file except in compliance with the License.          #
+# You may obtain a copy of the License at                                   #
+#                                                                           #
+#    http://www.apache.org/licenses/LICENSE-2.0                             #
+#                                                                           #
+# Unless required by applicable law or agreed to in writing, software       #
+# distributed under the License is distributed on an "AS IS" BASIS,         #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
+# See the License for the specific language governing permissions and       #
+# limitations under the License.                                            #
+#############################################################################
+
+# BUILD_REGISTRY is prepended to the base image name with no separator, so a
+# non-empty value must already end with "/". BUILD_TAG selects the tag of the
+# base image and defaults to "latest".
+ARG BUILD_REGISTRY
+ARG BUILD_TAG=latest
+FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG}
+
+# For Google Gemini.
+# After installing the following /usr/bin will have:
+#   python3 -> python3.8
+#   python3.8
+#   python3.9
+#
+# The apt cache, the package lists and /tmp are tmpfs mounts for this step.
+# `apt-get update` and `apt-get install` are chained with && so that a failed
+# update stops the step instead of installing from missing package lists.
+RUN --mount=type=tmpfs,target=/var/cache/apt \
+    --mount=type=tmpfs,target=/var/lib/apt/lists \
+    --mount=type=tmpfs,target=/tmp \
+    apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+        libpython3.9 \
+        python3.9 \
+        python3.9-venv \
+        wget
+
+# Create a separate venv for the Python 3.9 subprocess.
+# The steps are chained with && so that a failure in any one of them fails the
+# build, and --no-cache-dir keeps pip's download cache out of the image layer.
+RUN mkdir -p /gemini-subprocess/venv && \
+    python3.9 -m venv /gemini-subprocess/venv && \
+    /gemini-subprocess/venv/bin/pip3 install --no-cache-dir \
+        google-cloud-storage \
+        google-genai \
+        pytictoc
+
+# The trailing slash makes it explicit that the destination is the directory
+# created in the previous step, not a file name.
+COPY gemini-process-video.py /gemini-subprocess/
+
+# --no-cache-dir keeps pip's download cache out of the image layers.
+RUN pip3 install --no-cache-dir --upgrade pip
+
+RUN pip3 install --no-cache-dir tenacity
+
+# Build with --build-arg RUN_TESTS=true (compared case-insensitively) to run
+# the component's tests as part of the image build.
+ARG RUN_TESTS=false
+
+# The build context is bind-mounted at the working directory for this step.
+# install-component.sh is chained with && so that a failed install fails the
+# build even when the tests are not run.
+RUN --mount=target=.,readwrite \
+    install-component.sh && \
+    if [ "${RUN_TESTS,,}" == true ]; then python tests/test_gemini.py; fi
+
+# Image metadata, using the org.label-schema key names (schema version 1.0).
+LABEL org.label-schema.license="Apache 2.0" \
+      org.label-schema.name="OpenMPF Gemini Video Summarization" \
+      org.label-schema.schema-version="1.0" \
+      org.label-schema.url="https://openmpf.github.io" \
+      org.label-schema.vcs-url="https://github.com/openmpf/openmpf-components" \
+      org.label-schema.vendor="MITRE"
diff --git a/python/GeminiVideoSummarization/README.md b/python/GeminiVideoSummarization/README.md
@@ -0,0 +1,70 @@
+# Overview
+
+This repository contains source code for the OpenMPF Gemini Video Summarization Component.
+
+This component produces a summary and an event timeline for a video using Vertex AI on the Google Cloud Platform (GCP) and the Gemini models it provides. By default, this component does a general video summarization and event timeline.
+
+# Job Properties
+
+The following are the properties that can be specified for the component. The properties in the first list have default values; the properties listed under REQUIRED PROPERTIES must be specified by the user.
+
+- `MODEL_NAME`: The model name for which Gemini model to use.
+- `GENERATION_PROMPT_PATH`: Path to txt file which contains prompt.
+- `GENERATION_MAX_ATTEMPTS`: The number of attempts to get a valid JSON response from the model.
+- `TIMELINE_CHECK_TARGET_THRESHOLD`: Specifies the number of seconds that video events can occur before or after video bounds. If exceeded, another attempt will be made to generate the output. Set to -1 to disable check.
+- `MERGE_TRACKS`: In the context of videos, when set to true, attempt to merge tracks from the entire video.
+- `TARGET_SEGMENT_LENGTH`: The length, in seconds, of the segments the video is divided into.
+- `VFR_TARGET_SEGMENT_LENGTH`: The length, in seconds, of the segments a variable frame rate (VFR) video is divided into.
+- `PROCESS_FPS`: The number of frames to process per second. Gemini accepts FPS values in the range (0.0, 24.0].
+- `ENABLE_TIMELINE`: When set to 1, this enables the creation of timelines. If there is no custom prompt file, it will use default_prompt.txt. When set to 0, this disables timelines. If there is no custom prompt file, it will use default_prompt_no_tl.txt.
+
+REQUIRED PROPERTIES:
+- `PROJECT_ID`: The project ID for your GCP project.
+- `BUCKET_NAME`: The GCP bucket that holds the data for processing.
+- `LABEL_PREFIX`: The prefix of your labels, e.g., your project name.
+- `LABEL_USER`: The user using the GCP resources.
+- `LABEL_PURPOSE`: The reason for using the GCP resources.
+- `GOOGLE_APPLICATION_CREDENTIALS`: Your gcloud CLI credentials in a json file.
+
+# Custom Prompts
+
+For the default prompt with timelines enabled refer to gemini_video_summarization_component/data/default_prompt.txt. 
+
+For the default prompt WITHOUT timelines enabled refer to gemini_video_summarization_component/data/default_prompt_no_tl.txt.
+
+Set GENERATION_PROMPT_PATH to specify the path to a txt file containing a generation prompt to provide the model. 
+
+When making a custom prompt or altering the default prompt with timelines enabled, it is required that the parts about timestamp formatting, timestamp offsets and the JSON structured output are included in your prompt.
+
+# GCP Certificate
+
+Using this component requires access to Vertex AI, which means a GCP certificate is required.
+Set GOOGLE_APPLICATION_CREDENTIALS to specify the path to the GCP certificate JSON file.
+
+# Timestamps
+
+When videos exceed 2 minutes in length, timestamps for events become more inaccurate. For accurate timestamps, it is recommended to keep TARGET_SEGMENT_LENGTH and VFR_TARGET_SEGMENT_LENGTH at 120.
+If you'd prefer more cohesive summaries over timeline accuracy, you can pass the whole video as one segment by setting TARGET_SEGMENT_LENGTH and VFR_TARGET_SEGMENT_LENGTH to -1. 
+Keep in mind the maximum length of a video that can be processed is 45 minutes (2700s).
+This means TARGET_SEGMENT_LENGTH and VFR_TARGET_SEGMENT_LENGTH both have a max of 2700 and are REQUIRED to be set for videos longer than 45 minutes.
+
+To prevent further inaccuracies, the prompt asks for timestamps in MM:SS format, because Gemini handles timestamps best in that format. The component itself converts between that format and seconds.
+So, if altering the prompt, leave in instructions about timestamp formatting.
+
+Another cause of timestamp inaccuracies is the model you are using. With Gemini flash models, not only do the descriptions and summaries drop in quality, the timestamps also become more inaccurate.
+For the most accurate timestamps, use segmentation and the latest Gemini pro model.
+
+# Docker Container
+
+```yaml
+  gemini-video-summarization:
+    <<: *detection-component-base
+    image: <IMAGE>
+    volumes:
+      - host_directory/cert_name.json:container_directory/cert_name.json:ro # Mount the GCP file from your localhost to the container
+      - host_directory/prompt_file.txt:container_directory/prompt_file.txt:ro # OPTIONAL: Mount the custom prompt file from your localhost to the container
+      - shared_data:/opt/mpf/share
+    environment:
+      - MPF_PROP_PROJECT_ID=<PROJECT ID>
+      - MPF_PROP_BUCKET_NAME=<BUCKET NAME>
+    ... # Add more properties here
+      - MPF_PROP_GOOGLE_APPLICATION_CREDENTIALS=container_directory/cert_name.json
+      - MPF_PROP_GENERATION_PROMPT_PATH=container_directory/prompt_file.txt # OPTIONAL, but needed if mounted a custom prompt file
+```
diff --git a/python/GeminiVideoSummarization/gemini-process-video.py b/python/GeminiVideoSummarization/gemini-process-video.py
@@ -0,0 +1,160 @@
+#############################################################################
+# NOTICE                                                                    #
+#                                                                           #
+# This software (or technical data) was produced for the U.S. Government    #
+# under contract, and is subject to the Rights in Data-General Clause       #
+# 52.227-14, Alt. IV (DEC 2007).                                            #
+#                                                                           #
+# Copyright 2024 The MITRE Corporation. All Rights Reserved.                #
+#############################################################################
+
+#############################################################################
+# Copyright 2024 The MITRE Corporation                                      #
+#                                                                           #
+# Licensed under the Apache License, Version 2.0 (the "License");           #
+# you may not use this file except in compliance with the License.          #
+# You may obtain a copy of the License at                                   #
+#                                                                           #
+#    http://www.apache.org/licenses/LICENSE-2.0                             #
+#                                                                           #
+# Unless required by applicable law or agreed to in writing, software       #
+# distributed under the License is distributed on an "AS IS" BASIS,         #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
+# See the License for the specific language governing permissions and       #
+# limitations under the License.                                            #
+#############################################################################
+
+import argparse
+import json
+import os
+import sys
+
+from google import genai
+from google.genai import types
+from google.genai.types import Part
+from google.cloud import storage
+from google.genai.errors import ClientError
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Sends image and prompt to Gemini Client for processing.')
+
+    parser.add_argument("--model", "-m", type=str, default="gemini-2.5-flash", help="The name of the Gemini model to use.")
+    parser.add_argument("--data_uri", "-d", type=str, required=True, help="Path to the media file to process with Gemini.")
+    parser.add_argument("--prompt", "-p", type=str, help="The prompt you want to use with the video.")
+    parser.add_argument("--google_application_credientials", "-c", type=str, required=True, help="The JSON file to your credentials to use Vertex AI.")
+    parser.add_argument("--project_id", "-i", type=str, required=True, help="Name of your GCP project.")
+    parser.add_argument("--bucket_name", "-b", type=str, required=True, help="Name of the GCP bucket.")
+    parser.add_argument("--label_prefix", "-l", type=str, required=True, help="Label prefix to use when uploading the video to GCP.")
+    parser.add_argument("--label_user", "-u", type=str, required=True, help="User of whom is accessing the GCP resources.")
+    parser.add_argument("--label_purpose", "-r", type=str, required=True, help="Purpose of accessing the GCP resources.")
+    parser.add_argument("--segment_start", "-s", type=str, required=True, help="Start time of the current segment.")
+    parser.add_argument("--segment_stop", "-e", type=str, required=True, help="End time of the current segment.")
+    parser.add_argument("--fps", "-f", type=str, default="1.0", help="Specifies the number of frames per second of video to process.")
+
+    args = parser.parse_args()
+
+    try:
+        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = args.google_application_credientials
+
+        # GCP resources
+        USER = args.label_user
+        PURPOSE = args.label_purpose
+        LABEL_PREFIX = args.label_prefix
+        PROJECT_ID = args.project_id
+        BUCKET_NAME = args.bucket_name
+
+        PROMPT = args.prompt
+        MODEL = args.model
+
+        # Video segment storage information
+        FILE_PATH = args.data_uri
+        FILE_NAME = os.path.basename(FILE_PATH)
+        STORAGE_PATH = USER + "/" + FILE_NAME
+
+        SEGMENT_START = int(float(args.segment_start))
+        SEGMENT_STOP = int(float(args.segment_stop))
+        FPS = float(args.fps)
+
+        # Automatically uses ADC to authenticate
+        client = storage.Client(
+            project=PROJECT_ID
+        )
+
+        # Uploads file to GCP bucket
+        bucket = client.bucket(BUCKET_NAME)
+        blob = bucket.blob(STORAGE_PATH)
+
+        # There is no way to set a time-to-live (TTL) for a file in Google Storage.
+        # The file will be deleted manually at the end of this script.
+        # If you want to set a TTL, you can use the `lifecycle` configuration in the bucket settings.
+        # See: https://cloud.google.com/storage/docs/lifecycle
+        # For example, you can set a rule to delete files older than 30 days.
+        blob.upload_from_filename(FILE_PATH)
+
+        file_uri = f"gs://{BUCKET_NAME}/{STORAGE_PATH}"
+
+        # Automatically uses ADC to authenticate
+        client = genai.Client(
+            project=PROJECT_ID,
+            location="global",
+            vertexai=True
+        )
+
+        content_config = None
+        if(USER != "" and LABEL_PREFIX != "" and PURPOSE != ""):
+            # Use labels to help track billing and usage
+            content_config = types.GenerateContentConfig(
+                labels={
+                    LABEL_PREFIX + "user": USER,
+                    LABEL_PREFIX + "purpose": PURPOSE,
+                    LABEL_PREFIX + "modality": "video"
+                }
+            )
+
+        response = client.models.generate_content(
+            model=MODEL,
+            contents=types.Content(
+                role='user',
+                parts=[
+                    Part(
+                        file_data=types.FileData(
+                            file_uri=file_uri,
+                            mime_type='video/mp4'
+                        ),
+                        video_metadata=types.VideoMetadata(
+                            start_offset=str(SEGMENT_START) + "s", 
+                            end_offset=str(SEGMENT_STOP) + "s", 
+                            fps=FPS
+                        )
+                    ),
+                    Part(
+                        text=PROMPT
+                    )
+                ],
+            ),
+            config=content_config
+        )
+        print(f"\n{response.text}")
+
+        # Set a generation-match precondition to avoid potential race conditions
+        # and data corruptions. The request to delete is aborted if the object's
+        # generation number does not match your precondition.
+        blob.reload()  # fetch blob metadata to use in generation_match_precondition.
+        generation_match_precondition = blob.generation
+
+        blob.delete(if_generation_match=generation_match_precondition)
+
+        sys.exit(0)
+    except ClientError as e:
+        if hasattr(e, 'code') and e.code == 429:
+            print("Caught a ResourceExhausted error (429 Too Many Requests)", file=sys.stderr)
+            sys.exit(1)
+        raise
+    except Exception as e:
+        err_str = str(e)
+        print(err_str, file=sys.stderr)
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
diff --git a/python/GeminiVideoSummarization/gemini_video_summarization_component/__init__.py b/python/GeminiVideoSummarization/gemini_video_summarization_component/__init__.py
@@ -0,0 +1,27 @@
+#############################################################################
+# NOTICE                                                                    #
+#                                                                           #
+# This software (or technical data) was produced for the U.S. Government    #
+# under contract, and is subject to the Rights in Data-General Clause       #
+# 52.227-14, Alt. IV (DEC 2007).                                            #
+#                                                                           #
+# Copyright 2025 The MITRE Corporation. All Rights Reserved.                #
+#############################################################################
+
+#############################################################################
+# Copyright 2025 The MITRE Corporation                                      #
+#                                                                           #
+# Licensed under the Apache License, Version 2.0 (the "License");           #
+# you may not use this file except in compliance with the License.          #
+# You may obtain a copy of the License at                                   #
+#                                                                           #
+#    http://www.apache.org/licenses/LICENSE-2.0                             #
+#                                                                           #
+# Unless required by applicable law or agreed to in writing, software       #
+# distributed under the License is distributed on an "AS IS" BASIS,         #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
+# See the License for the specific language governing permissions and       #
+# limitations under the License.                                            #
+#############################################################################
+
+from .gemini_video_summarization_component import GeminiVideoSummarizationComponent
diff --git a/python/GeminiVideoSummarization/gemini_video_summarization_component/data/default_prompt.txt b/python/GeminiVideoSummarization/gemini_video_summarization_component/data/default_prompt.txt
@@ -0,0 +1,35 @@
+You are a helpful assistant analyzing the provided video.
+
+First, write a brief summary of the video content.
+
+Next, create a timeline of key events that spans the full duration of the video. Include at least one event.
+For each event, provide 
+    - a short description
+    - the start time
+    - the end time.
+
+Use MM:SS format for all timestamps, even if the total time exceeds 60 minutes. DO NOT USE HH:MM:SS. For example, use 75:30 instead of 01:15:30.
+
+Assume the video starts at 00:00, even if it is a segment from a longer recording.
+
+Respond only with a JSON object in the following format. Do not include any extra text, commentary, or explanations:
+{
+    "video_summary": STRING,
+        "video_event_timeline": [
+            {
+                "timestamp_start": STRING,
+                "timestamp_end": STRING,
+                "description": STRING
+            },
+            {
+                "timestamp_start": STRING,
+                "timestamp_end": STRING,
+                "description": STRING
+            },
+            {
+                "timestamp_start": STRING,
+                "timestamp_end": STRING,
+                "description": STRING
+            }
+        ]
+}
diff --git a/...miniVideoSummarization/gemini_video_summarization_component/data/default_prompt_no_tl.txt b/...miniVideoSummarization/gemini_video_summarization_component/data/default_prompt_no_tl.txt
@@ -0,0 +1,8 @@
+You are a helpful assistant analyzing the provided video.
+
+Write a brief summary of the video content.
+
+Respond only with a JSON object in the following format. Do not include any extra text, commentary, or explanations:
+{
+    "video_summary": STRING
+}