diff --git a/ai/generative-ai-service/Video-Image-Analysis-using-Langchain/README.md b/ai/generative-ai-service/Video-Image-Analysis-using-Langchain/README.md
new file mode 100644
index 000000000..4f505563f
--- /dev/null
+++ b/ai/generative-ai-service/Video-Image-Analysis-using-Langchain/README.md
@@ -0,0 +1,70 @@
+# Video Content PG Rating Analyzer
+
+This is a Generative AI-powered application that analyzes frames from uploaded videos or images to determine their suitability for PG-rated audiences. The application leverages Oracle Cloud Infrastructure (OCI) Generative AI Vision models, specifically Llama 3.2 in this case, to evaluate visual content for explicit or age-inappropriate material. It can also be adapted to extract specific elements or text (such as license plates) and to other use cases.
+
+## Features
+- Upload image or video files (`.jpg`, `.png`, `.mp4`, `.avi`, `.mov`).
+- Automatically extract frames from videos at a user-defined interval.
+- Use OCI Generative AI to assess frame content for PG-appropriateness.
+- Highlight frames flagged as inappropriate with detailed reasons and timecodes.
+- Adjust the AI confidence threshold (flagged frames below it are not reported) and the frame extraction interval.
+
+## Prerequisites
+Before running the application, ensure you have:
+- Python 3.8 or later installed
+- An active Oracle Cloud Infrastructure (OCI) account
+- Required Python dependencies installed
+- OCI Generative AI model name and compartment ID
+
+## How It Works
+1. **Upload Media:**
+   - Users upload a video or image file for analysis.
+2. **Frame Extraction:**
+   - For videos, the app extracts frames at a selected interval.
+3. **AI Analysis:**
+   - Each frame is encoded and sent to an OCI Vision model for analysis.
+   - The AI responds with structured output indicating whether content is PG-appropriate.
+4. **Result Display:**
+   - Inappropriate frames (based on confidence threshold) are displayed along with the reason and timecode.
+   - A final PG-rating verdict is shown at the end.
+
+## Example Output
+```json
+{
+  "AgeAppropriate": "not-appropriate",
+  "response": "Shows intense violence and blood spatter.",
+  "ConfidenceLevel": 0.97
+}
+```
+
+## Installation
+Clone this repository and navigate to the project directory:
+```bash
+git clone <repository-url>
+cd <repository-directory>
+```
+
+Install the required dependencies:
+```bash
+pip install -r requirements.txt
+```
+
+## Configuration
+To integrate with OCI Generative AI, update the following parameters in the code:
+```python
+llm = ChatOCIGenAI(
+    model_id="Add your model name",
+    compartment_id="Add your compartment ID",
+    model_kwargs={"temperature": 0, "max_tokens": 2000},
+)
+```
+
+Replace `model_id` and `compartment_id` with the appropriate values from your OCI console.
+
+## Running the Application
+Run the Streamlit app using:
+```bash
+streamlit run <your_script>.py
+```
+
+Replace `<your_script>.py` with the filename of your main script (e.g., `imageLlamaVideo.py`).
diff --git a/ai/generative-ai-service/Video-Image-Analysis-using-Langchain/imageLlamaVideo.py b/ai/generative-ai-service/Video-Image-Analysis-using-Langchain/imageLlamaVideo.py
new file mode 100644
index 000000000..7a4a2c4ec
--- /dev/null
+++ b/ai/generative-ai-service/Video-Image-Analysis-using-Langchain/imageLlamaVideo.py
@@ -0,0 +1,254 @@
+# ========================================
+# Imports
+# ========================================
+import ast
+import base64
+import io
+import json
+import logging
+import math
+import os
+from datetime import timedelta
+
+import cv2
+import streamlit as st
+from langchain.chains.llm import LLMChain
+from langchain.docstore.document import Document
+from langchain.document_loaders import PyPDFLoader
+from langchain_community.chat_models.oci_generative_ai import ChatOCIGenAI
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.prompts import PromptTemplate
+
+logger = logging.getLogger(__name__)
+
+# ========================================
+# Helper Functions
+# ========================================
+
+
+def encode_image(image_path):
+    """Return the contents of an image file as base64 text for LLM input.
+
+    Args:
+        image_path (str): Path of the image file to read.
+
+    Returns:
+        str: Base64 encoding of the file's bytes, decoded as UTF-8.
+    """
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
+
+
+def _format_timecode(frame_index, frame_rate):
+    """Return the H:MM:SS position of a frame, or "unknown" without a usable rate."""
+    if not frame_rate or math.isnan(frame_rate) or frame_rate <= 0:
+        return "unknown"
+    return str(timedelta(seconds=int(frame_index / frame_rate)))
+
+
+def extract_frames(video_path, interval, output_folder="frames"):
+    """
+    Extracts frames from a video at a specified interval.
+
+    Args:
+        video_path (str): Path to the video file.
+        interval (int): Keep one frame out of every ``interval`` frames (>= 1).
+        output_folder (str): Directory to store extracted frames.
+
+    Returns:
+        list: Tuples containing frame file paths and corresponding timecodes.
+            The timecode is "unknown" when the video reports no frame rate.
+
+    Raises:
+        ValueError: If ``interval`` is smaller than 1.
+    """
+    if interval < 1:
+        raise ValueError("interval must be at least 1")
+
+    os.makedirs(output_folder, exist_ok=True)
+    video_capture = cv2.VideoCapture(video_path)
+    extracted_frames = []
+    try:
+        # Keep the fractional rate (e.g. 29.97): truncating it to an int makes
+        # timecodes drift and turns a rate below 1 into a division by zero.
+        frame_rate = video_capture.get(cv2.CAP_PROP_FPS)
+        frame_count = 0
+        while True:
+            ret, frame = video_capture.read()
+            if not ret:
+                break
+
+            if frame_count % interval == 0:
+                frame_path = os.path.join(output_folder, f"frame_{frame_count}.jpg")
+                # imwrite signals failure through its return value; only report
+                # frames that were actually written to disk.
+                if cv2.imwrite(frame_path, frame):
+                    timecode = _format_timecode(frame_count, frame_rate)
+                    extracted_frames.append((frame_path, timecode))
+
+            frame_count += 1
+    finally:
+        # Release the decoder even if reading or writing a frame raises.
+        video_capture.release()
+    return extracted_frames
+
+
+def _parse_model_response(raw_text):
+    """Parse the model reply into (status, description, confidence).
+
+    The prompt asks for a Python-style dict, but a reply may be wrapped in
+    Markdown fences or be JSON, so the outermost {...} span is extracted and
+    parsed with ast.literal_eval first and json.loads as a fallback.
+
+    Raises:
+        ValueError: If no dictionary with a numeric ConfidenceLevel is found.
+    """
+    start = raw_text.find("{")
+    end = raw_text.rfind("}")
+    if start == -1 or end < start:
+        raise ValueError("model response does not contain a dictionary")
+    payload = raw_text[start:end + 1]
+
+    try:
+        parsed = ast.literal_eval(payload)
+    except (ValueError, SyntaxError, TypeError):
+        parsed = json.loads(payload)  # JSONDecodeError is a ValueError
+    if not isinstance(parsed, dict):
+        raise ValueError("model response is not a dictionary")
+
+    try:
+        confidence = float(parsed.get("ConfidenceLevel"))
+    except (TypeError, ValueError) as exc:
+        raise ValueError("model response has no numeric ConfidenceLevel") from exc
+    return parsed.get("AgeAppropriate"), parsed.get("response"), confidence
+
+
+# ========================================
+# Streamlit App UI and Logic
+# ========================================
+
+SYSTEM_PROMPT = (
+    "You are an expert in assessing the age-appropriateness of visual content. Your task is to analyze the provided image and provide a detailed assessment of its suitability for a PG-rated audience. "
+    "Respond only in dictionary format. Examples:\n"
+    "If the frame contains elements unsuitable for a PG-rating: "
+    "{'AgeAppropriate': 'not-appropriate', 'response': 'Brief description of the scene (e.g., shows graphic violence, explicit nudity).', 'ConfidenceLevel': 0.95}\n"
+    "If the frame complies with PG-rating guidelines: "
+    "{'AgeAppropriate': 'appropriate', 'response': 'Brief description of the scene (e.g., depicts a serene landscape, no concerning elements).', 'ConfidenceLevel': 0.90}\n"
+    "Ensure your responses are concise and focused on the image's content. Avoid unnecessary details or conversations unrelated to the task."
+)
+
+
+def videoAnalyze():
+    """Render the Streamlit page that checks uploaded media for PG suitability.
+
+    A video is split into frames and an image is treated as a single frame.
+    Each frame is sent to the OCI Generative AI vision model; frames flagged
+    with at least the selected confidence are shown, followed by a verdict.
+    """
+    # Title of the app
+    st.title("Analyze Images and Videos with OCI Generative AI")
+
+    # Sidebar inputs
+    with st.sidebar:
+        st.title("Parameters")
+        output_language = st.selectbox("Output Language", ["English", "French"])
+        confidenceThreshold = st.slider("Confidence Threshold", 0.0, 1.0)
+        st.caption("Adjust the corresponding parameters to control the AI's responses and accuracy")
+        interval = st.slider("Select the desired interval: ", 1, 48)
+
+    # Optional: Custom styling. A missing stylesheet must not stop the app.
+    try:
+        with open("style.css", encoding="utf-8") as f:
+            st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
+    except FileNotFoundError:
+        logger.info("style.css not found; using default styling")
+
+    # File upload
+    uploaded_file = st.file_uploader("Upload an image or video", type=["png", "jpg", "jpeg", "mp4", "avi", "mov"])
+    user_prompt = st.text_input("Enter your prompt for analysis:", value="Is this frame suitable for PG-rated movies?")
+
+    if uploaded_file is None:
+        return
+
+    media_type = uploaded_file.type or ""
+    is_video = media_type.startswith("video")
+
+    # Save the uploaded file locally; an image keeps its own extension so it
+    # is not written under a ".mp4" name.
+    if is_video:
+        temp_path = "temp_uploaded_video.mp4"
+    else:
+        temp_path = "temp_uploaded_image" + os.path.splitext(uploaded_file.name)[1].lower()
+    with open(temp_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+
+    if is_video:
+        # Extract frames at defined interval
+        with st.spinner("Extracting frames from the video..."):
+            frames_with_timecodes = extract_frames(temp_path, interval)
+        st.success(f"Extracted {len(frames_with_timecodes)} frames for analysis.")
+        frame_mime = "image/jpeg"
+    else:
+        # A still image is analyzed as one frame at position zero.
+        frames_with_timecodes = [(temp_path, str(timedelta(0)))]
+        frame_mime = media_type or "image/jpeg"
+
+    if not frames_with_timecodes:
+        st.error("No frames could be read from the uploaded file.")
+        return
+
+    # Instantiate the OCI Generative AI Vision model
+    llm = ChatOCIGenAI(
+        model_id="meta.llama-3.2-90b-vision-instruct",
+        compartment_id="",  # <-- Add your compartment ID here
+        model_kwargs={"max_tokens": 2000, "temperature": 0},
+    )
+    # The sidebar language choice is forwarded to the model.
+    system_message = SystemMessage(
+        content=f"{SYSTEM_PROMPT} Write the 'response' text in {output_language}."
+    )
+
+    # Loop through each frame for analysis
+    violence_detected = False
+    failed_frames = 0
+    for frame_path, timecode in frames_with_timecodes:
+        with st.spinner("Analyzing the frame..."):
+            try:
+                # Prepare the frame and messages
+                encoded_frame = encode_image(frame_path)
+                human_message = HumanMessage(
+                    content=[
+                        {"type": "text", "text": user_prompt},
+                        {"type": "image_url", "image_url": {"url": f"data:{frame_mime};base64,{encoded_frame}"}},
+                    ]
+                )
+
+                # LLM call: system instructions go before the user turn.
+                ai_response = llm.invoke(input=[system_message, human_message])
+                logger.debug("Model response for %s: %s", frame_path, ai_response.content)
+                violence_status, detailed_response, confidence = _parse_model_response(ai_response.content)
+            except Exception:
+                # One bad frame must not abort the run, but it is counted so
+                # that the final verdict does not overstate what was checked.
+                logger.exception("Error analyzing frame %s", frame_path)
+                failed_frames += 1
+                continue
+
+        # Display flagged frames
+        if violence_status == "not-appropriate" and confidence >= confidenceThreshold:
+            st.write(f"Frame Analysis: {detailed_response}")
+            st.write(f"Timecode: {timecode}")
+            st.image(frame_path, caption="Analyzing Frame", width=500)
+            violence_detected = True
+
+    # Final result
+    if violence_detected:
+        st.warning("This movie is NOT PG Rated!")
+    elif failed_frames:
+        st.error(f"{failed_frames} of {len(frames_with_timecodes)} frames could not be analyzed, so no PG verdict is given.")
+    else:
+        st.success("This movie is PG Rated!")
+
+
+if __name__ == "__main__":
+    videoAnalyze()
diff --git a/ai/generative-ai-service/Video-Image-Analysis-using-Langchain/requirements.txt b/ai/generative-ai-service/Video-Image-Analysis-using-Langchain/requirements.txt
new file mode 100644
index 000000000..e8bf0676b
--- /dev/null
+++ b/ai/generative-ai-service/Video-Image-Analysis-using-Langchain/requirements.txt
@@ -0,0 +1,8 @@
+streamlit==1.32.2
+opencv-python==4.9.0.80
+langchain==0.1.13
+langchain-community==0.0.30
+langchain-core==0.1.32
+oracledb==1.4.0
+tiktoken==0.6.0
+pydantic==1.10.12