diff --git a/ai/ai-vision/README.md b/ai/ai-vision/README.md
index 90ff0b0f0..ebc1222e1 100644
--- a/ai/ai-vision/README.md
+++ b/ai/ai-vision/README.md
@@ -28,6 +28,8 @@ Reviewed: 11.06.2025
 
 ## GitHub
 
+- [OCI object detection and image segmentation using SAM2](./ai-vision-and-sam/)
+
 - [OCI image classification using data labeling and vision service](https://github.com/carlgira/oci-image-classification)
 - [OCI object detection using data labeling and vision service](https://github.com/carlgira/oci-object-detection)
 - [AI vision web client](https://github.com/oracle-devrel/oci-tf-vision-web-client)
diff --git a/ai/ai-vision/ai-vision-and-sam/LICENSE b/ai/ai-vision/ai-vision-and-sam/LICENSE
new file mode 100644
index 000000000..46c0c79d9
--- /dev/null
+++ b/ai/ai-vision/ai-vision-and-sam/LICENSE
@@ -0,0 +1,35 @@
+Copyright (c) 2025 Oracle and/or its affiliates.
+
+The Universal Permissive License (UPL), Version 1.0
+
+Subject to the condition set forth below, permission is hereby granted to any
+person obtaining a copy of this software, associated documentation and/or data
+(collectively the "Software"), free of charge and under any and all copyright
+rights in the Software, and any and all patent rights owned or freely
+licensable by each licensor hereunder covering either (i) the unmodified
+Software as contributed to or provided by such licensor, or (ii) the Larger
+Works (as defined below), to deal in both
+
+(a) the Software, and
+(b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
+one is included with the Software (each a "Larger Work" to which the Software
+is contributed by such licensors),
+
+without restriction, including without limitation the rights to copy, create
+derivative works of, display, perform, and distribute the Software and make,
+use, sell, offer for sale, import, export, have made, and have sold the
+Software and the Larger Work(s), and to sublicense the foregoing rights on
+either these or other terms.
+
+This license is subject to the following condition:
+The above copyright notice and either this complete permission notice or at
+a minimum a reference to the UPL must be included in all copies or
+substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/ai/ai-vision/ai-vision-and-sam/README.md b/ai/ai-vision/ai-vision-and-sam/README.md
new file mode 100644
index 000000000..9d0c62f6b
--- /dev/null
+++ b/ai/ai-vision/ai-vision-and-sam/README.md
@@ -0,0 +1,26 @@
+![Example of using this asset with a picture of a chair](files/images/example_use.jpg)
+
+# OCI Vision and SAM2
+
+This asset is a tool that showcases how OCI AI Vision can be combined with Meta's SAM2 to produce accurate segmentations without needing large amounts of training data.
+
+Reviewed: 22.09.2025
+
+Authors: Matthias Wolf and Cristina Granés
+
+# When to use this asset?
+
+Use it whenever you need to detect and segment objects in images with AI, for example to compute object areas.
+
+# How to use this asset?
+
+See the full setup and usage guide in [`files/README.md`](./files/README.md).
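+
+A minimal quick start (assuming Python is installed and `files/.config` is filled in, as described in the guide):
+
+```
+pip install -r files/requirements.txt
+cd files
+streamlit run app.py
+```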
+
+# License
+
+Copyright (c) 2025 Oracle and/or its affiliates.
+
+Licensed under the Universal Permissive License (UPL), Version 1.0.
+
+See [LICENSE](LICENSE) for more details.
diff --git a/ai/ai-vision/ai-vision-and-sam/files/.config b/ai/ai-vision/ai-vision-and-sam/files/.config
new file mode 100644
index 000000000..cd4ebdecc
--- /dev/null
+++ b/ai/ai-vision/ai-vision-and-sam/files/.config
@@ -0,0 +1,5 @@
+CONFIG_FILE_PATH = "~/.oci/config"
+# This needs to be a valid compartment ID within your tenancy
+COMPARTMENT_ID = "ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+# Change the endpoint to match your account's region
+ENDPOINT = "https://inference.generativeai.eu-frankfurt-1.oci.oraclecloud.com"
diff --git a/ai/ai-vision/ai-vision-and-sam/files/.streamlit/config.toml b/ai/ai-vision/ai-vision-and-sam/files/.streamlit/config.toml
new file mode 100644
index 000000000..faca013cb
--- /dev/null
+++ b/ai/ai-vision/ai-vision-and-sam/files/.streamlit/config.toml
@@ -0,0 +1,13 @@
+[theme]
+primaryColor = "#e83610"              # Red
+backgroundColor = "#1E1E1E"           # Dark gray background
+secondaryBackgroundColor = "#2C2C2C"  # Slightly lighter sidebar
+textColor = "#FFFFFF"                 # Clean white text
+font = "sans serif"
+
+[client]
+showSidebarNavigation = false
+toolbarMode = "minimal"
+
+[server]
+headless = true
\ No newline at end of file
diff --git a/ai/ai-vision/ai-vision-and-sam/files/README.md b/ai/ai-vision/ai-vision-and-sam/files/README.md
new file mode 100644
index 000000000..ad3448d58
--- /dev/null
+++ b/ai/ai-vision/ai-vision-and-sam/files/README.md
@@ -0,0 +1,102 @@
+![Example of using this asset with a picture of a chair](images/example_use.jpg)
+
+# Image segmentation with OCI AI Vision and SAM2 from Meta
+
+In this demo, you will see how you can segment objects in images by combining **OCI AI Vision** with **Meta’s SAM2 (Segment Anything Model 2)**.
+
+**Segmenting objects in images has never been this easy!**
+By combining OCI AI Vision for image analysis with SAM2 for high-precision segmentation, you can quickly identify and isolate objects in any image using just a few clicks or simple prompts. This integration demonstrates how cloud-based vision services and cutting-edge AI models can work together to streamline complex computer vision tasks.
+
+For more accurate results, you can build a custom object detection model in OCI AI Vision (see [the documentation](https://docs.oracle.com/en-us/iaas/Content/vision/using/custom_image_analysis_models_using.htm) for more details).
+
+## Some possible business cases
+- Compute roof areas to determine the area available for solar panels.
+- Segment construction sites to estimate areas from aerial images.
+- Environmental monitoring: segment different land types (forest, water, urban areas, ...) from satellite imagery and estimate changes in those areas.
+- Urban planning: segment and detect roads, bridges, and utilities for urban development projects.
+- Crop and weed segmentation to optimize pesticide usage and crop management.
+
+## Requirements
+You can install this demo on a personal PC.
+You need some disk space and at least 300MB of memory to run `sam2.1-hiera-small` on CPU.
+
+## Setup
+1. Install Python (this project requires Python 3.13.5 or later). You can check your current Python version by running:
+```
+python --version
+```
+or
+```
+python3 --version
+```
+2. Install the requirements from the `requirements.txt` file:
+```
+pip install -r /path/to/requirements.txt
+```
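+Optionally, create and activate a virtual environment before installing (common practice, though not required by this demo):
+```
+python3 -m venv .venv
+source .venv/bin/activate
+```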
+3. Update the `.config` file with your own `CONFIG_FILE_PATH` and `COMPARTMENT_ID`:
+```
+CONFIG_FILE_PATH = 
+COMPARTMENT_ID = 
+# Change the endpoint to match your account's region
+ENDPOINT = "https://inference.generativeai.eu-frankfurt-1.oci.oraclecloud.com"
+```
+4. Run the application using `streamlit run app.py`.
+
+## Technical Details
+* The solution leverages the Oracle Cloud Infrastructure (OCI) AI Vision Service, an AI service designed to simplify AI adoption.
+* Specifically, this demo utilizes:
+    + OCI Vision object detection
+    + SAM2 segmentation
+
+### About SAM2
+SAM2 is an open-source model developed by Meta AI as the next-generation version of the original Segment Anything Model (SAM). It enables fast and flexible image segmentation, allowing users to easily extract precise object masks from images with minimal input. Built to support both interactive and automated segmentation tasks, SAM2 improves on efficiency, accuracy, and generalization across diverse image types and domains.
+
+Key features include:
+- State-of-the-art segmentation performance
+- Support for promptable segmentation (points, boxes, masks); a minimal sketch is included in the appendix at the end of this README
+- Open-source and ready for integration into custom workflows
+
+You can find more information [here](https://docs.ultralytics.com/models/sam-2/), and the SAM2 license [here](https://github.com/facebookresearch/sam2/blob/main/LICENSE).
+
+## Project Structure
+The repository is organized as follows:
+
+```plaintext
+│   .config             # File to be added as explained in `Setup`, with your own OCI variables
+│   app.py              # Main Streamlit application entry point
+│   config.py           # Variables for the Streamlit application
+│   navigation.py       # Configuration for the sidebar in the Streamlit application
+│   README.md           # Project documentation
+│   requirements.txt    # Python dependencies
+│
+├───utils
+│   │   ai_tools.py     # Wrappers for inference on the AI models
+│   │   image_utils.py  # Wrappers for image functionalities
+│
+├───app_images
+│   │   oracle_logo.png # Oracle logo for the Streamlit application
+│
+├───.streamlit          # Parameters for the UI appearance of the Streamlit application
+│
+└───uploaded_images     # Folder used by the Streamlit application
+```
+
+## Output
+The demo displays an interactive dashboard where you can upload an image and process it; it shows the resulting detections from OCI Vision and the segmentations of those detections.
+
+## Authors
+- Matthias Wolf
+- Cristina Granés
+
+## Contributing
+We welcome contributions to improve and expand the capabilities of this demo. Please fork the repository and submit a pull request with your changes.
+
+## License
+Copyright (c) 2025 Oracle and/or its affiliates.
+
+Licensed under the Universal Permissive License (UPL), Version 1.0.
+
+See [LICENSE](../LICENSE) for more details.
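+
+## Appendix: promptable segmentation with SAM2
+
+A minimal sketch of the box-prompt flow this demo uses (assuming the `sam2` package from `requirements.txt`, an RGB image loaded as a NumPy array named `image`, and a hypothetical pixel-coordinate box `[x1, y1, x2, y2]`):
+
+```
+import torch
+from sam2.sam2_image_predictor import SAM2ImagePredictor
+
+# Download and load the small SAM2 checkpoint on CPU
+predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2.1-hiera-small", device=torch.device("cpu"))
+with torch.inference_mode():
+    predictor.set_image(image)  # RGB uint8 array of shape (H, W, 3)
+    # One box prompt in pixel coordinates; returns candidate masks with scores
+    masks, scores, _ = predictor.predict(box=[x1, y1, x2, y2])
+```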
diff --git a/ai/ai-vision/ai-vision-and-sam/files/app.py b/ai/ai-vision/ai-vision-and-sam/files/app.py
new file mode 100644
index 000000000..38bf045fb
--- /dev/null
+++ b/ai/ai-vision/ai-vision-and-sam/files/app.py
@@ -0,0 +1,81 @@
+import streamlit as st
+import config
+from PIL import Image
+
+import torch
+from sam2.sam2_image_predictor import SAM2ImagePredictor
+
+from utils.image_utils import load_image, save_uploaded_image, draw_detections, draw_masks
+from utils.ai_tools import InferencePipeline
+from navigation import make_sidebar
+
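+# st.cache_resource keeps a single SAM2 predictor in memory across Streamlit
+# reruns, so the checkpoint is downloaded and loaded only once per process.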
+@st.cache_resource
+def load_sam():
+    # This will require internet access when deploying the model
+    # Alternative, larger model: "facebook/sam2.1-hiera-large"
+    predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2.1-hiera-small", device=torch.device("cpu"))
+    return predictor
+
+
+def main():
+
+    ## Page config
+    icon = Image.open(config.ORACLE_LOGO)
+
+    st.set_page_config(
+        page_title="Detect and segment",
+        page_icon=icon,
+        layout="wide",
+    )
+
+    ## Header ##
+    st.markdown(
+        "<h1>Segment Images with OCI AI Vision + SAM2 from Meta</h1>",
+        unsafe_allow_html=True,
+    )
+    st.divider()
+
+
+    ### Init application ###
+
+    sam_model = load_sam()
+    ai_pipeline = InferencePipeline(config=config, sam_model=sam_model)
+
+    # Sidebar for upload and run
+    uploaded_file, run_button = make_sidebar()
+
+
+    ######### finish init
+
+    ## Upload image & process ##
+    coli1, coli2 = st.columns([0.5, 0.5])
+    if uploaded_file:
+        image = load_image(uploaded_file)
+        with coli1: st.image(image, caption="Original Image")
+
+        if run_button:
+            with st.spinner("Processing...", show_time=True):
+                uploaded_file.seek(0)
+                # Save image:
+                image_path = save_uploaded_image(uploaded_file, save_dir=config.UPLOAD_PATH)
+
+                detections, masks = ai_pipeline.get_detections_and_masks(image_path)
+
+                image_with_boxes = draw_detections(image, detections)
+                with coli2: st.image(image_with_boxes, caption="OCI Vision Detections")
+
+                st.header("Final segmentations")
+
+                ## Plot masks:
+                mask_names = [msk["class"] for msk in masks]
+                tabs_list = st.tabs(mask_names)
+                for id_msk, msk in enumerate(masks):
+                    ctab = tabs_list[id_msk]
+                    det = detections[id_msk]
+                    image_with_mask = draw_masks(image, msk["mask"])
+                    with ctab:
+                        st.image(image_with_mask,
+                                 width=700,
+                                 caption=f"{msk['class']} - Detection confidence score: {det['confidence_score']:.2f}")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/ai/ai-vision/ai-vision-and-sam/files/app_images/oracle_logo.png b/ai/ai-vision/ai-vision-and-sam/files/app_images/oracle_logo.png
new file mode 100644
index 000000000..5f5f63d6c
Binary files /dev/null and b/ai/ai-vision/ai-vision-and-sam/files/app_images/oracle_logo.png differ
diff --git a/ai/ai-vision/ai-vision-and-sam/files/config.py b/ai/ai-vision/ai-vision-and-sam/files/config.py
new file mode 100644
index 000000000..ac924af28
--- /dev/null
+++ b/ai/ai-vision/ai-vision-and-sam/files/config.py
@@ -0,0 +1,12 @@
+import os
+from dotenv import load_dotenv
+load_dotenv(dotenv_path=".config")
+
+# config, compartments and endpoints
+CONFIG_FILE_PATH = os.getenv("CONFIG_FILE_PATH")
+COMPARTMENT_ID = os.getenv("COMPARTMENT_ID")
+ENDPOINT = os.getenv("ENDPOINT")
+
+## Other config params
+ORACLE_LOGO = "app_images/oracle_logo.png"
+UPLOAD_PATH = "uploaded_images"
\ No newline at end of file
diff --git a/ai/ai-vision/ai-vision-and-sam/files/images/example_use.jpg b/ai/ai-vision/ai-vision-and-sam/files/images/example_use.jpg
new file mode 100644
index 000000000..1d31d4184
Binary files /dev/null and b/ai/ai-vision/ai-vision-and-sam/files/images/example_use.jpg differ
diff --git a/ai/ai-vision/ai-vision-and-sam/files/navigation.py b/ai/ai-vision/ai-vision-and-sam/files/navigation.py
new file mode 100644
index 000000000..273521d7c
--- /dev/null
+++ b/ai/ai-vision/ai-vision-and-sam/files/navigation.py
@@ -0,0 +1,42 @@
+import base64
+import streamlit as st
+import config
+
+def make_sidebar():
+    with open(config.ORACLE_LOGO, "rb") as f:
+        icon_base64 = base64.b64encode(f.read()).decode()
+    with st.sidebar:
+
+        st.markdown(
+            f"""
+            <div style="display: flex; align-items: center; gap: 10px;">
+                <img src="data:image/png;base64,{icon_base64}" width="32">
+                <h2 style="margin: 0;">Upload & Run</h2>
+            </div>
+            """,
+            unsafe_allow_html=True
+        )
+        st.markdown("""
+
+        """, unsafe_allow_html=True)
+
+        st.write("")
+        st.write("")
+
+        uploaded_file = st.sidebar.file_uploader("Upload an Image", type=['png', 'jpg'])
+        run_button = st.sidebar.button("Run")
+
+        st.write("")
+        st.write("")
+
+    return uploaded_file, run_button
diff --git a/ai/ai-vision/ai-vision-and-sam/files/requirements.txt b/ai/ai-vision/ai-vision-and-sam/files/requirements.txt
new file mode 100644
index 000000000..54d067ed9
--- /dev/null
+++ b/ai/ai-vision/ai-vision-and-sam/files/requirements.txt
@@ -0,0 +1,11 @@
+huggingface-hub==0.35.0
+numpy==2.2.6
+oci==2.160.1
+opencv-python==4.12.0.88
+pandas==2.3.2
+pillow==11.3.0
+python-dotenv==1.1.1
+sam2==1.1.0
+streamlit==1.49.1
+torch==2.8.0
+torchvision==0.23.0
\ No newline at end of file
diff --git a/ai/ai-vision/ai-vision-and-sam/files/utils/ai_tools.py b/ai/ai-vision/ai-vision-and-sam/files/utils/ai_tools.py
new file mode 100644
index 000000000..2680a1188
--- /dev/null
+++ b/ai/ai-vision/ai-vision-and-sam/files/utils/ai_tools.py
@@ -0,0 +1,95 @@
+import oci
+import base64
+import torch
+import numpy as np
+import cv2
+
+class InferencePipeline:
+    """
+    A pipeline that runs OCI AI Vision object detection and prompts SAM2
+    with the detected bounding boxes to produce segmentation masks.
+    """
+    def __init__(self, config, sam_model=None):
+        self.sam_model = sam_model
+        oci_config = oci.config.from_file(config.CONFIG_FILE_PATH)
+        self.client = oci.ai_vision.AIServiceVisionClient(oci_config)
+        self.config = config
+
+    def _encode_image(self, image_path):
+        with open(image_path, "rb") as image_file:
+            encoded_image = base64.b64encode(image_file.read())
+        return encoded_image
+
+    def _format_output(self, output):
+        objects_list = []
+        for obj in output.data.image_objects:
+            vertices = obj.bounding_polygon.normalized_vertices
+            x_values, y_values = [v.x for v in vertices], [v.y for v in vertices]
+            min_x, max_x = min(x_values), max(x_values)
+            min_y, max_y = min(y_values), max(y_values)
+            objects_list.append({
+                "label": obj.name,
+                "confidence_score": obj.confidence,
+                "bbox": [min_x, min_y, max_x, max_y]
+            })
+        return objects_list
+
+    def _inference_ai_vision(self, image_path):
+        encoded_image = self._encode_image(image_path)
+
+        oci_vision_detection = self.client.analyze_image(
+            analyze_image_details=oci.ai_vision.models.AnalyzeImageDetails(
+                features=[
+                    oci.ai_vision.models.ImageObjectDetectionFeature(
+                        feature_type="OBJECT_DETECTION",
+                        max_results=1000000
+                    ),
+                ],
+                image=oci.ai_vision.models.InlineImageDetails(
+                    source="INLINE",
+                    data=encoded_image.decode("utf-8")),
+                compartment_id=self.config.COMPARTMENT_ID),
+        )
+
+        return oci_vision_detection
+
+    def get_detection(self, image_path):
+        oci_vision_detection = self._inference_ai_vision(image_path)
+        detections = self._format_output(oci_vision_detection)
+        return detections
+
+    def get_detections_and_masks(self, image_path):
+        # SAM2 expects RGB input, while OpenCV loads BGR, so convert explicitly
+        img = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
+        detections = self.get_detection(image_path)
+
+        if self.sam_model is None:
+            raise RuntimeError("Unable to prepare segmentations, SAM model is None")
+
+        segmentations = []
+        with torch.inference_mode(), torch.autocast("cpu"):
+            self.sam_model.set_image(img)
+            for detected_object in detections:
+                bounding_box = detected_object["bbox"]
+                name = detected_object["label"]
+                score = detected_object["confidence_score"]
+
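+                # OCI Vision returns bounding boxes as normalized [0, 1]
+                # coordinates; SAM2 expects pixel coordinates, so scale by
+                # the image width (shape[1]) and height (shape[0]).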
+                bbox = [
+                    int(bounding_box[0] * img.shape[1]),
+                    int(bounding_box[1] * img.shape[0]),
+                    int(bounding_box[2] * img.shape[1]),
+                    int(bounding_box[3] * img.shape[0]),
+                ]
+                masks, scores, _ = self.sam_model.predict(box=bbox)
+                best_mask, best_score = sorted(zip(masks, scores), key=lambda t: t[1])[-1]
+                indices = np.nonzero(best_mask)
+
+                segmentations.append({
+                    "class": name,
+                    "score": score,
+                    "bounding_box": bbox,
+                    "mask": best_mask,
+                    "mask_indices": [[int(y), int(x)] for y, x in zip(*indices)],
+                    "shape": best_mask.shape,
+                })
+        return detections, segmentations
diff --git a/ai/ai-vision/ai-vision-and-sam/files/utils/image_utils.py b/ai/ai-vision/ai-vision-and-sam/files/utils/image_utils.py
new file mode 100644
index 000000000..eb61947b3
--- /dev/null
+++ b/ai/ai-vision/ai-vision-and-sam/files/utils/image_utils.py
@@ -0,0 +1,65 @@
+import os
+from PIL import Image
+import numpy as np
+import cv2
+
+def load_image(uploaded_file):
+    image = Image.open(uploaded_file).convert("RGB")
+    return image
+
+def save_uploaded_image(uploaded_file, save_dir):
+    os.makedirs(save_dir, exist_ok=True)
+    save_path = os.path.join(save_dir, uploaded_file.name)
+
+    with open(save_path, "wb") as f:
+        f.write(uploaded_file.read())
+    return save_path
+
+def draw_detections(image, detections, box_color=(0, 255, 0), text_color=(0, 0, 0), thickness=2,
+                    font_scale=0.8, font_thickness=2):
+    """
+    Draws bounding boxes and labels on the image.
+    """
+    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    height, width = image.shape[:2]
+
+    for det in detections:
+        x1 = int(det['bbox'][0] * width)
+        y1 = int(det['bbox'][1] * height)
+        x2 = int(det['bbox'][2] * width)
+        y2 = int(det['bbox'][3] * height)
+        label = det['label']
+        score = det['confidence_score']
+
+        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, thickness)
+
+        label_text = f"{label}"
+        if score is not None:
+            label_text += f" {score:.2f}"
+
+        # Add label text background
+        (text_width, text_height), _ = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)
+        cv2.rectangle(image, (x1, y1 - text_height - 4), (x1 + text_width + 2, y1), box_color, -1)
+        cv2.putText(image, label_text, (x1 + 1, y1 - 2), cv2.FONT_HERSHEY_SIMPLEX, font_scale, text_color, font_thickness)
+
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    pil_image = Image.fromarray(image_rgb)
+    return pil_image
+
+def draw_masks(image, mask, alpha=0.3, color=(0, 0, 255)):
+    """
+    Overlays a binary mask onto the image.
+    """
+    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+
+    if mask.dtype != np.uint8:
+        mask = (mask > 0).astype(np.uint8)
+
+    colored_mask = np.zeros_like(image, dtype=np.uint8)
+    colored_mask[mask == 1] = color
+
+    image = cv2.addWeighted(colored_mask, alpha, image, 1 - alpha, 0)
+
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    pil_image = Image.fromarray(image_rgb)
+    return pil_image