diff --git a/Dockerfile.ubuntu b/Dockerfile.ubuntu index 601cf5dc4b..2d227ee332 100644 --- a/Dockerfile.ubuntu +++ b/Dockerfile.ubuntu @@ -426,4 +426,4 @@ ENV PATH="$PATH:/ovms/bin" RUN echo "The source code of added GPL components is stored in https://storage.openvinotoolkit.org/repositories/openvino/ci_dependencies/container_gpl_sources/" > /ovms/thirdparty-licenses/GPL.txt USER ovms -ENTRYPOINT ["/ovms/bin/ovms"] +ENTRYPOINT ["/ovms/bin/ovms"] \ No newline at end of file diff --git a/demos/python_demos/image_embeddings/Readme.md b/demos/python_demos/image_embeddings/Readme.md new file mode 100644 index 0000000000..a49df26eaa --- /dev/null +++ b/demos/python_demos/image_embeddings/Readme.md @@ -0,0 +1,219 @@ +# Image Embeddings with OpenVINO Model Server {ovms_image_embeddings} + +Image-to-image search system using vision models (CLIP, LAION, DINO) for generating semantic embeddings with OpenVINO Model Server. The client uploads query images and receives similar images from a pre-indexed dataset based on visual content similarity. This enables applications to find visually and semantically related images without requiring text descriptions or manual tagging. The system uses Python code for preprocessing and postprocessing and MediaPipe graphs for optimized inference execution. + +## Build image + +```bash +git clone https://github.com/openvinotoolkit/model_server.git +cd model_server +``` + +If you want the docker image that supports CPU, run: + +```bash +make release_image +``` + +Else if you want the docker image that supports iGPU's, run: +```bash +make release_image GPU=1 +``` + +# Project Architecture + +1. **Model Conversion (`model_conversion/`)** + - Convert supported multimodal models (e.g., CLIP, Laion, DINO) into **OpenVINO IR format**. + - Ensures models are optimized for inference on Intel hardware. + +2. **Servable Pipeline (`servable/`)** + - **Preprocessing**: Handles image resizing, normalization(if required). + - **Postprocessing**: Handles embedding extraction, vector normalization, and formatting. + - **Config File**: `config_model.json` defines model parameters and pipeline configurations. + - **MediaPipe Graphs**: Graph definitions for processing inputs/outputs across the 3 models. + +3. **gRPC CLI (`grpc_cli.py`)** + - Iterates over a folder of images. + - Extracts embeddings using the OpenVINO-served models. + - Stores embeddings in a **Vector Database** (Qdrant) + +4. **Search API (`search_images.py`)** + - Accepts an input query image. + - Generates its embedding and queries the Vector DB. + - Returns the most similar images based on cosine similarity or other distance metrics. + +5. **Search App (`streamlit_app.py`)** + - Provided a frontend for the users to interact with the project and test it + - Allows users to upload images and perform semantic search + + +## Installation and setup + +```bash +cd demos/python_demos/image_embeddings +python3 -m venv venv +pip install -r requirements.txt +``` + + +## Model conversion +```bash +python3 -m venv venv +cd demos/python_demos/image_embeddings/model_conversion +``` + +For clip +```bash +python clip_conversion.py +``` + +For laion +```bash +python laion_conversion.py +``` + +For Dino +```bash +python dino_conversion.py +``` + +## Deploying OpenVINO Model Server +Prerequisites: +- image of OVMS with Python support and Optimum installed +Mount the `./servable` which contains: +- `post.py` and `pre.py` - python scripts which are required for execution. 
+- `config_model.json` - which defines which servables should be loaded. +- `graph_clip.pbtxt`, `graph_laion.pbtxt`, `graph_dino.pbtxt` - which defines MediaPipe graph containing python nodes. + + +```bash +cd demos/python_demos/image_embeddings +``` + + +To use CPU +```bash +docker run -it --rm \ +-p 9000:9000 -p 8000:8000 \ +-v ${PWD}/servable:/workspace \ +-v ${PWD}/model_conversion/saved_mod/siglip:/saved_mod/dino \ +-v ${PWD}/model_conversion/saved_mod/clip:/saved_mod/clip \ +-v ${PWD}/model_conversion/saved_mod/laion:/saved_mod/laion \ +openvino/model_server:py \ +--config_path /workspace/config_model.json \ +--port 9000 --rest_port 8000 + +``` + +To use GPU +```bash +docker run -it --rm \ + --device=/dev/dxg \ + --volume /usr/lib/wsl:/usr/lib/wsl \ + -p 9000:9000 -p 8000:8000 \ + -v ${PWD}/servable:/workspace \ + -v ${PWD}/model_conversion/saved_mod/dino:/saved_mod/dino \ + -v ${PWD}/model_conversion/saved_mod/clip:/saved_mod/clip \ + -v ${PWD}/model_conversion/saved_mod/laion:/saved_mod/laion \ + ovms-gpu-custom \ + --config_path /workspace/config_model.json \ + --port 9000 \ + --rest_port 8000 +``` + +## Deploying the Vector Database + +The next step is to start the vector database. In this project, we are using **Qdrant**, an open-source vector database optimized for efficient semantic search. + +Run the following command to start Qdrant with Docker: + +```bash +cd demos/python_demos/image_embeddings +docker run -p 6333:6333 qdrant/qdrant +``` + +## Running the Demo + +Once the vector database is running and the OpenVINO Model Server is deployed, open another terminal and run: + +```bash +source venv/bin/activate +cd demos/python_demos/image_embeddings +python grpc_cli.py +or +python grpc_cli.py --model "your selected model name" +``` +Once you run this you should see something like this +```bash +Building Image Database +================================================== +Waiting for server to be ready... 
+Server Ready Check: 0%| | 0/15 [00:00 0 else 0 + + print(f"\nDatabase Build Complete!") + print(f"Statistics:") + print(f" • Images processed: {len(points)}") + print(f" • Average inference time: {avg_time:.2f} ms") + print(f" • Total processing time: {total_time:.2f} s") + print(f" • Throughput: {throughput:.2f} images/sec") + print(f" • Collection: '{QDRANT_COLLECTION}'") + + return len(points) + + +def main(): + print("Building Image Database") + print("=" * 50) + + parser = argparse.ArgumentParser(description="Build image embedding database with Triton + Qdrant") + parser.add_argument("--model", type=str, help="Model to use (clip_graph, dino_graph, laion_graph)") + args = parser.parse_args() + + try: + # Setup + client = setup_grpc_client() + selected_model = select_model(args.model) + + # Build database + count = build_database(client, selected_model) + + print(f"\nDatabase built successfully!") + print(f"Total images processed: {count}") + print(f"Model saved for future searches: {selected_model}") + + except KeyboardInterrupt: + print(f"\nProcess interrupted by user") + except Exception as e: + print(f"Error: {str(e)}") + import traceback + traceback.print_exc() + + +if __name__ == "__main__": + main() diff --git a/demos/python_demos/image_embeddings/model_conversion/clip_conversion.py b/demos/python_demos/image_embeddings/model_conversion/clip_conversion.py new file mode 100644 index 0000000000..4307ee2d7c --- /dev/null +++ b/demos/python_demos/image_embeddings/model_conversion/clip_conversion.py @@ -0,0 +1,25 @@ +from transformers import CLIPProcessor, CLIPModel +from PIL import Image +import openvino as ov +import os + +model_id = "openai/clip-vit-base-patch32" +print(f"Downloading pretrained model {model_id}...") + +full_model = CLIPModel.from_pretrained(model_id) +model = full_model.vision_model +processor = CLIPProcessor.from_pretrained(model_id) + +image = Image.new("RGB", (224, 224)) +inputs = processor(images=image, return_tensors="pt")["pixel_values"] + +print("Converting model...") +ov_model = ov.convert_model(model, example_input=inputs) +ov.save_model(ov_model, "clip_image_encoder.xml") +print("Model saved!") + +mod_path = "saved_mod/clip/1" +os.makedirs(mod_path, exist_ok=True) +os.replace("clip_image_encoder.xml", f"{mod_path}/clip_image_encoder.xml") +os.replace("clip_image_encoder.bin", f"{mod_path}/clip_image_encoder.bin") +print("Model ready for OVMS") \ No newline at end of file diff --git a/demos/python_demos/image_embeddings/model_conversion/dino_conversion.py b/demos/python_demos/image_embeddings/model_conversion/dino_conversion.py new file mode 100644 index 0000000000..bc4a44a438 --- /dev/null +++ b/demos/python_demos/image_embeddings/model_conversion/dino_conversion.py @@ -0,0 +1,27 @@ +from transformers import AutoImageProcessor, AutoModel +from PIL import Image +import openvino as ov +import os + +model_id = "facebook/dinov2-base" +print(f"Downloading pretrained model {model_id}...") + +model = AutoModel.from_pretrained(model_id) +processor = AutoImageProcessor.from_pretrained(model_id) + +image = Image.new("RGB", (224, 224)) +inputs = processor(images=image, return_tensors="pt")["pixel_values"] + +print("Converting models...") +ov_model = ov.convert_model(model, example_input=inputs) +ov.save_model(ov_model, "dino_image_encoder.xml") +print("Model saved!") + +mod_path = "saved_mod/dino/1" +os.makedirs(mod_path, exist_ok=True) +os.replace("dino_image_encoder.xml", f"{mod_path}/dino_image_encoder.xml") +os.replace("dino_image_encoder.bin", 
f"{mod_path}/dino_image_encoder.bin") +print("Model ready for OVMS") + + + diff --git a/demos/python_demos/image_embeddings/model_conversion/laion_conversion.py b/demos/python_demos/image_embeddings/model_conversion/laion_conversion.py new file mode 100644 index 0000000000..254a54ca7f --- /dev/null +++ b/demos/python_demos/image_embeddings/model_conversion/laion_conversion.py @@ -0,0 +1,32 @@ +from transformers import CLIPProcessor, CLIPModel +from PIL import Image +import openvino as ov +import torch +import os + +# Replace this with your LAION model +model_id = "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" +print(f"Downloading pretrained model: {model_id}") + +# Load processor and model +processor = CLIPProcessor.from_pretrained(model_id) +full_model = CLIPModel.from_pretrained(model_id) +image_encoder = full_model.vision_model +image_encoder.eval() + +# Dummy image input for tracing +image = Image.new("RGB", (224, 224)) +inputs = processor(images=image, return_tensors="pt")["pixel_values"] + +# Convert to OpenVINO IR +print("Converting image encoder to OpenVINO IR...") +ov_model = ov.convert_model(image_encoder, example_input=inputs) +ov.save_model(ov_model, "clip_image_encoder.xml") +print("Model saved!") + +# Move to proper OVMS path +mod_path = "saved_mod/laion/1" +os.makedirs(mod_path, exist_ok=True) +os.replace("clip_image_encoder.xml", f"{mod_path}/clip_image_encoder.xml") +os.replace("clip_image_encoder.bin", f"{mod_path}/clip_image_encoder.bin") +print(f"Model ready at {mod_path} for OpenVINO Model Server") diff --git a/demos/python_demos/image_embeddings/requirements.txt b/demos/python_demos/image_embeddings/requirements.txt new file mode 100644 index 0000000000..88150d18b8 --- /dev/null +++ b/demos/python_demos/image_embeddings/requirements.txt @@ -0,0 +1,10 @@ +tritonclient[all]==2.51.0 +numpy<2.0.0 +--extra-index-url "https://download.pytorch.org/whl/cpu" +--extra-index-url "https://storage.openvinotoolkit.org/simple/wheels/nightly" +--pre +openvino==2025.2.* +numpy<2.0 +transformers<4.52 +pillow==10.3.0 +torch==2.7.0+cpu diff --git a/demos/python_demos/image_embeddings/search_images.py b/demos/python_demos/image_embeddings/search_images.py new file mode 100644 index 0000000000..9d022b0fd5 --- /dev/null +++ b/demos/python_demos/image_embeddings/search_images.py @@ -0,0 +1,136 @@ +import sys +sys.path.append("../../common/python") +import tritonclient.grpc as grpcclient + +import numpy as np +from pathlib import Path +import os +import shutil +import json +import time +from qdrant_client import QdrantClient + +# ------------------------ Settings ------------------------ +IMAGE_FOLDER = "./demo_images" +QDRANT_COLLECTION = "image_embeddings" +OUTPUT_DIR = "./similar_images" +MODEL_CONFIG_FILE = "./selected_model.json" + +def load_selected_model(): + """Load the previously selected model""" + if os.path.exists(MODEL_CONFIG_FILE): + with open(MODEL_CONFIG_FILE, 'r') as f: + config = json.load(f) + return config.get("selected_model") + return None + +def get_query_embedding(query_image_path, model_name): + """Get embedding for query image via gRPC""" + client = grpcclient.InferenceServerClient("localhost:9000") + + with open(query_image_path, "rb") as f: + image_data = f.read() + + image_np = np.array([image_data], dtype=np.object_) + image_input = grpcclient.InferInput("image", [1], "BYTES") + image_input.set_data_from_numpy(image_np) + + start = time.perf_counter() + results = client.infer(model_name, [image_input]) + end = time.perf_counter() + + embedding = 
results.as_numpy('embedding')[0] + latency_ms = (end - start) * 1000 # milliseconds + + return embedding, latency_ms + +def search_similar_images(query_embedding, top_k=5): + """Search for similar images in Qdrant""" + qdrant = QdrantClient("localhost", port=6333) + + start = time.perf_counter() + search_result = qdrant.query_points( + collection_name=QDRANT_COLLECTION, + query=query_embedding.tolist(), + limit=top_k + ) + end = time.perf_counter() + + search_latency_ms = (end - start) * 1000 + return search_result.points, search_latency_ms + +def save_results(search_results, output_dir): + """Save similar images to output directory""" + Path(output_dir).mkdir(exist_ok=True, parents=True) + + print(f"\nTop {len(search_results)} similar images:") + print("-" * 50) + + for i, result in enumerate(search_results): + filename = result.payload["filename"] + similarity = result.score + + src_path = os.path.join(IMAGE_FOLDER, filename) + dst_path = os.path.join(output_dir, f"rank_{i+1}_score_{similarity:.3f}_{filename}") + + if os.path.exists(src_path): + shutil.copy2(src_path, dst_path) + print(f"{i+1}. {filename} (similarity: {similarity:.3f})") + else: + print(f"{i+1}. {filename} (similarity: {similarity:.3f}) File not found") + + print(f"\nResults saved to: {output_dir}") + +def main(): + if len(sys.argv) < 2: + print("Usage: python search_images.py [top_k]") + print("Example: python search_images.py ./mt.jpg 5") + return + + query_image_path = sys.argv[1] + top_k = int(sys.argv[2]) if len(sys.argv) > 2 else 5 + + if not os.path.exists(query_image_path): + print(f"Query image not found: {query_image_path}") + return + + # Auto-load model + model_name = load_selected_model() + if not model_name: + print("No model selection found. Please run grpc_cli.py first!") + return + + try: + print("Searching for Similar Images") + print("="*50) + print(f"Query: {query_image_path}") + print(f"Auto-loaded Model: {model_name}") + print(f"Top K: {top_k}") + + total_start = time.perf_counter() + + # Get query embedding + latency + query_embedding, infer_latency = get_query_embedding(query_image_path, model_name) + + # Search similar images + latency + results, search_latency = search_similar_images(query_embedding, top_k) + + total_end = time.perf_counter() + total_latency = (total_end - total_start) * 1000 + + # Save results + save_results(results, OUTPUT_DIR) + + print("\nPerformance Summary") + print("-" * 50) + print(f"Embedding Inference Latency : {infer_latency:.2f} ms") + print(f"Qdrant Search Latency : {search_latency:.2f} ms") + print(f"End-to-End Latency : {total_latency:.2f} ms") + + except Exception as e: + print(f"Error: {str(e)}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + main() diff --git a/demos/python_demos/image_embeddings/servable/config_model.json b/demos/python_demos/image_embeddings/servable/config_model.json new file mode 100644 index 0000000000..96bbd2e514 --- /dev/null +++ b/demos/python_demos/image_embeddings/servable/config_model.json @@ -0,0 +1,39 @@ +{ + "model_config_list": [ + { + "config": { + "name": "clip", + "base_path": "/saved_mod/clip", + "target_device": "GPU" + } + }, + { + "config": { + "name": "dino", + "base_path": "/saved_mod/dino", + "target_device": "GPU" + } + }, + { + "config": { + "name": "laion", + "base_path": "/saved_mod/laion", + "target_device": "GPU" + } + } + ], + "mediapipe_config_list": [ + { + "name": "clip_graph", + "graph_path": "/workspace/graph_clip.pbtxt" + }, + { + "name": "dino_graph", + "graph_path": 
"/workspace/graph_dino.pbtxt" + }, + { + "name": "laion_graph", + "graph_path": "/workspace/graph_laion.pbtxt" + } + ] +} \ No newline at end of file diff --git a/demos/python_demos/image_embeddings/servable/graph_clip.pbtxt b/demos/python_demos/image_embeddings/servable/graph_clip.pbtxt new file mode 100644 index 0000000000..85faef6b7b --- /dev/null +++ b/demos/python_demos/image_embeddings/servable/graph_clip.pbtxt @@ -0,0 +1,81 @@ +input_stream: "OVMS_PY_TENSOR:image" +output_stream: "OVMS_PY_TENSOR:embedding" + +node{ + name: "CLIPPreprocessor" + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "OVMS_PY_TENSOR:image" + output_stream: "OVMS_PY_TENSOR:pixel_values" + node_options:{ + [type.googleapis.com/mediapipe.PythonExecutorCalculatorOptions]:{ + handler_path: "/workspace/pre.py" + } + } +} + +node{ + name: "PixelValuesConverter" + calculator: "PyTensorOvTensorConverterCalculator" + input_stream: "OVMS_PY_TENSOR:pixel_values" + output_stream: "OVTENSOR:image_em" +} + +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com/mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "clip" + servable_version: "1" + } + } +} + +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:image_em" + output_stream: "OVTENSOR:pooler_output" + node_options: { + [type.googleapis.com/mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "40" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "pooler_output" + } + } + } +} + + +node { + name: "EmbeddingConverter" + calculator: "PyTensorOvTensorConverterCalculator" + input_stream: "OVTENSOR:pooler_output" + output_stream: "OVMS_PY_TENSOR:pooler_output_em" + node_options: { + [type.googleapis.com/mediapipe.PyTensorOvTensorConverterCalculatorOptions]: { + tag_to_output_tensor_names{ + key: "OVMS_PY_TENSOR" + value: "pooler_output_em" + } + } +} +} + +node { + name: "PostprocessorCLIP" + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "OVMS_PY_TENSOR:pooler_output_em" + output_stream: "OVMS_PY_TENSOR:embedding" + node_options: { + [type.googleapis.com/mediapipe.PythonExecutorCalculatorOptions]: { + handler_path: "/workspace/post.py" + } + } +} \ No newline at end of file diff --git a/demos/python_demos/image_embeddings/servable/graph_dino.pbtxt b/demos/python_demos/image_embeddings/servable/graph_dino.pbtxt new file mode 100644 index 0000000000..b4f3361fc6 --- /dev/null +++ b/demos/python_demos/image_embeddings/servable/graph_dino.pbtxt @@ -0,0 +1,80 @@ +input_stream: "OVMS_PY_TENSOR:image" +output_stream: "OVMS_PY_TENSOR:embedding" + +node { + name: "DINOPreprocessor" + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "OVMS_PY_TENSOR:image" + output_stream: "OVMS_PY_TENSOR:pixel_values" + node_options: { + [type.googleapis.com/mediapipe.PythonExecutorCalculatorOptions]: { + handler_path: "/workspace/pre.py" + } + } +} + +node { + name: "PixelValuesConverter" + calculator: "PyTensorOvTensorConverterCalculator" + input_stream: "OVMS_PY_TENSOR:pixel_values" + output_stream: "OVTENSOR:dino_img" +} + +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session_dino" + node_options: { + 
[type.googleapis.com/mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dino" + servable_version: "1" + } + } +} + +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session_dino" + input_stream: "OVTENSOR:dino_img" + output_stream: "OVTENSOR:dino_output_15" + node_options: { + [type.googleapis.com/mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "48" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "15" + } + } + } +} + +node { + name: "EmbeddingConverter" + calculator: "PyTensorOvTensorConverterCalculator" + input_stream: "OVTENSOR:dino_output_15" + output_stream: "OVMS_PY_TENSOR:normalized_dino_tensor" + node_options: { + [type.googleapis.com/mediapipe.PyTensorOvTensorConverterCalculatorOptions]: { + tag_to_output_tensor_names { + key: "OVMS_PY_TENSOR" + value: "normalized_dino_tensor" + } + } + } +} + +node { + name: "PostprocessorDINO" + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "OVMS_PY_TENSOR:normalized_dino_tensor" + output_stream: "OVMS_PY_TENSOR:embedding" + node_options: { + [type.googleapis.com/mediapipe.PythonExecutorCalculatorOptions]: { + handler_path: "/workspace/post.py" + } + } +} diff --git a/demos/python_demos/image_embeddings/servable/graph_laion.pbtxt b/demos/python_demos/image_embeddings/servable/graph_laion.pbtxt new file mode 100644 index 0000000000..9115ecab8a --- /dev/null +++ b/demos/python_demos/image_embeddings/servable/graph_laion.pbtxt @@ -0,0 +1,81 @@ +input_stream: "OVMS_PY_TENSOR:image" +output_stream: "OVMS_PY_TENSOR:embedding" + +node{ + name: "LAIONPreprocessor" + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "OVMS_PY_TENSOR:image" + output_stream: "OVMS_PY_TENSOR:pixel_values" + node_options:{ + [type.googleapis.com/mediapipe.PythonExecutorCalculatorOptions]:{ + handler_path: "/workspace/pre.py" + } + } +} + +node{ + name: "PixelValuesConverter" + calculator: "PyTensorOvTensorConverterCalculator" + input_stream: "OVMS_PY_TENSOR:pixel_values" + output_stream: "OVTENSOR:image_em" +} + +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com/mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "laion" + servable_version: "1" + } + } +} + +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:image_em" + output_stream: "OVTENSOR:pooler_output" + node_options: { + [type.googleapis.com/mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "40" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "pooler_output" + } + } + } +} + + +node { + name: "EmbeddingConverter" + calculator: "PyTensorOvTensorConverterCalculator" + input_stream: "OVTENSOR:pooler_output" + output_stream: "OVMS_PY_TENSOR:pooler_output_em" + node_options: { + [type.googleapis.com/mediapipe.PyTensorOvTensorConverterCalculatorOptions]: { + tag_to_output_tensor_names{ + key: "OVMS_PY_TENSOR" + value: "pooler_output_em" + } + } +} +} + +node { + name: "Postprocessorlaion" + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "OVMS_PY_TENSOR:pooler_output_em" + output_stream: "OVMS_PY_TENSOR:embedding" + node_options: { + 
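+      # Options below point at the shared post.py handler, which L2-normalizes the embedding before it is returned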
[type.googleapis.com/mediapipe.PythonExecutorCalculatorOptions]: { + handler_path: "/workspace/post.py" + } + } +} \ No newline at end of file diff --git a/demos/python_demos/image_embeddings/servable/graph_pipeline.pbtxt b/demos/python_demos/image_embeddings/servable/graph_pipeline.pbtxt new file mode 100644 index 0000000000..b9fbc885e8 --- /dev/null +++ b/demos/python_demos/image_embeddings/servable/graph_pipeline.pbtxt @@ -0,0 +1,81 @@ +input_stream: "OVMS_PY_TENSOR:image" +output_stream: "OVMS_PY_TENSOR:embedding" + +node{ + name: "ImagePreprocessor" + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "OVMS_PY_TENSOR:image" + output_stream: "OVMS_PY_TENSOR:pixel_values" + node_options:{ + [type.googleapis.com/mediapipe.PythonExecutorCalculatorOptions]:{ + handler_path: "/workspace/pre.py" + } + } +} + +node{ + name: "PixelValuesConverter" + calculator: "PyTensorOvTensorConverterCalculator" + input_stream: "OVMS_PY_TENSOR:pixel_values" + output_stream: "OVTENSOR:image_em" +} + +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com/mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "clip" + servable_version: "1" + } + } +} + +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:image_em" + output_stream: "OVTENSOR:pooler_output" + node_options: { + [type.googleapis.com/mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "40" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "pooler_output" + } + } + } +} + + +node { + name: "EmbeddingConverter" + calculator: "PyTensorOvTensorConverterCalculator" + input_stream: "OVTENSOR:pooler_output" + output_stream: "OVMS_PY_TENSOR:pooler_output_em" + node_options: { + [type.googleapis.com/mediapipe.PyTensorOvTensorConverterCalculatorOptions]: { + tag_to_output_tensor_names{ + key: "OVMS_PY_TENSOR" + value: "pooler_output_em" + } + } +} +} + +node { + name: "Postprocessor" + calculator: "PythonExecutorCalculator" + input_side_packet: "PYTHON_NODE_RESOURCES:py" + input_stream: "OVMS_PY_TENSOR:pooler_output_em" + output_stream: "OVMS_PY_TENSOR:embedding" + node_options: { + [type.googleapis.com/mediapipe.PythonExecutorCalculatorOptions]: { + handler_path: "/workspace/post.py" + } + } +} \ No newline at end of file diff --git a/demos/python_demos/image_embeddings/servable/post.py b/demos/python_demos/image_embeddings/servable/post.py new file mode 100644 index 0000000000..364ca52fef --- /dev/null +++ b/demos/python_demos/image_embeddings/servable/post.py @@ -0,0 +1,26 @@ +from pyovms import Tensor +import numpy as np + +class OvmsPythonModel: + def initialize(self, kwargs: dict): + self.node_name = kwargs.get("node_name", "") + if "clip" in self.node_name.lower(): + self.mode = "clip" + elif "dino" in self.node_name.lower(): + self.mode = "dino" + elif "laion" in self.node_name.lower(): + self.mode = "laion" + else: + raise ValueError(f"Unsupported model type in node name: {self.node_name}") + + def execute(self, inputs: list) -> list: + try: + tensor = inputs[0] + embedding = np.frombuffer(tensor.data, dtype=np.float32).reshape(tensor.shape) + norm = np.linalg.norm(embedding, axis=1, keepdims=True) + 1e-10 + normalized = embedding / norm + return [Tensor(name="embedding", buffer=normalized.astype(np.float32))] + except Exception as e: + print(">>> ERROR in 
Postprocessor:", str(e)) + raise + diff --git a/demos/python_demos/image_embeddings/servable/pre.py b/demos/python_demos/image_embeddings/servable/pre.py new file mode 100644 index 0000000000..4f38057f38 --- /dev/null +++ b/demos/python_demos/image_embeddings/servable/pre.py @@ -0,0 +1,33 @@ +from pyovms import Tensor +from transformers import CLIPProcessor, AutoImageProcessor +from PIL import Image +import numpy as np +from io import BytesIO +from tritonclient.utils import deserialize_bytes_tensor + +class OvmsPythonModel: + def initialize(self, kwargs:dict): + self.node_name=kwargs.get("node_name","") + + if "clip" in self.node_name.lower(): + self.processor=CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32") + self.mode="clip" + + elif "dino" in self.node_name.lower(): + self.processor=AutoImageProcessor.from_pretrained("facebook/dinov2-base") + self.mode="dino" + + elif "laion" in self.node_name.lower(): + self.processor=CLIPProcessor.from_pretrained("laion/CLIP-ViT-B-32-laion2B-s34B-b79K") + self.mode="laion" + else: + raise ValueError(f"Unsupported model type in node name: {self.node_name}") + + def execute(self,inputs: list): + image_bytes = deserialize_bytes_tensor(bytes(inputs[0]))[0] + + image=Image.open(BytesIO(image_bytes)).convert("RGB") + processed=self.processor(images=image,return_tensors="np") + pixel_values=processed["pixel_values"].astype(np.float32) + return[Tensor(name="pixel_values",buffer=pixel_values)] + diff --git a/demos/python_demos/image_embeddings/streamlit_app.py b/demos/python_demos/image_embeddings/streamlit_app.py new file mode 100644 index 0000000000..c55efd82e8 --- /dev/null +++ b/demos/python_demos/image_embeddings/streamlit_app.py @@ -0,0 +1,548 @@ +import os +import io +import json +import time +from pathlib import Path +from typing import Optional, List, Tuple + +import numpy as np +import streamlit as st +from PIL import Image + +# Triton/OVMS (gRPC) +import tritonclient.grpc as grpcclient +from qdrant_client import QdrantClient +from qdrant_client.http import models as rest + +# ------------------------ Config ------------------------ +IMAGE_FOLDER = os.environ.get("IMAGE_FOLDER", "./demo_images") +QDRANT_HOST = os.environ.get("QDRANT_HOST", "localhost") +QDRANT_PORT = int(os.environ.get("QDRANT_PORT", "6333")) +TRITON_URL = os.environ.get("TRITON_URL", "localhost:9000") +QDRANT_COLLECTION = os.environ.get("QDRANT_COLLECTION", "image_embeddings") +MODEL_CONFIG_FILE = os.environ.get("MODEL_CONFIG_FILE", "./selected_model.json") +AVAILABLE_MODELS = ["clip_graph", "dino_graph", "laion_graph"] + +# Model descriptions for better UX +MODEL_DESCRIPTIONS = { + "clip_graph": "CLIP - Great for general image-text understanding", + "dino_graph": "DINO - Excellent for object detection and features", + "laion_graph": "LAION - Trained on large-scale web data" +} + +# ------------------------ Custom CSS ------------------------ +def load_custom_css(): + st.markdown(""" + + """, unsafe_allow_html=True) + +# ------------------------ Helpers ------------------------ +@st.cache_resource(show_spinner=False) +def get_triton_client(): + return grpcclient.InferenceServerClient(TRITON_URL) + +@st.cache_resource(show_spinner=False) +def get_qdrant_client(): + return QdrantClient(QDRANT_HOST, port=QDRANT_PORT) + +def check_system_status() -> Tuple[bool, bool]: + """Check if Triton and Qdrant are accessible""" + triton_ok = False + qdrant_ok = False + + try: + client = get_triton_client() + client.is_server_ready() + triton_ok = True + except Exception: + pass + + 
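+    # Qdrant check: the server must be reachable and the target collection (built by grpc_cli.py) must already exist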
try: + client = get_qdrant_client() + collections = client.get_collections() + qdrant_ok = any(c.name == QDRANT_COLLECTION for c in collections.collections) + except Exception: + pass + + return triton_ok, qdrant_ok + +def load_selected_model_from_file() -> Optional[str]: + if os.path.exists(MODEL_CONFIG_FILE): + try: + with open(MODEL_CONFIG_FILE, "r") as f: + return json.load(f).get("selected_model") + except Exception: + return None + return None + +def save_selected_model_to_file(model_name: str): + with open(MODEL_CONFIG_FILE, "w") as f: + json.dump({"selected_model": model_name}, f) + +def embed_image_bytes(img_bytes: bytes, model_name: str) -> np.ndarray: + """Send image bytes to OVMS via gRPC and get embedding.""" + client = get_triton_client() + infer_input = grpcclient.InferInput("image", [1], "BYTES") + infer_input.set_data_from_numpy(np.array([img_bytes], dtype=np.object_)) + result = client.infer(model_name, [infer_input]) + embedding = result.as_numpy("embedding")[0] + return embedding + +def qdrant_search(embedding: np.ndarray, top_k: int = 6): + qdrant = get_qdrant_client() + try: + # Try new API first + out = qdrant.query_points( + collection_name=QDRANT_COLLECTION, + query=embedding.tolist(), + limit=top_k, + ) + return out.points + except Exception: + # Fallback to old API + out = qdrant.search( + collection_name=QDRANT_COLLECTION, + query_vector=embedding.tolist(), + limit=top_k, + ) + return out + +def list_dataset_images(folder: str) -> List[Path]: + if not os.path.isdir(folder): + return [] + exts = {".jpg", ".jpeg", ".png", ".bmp", ".webp"} + return [p for p in sorted(Path(folder).iterdir()) if p.suffix.lower() in exts] + +def get_collection_stats(): + """Get statistics about the Qdrant collection""" + try: + client = get_qdrant_client() + info = client.get_collection(QDRANT_COLLECTION) + return { + 'total_vectors': info.vectors_count or 0, + 'status': info.status or 'unknown' + } + except Exception: + return None + +# ------------------------ UI Components ------------------------ +def render_header(): + st.markdown(""" +
+    <div>
+        <h1>🔎 Image Similarity Search</h1>
+        <p>Powered by OVMS + Qdrant • Find visually similar images using AI embeddings</p>
+    </div>
+ """, unsafe_allow_html=True) + +def render_system_status(): + """Display system status in sidebar""" + triton_ok, qdrant_ok = check_system_status() + + st.markdown("### 🔧 System Status") + + col1, col2 = st.columns(2) + with col1: + status_class = "status-success" if triton_ok else "status-error" + status_text = "Online" if triton_ok else "Offline" + st.markdown(f'**Triton/OVMS**
<br><span class="{status_class}">● {status_text}</span>',
+                    unsafe_allow_html=True)
+
+    with col2:
+        status_class = "status-success" if qdrant_ok else "status-error"
+        status_text = "Ready" if qdrant_ok else "Not Ready"
+        st.markdown(f'**Qdrant DB**<br><span class="{status_class}">● {status_text}</span>',
+                    unsafe_allow_html=True)
+
+    # Collection stats
+    if qdrant_ok:
+        stats = get_collection_stats()
+        if stats:
+            vector_count = stats['total_vectors'] or 0
+            status = stats['status'] or 'unknown'
+            st.markdown("### 📊 Collection Stats")
+            st.markdown(f"""
+            <div>
+                {vector_count:,} vectors indexed<br>
+                Status: {status}
+            </div>
+ """, unsafe_allow_html=True) + +def render_model_selector(): + """Enhanced model selection with descriptions""" + st.markdown("### 🤖 AI Model") + + # Try to auto-load previous selection + default_model = load_selected_model_from_file() + if default_model and default_model in AVAILABLE_MODELS: + default_idx = AVAILABLE_MODELS.index(default_model) + else: + default_idx = 0 + + # Create options with descriptions + options = [f"{model} - {MODEL_DESCRIPTIONS.get(model, 'AI embedding model')}" + for model in AVAILABLE_MODELS] + + selected_option = st.selectbox( + "Choose embedding model:", + options, + index=default_idx, + help="Different models excel at different types of image understanding" + ) + + # Extract model name from selection + model_name = selected_option.split(" - ")[0] + + col1, col2 = st.columns([1, 1]) + with col1: + if st.button("💾 Save Default", help="Remember this model choice"): + save_selected_model_to_file(model_name) + st.success(f"✅ Saved: {model_name}") + + return model_name + +def render_search_interface(): + """Enhanced search interface - without white background""" + # Remove the div wrapper that was causing the white block + st.markdown("### 📤 Upload Query Image") + + # File uploader with better styling + uploaded = st.file_uploader( + "Choose an image file", + type=["jpg", "jpeg", "png", "bmp", "webp"], + help="Upload an image to find visually similar ones" + ) + + st.markdown("**OR**") + + # Dataset image selector + st.markdown("### 📂 Choose from Dataset") + dataset_images = list_dataset_images(IMAGE_FOLDER) + + if dataset_images: + # Show thumbnail preview grid for dataset selection + st.write(f"Found {len(dataset_images)} images in dataset:") + + # Create a selectbox with image names + pick = st.selectbox( + "Select dataset image:", + ["— Select an image —"] + [p.name for p in dataset_images], + help="Pick from pre-indexed images in your dataset" + ) + + # Show preview of selected dataset image + if pick != "— Select an image —": + selected_path = Path(IMAGE_FOLDER) / pick + if selected_path.exists(): + with st.expander("🔍 Preview Selected Image", expanded=True): + img = Image.open(selected_path) + st.image(img, caption=pick, use_container_width=True) + else: + st.warning(f"📁 No images found in `{IMAGE_FOLDER}`") + pick = "— Select an image —" + + # Determine query image + query_img_bytes = None + query_img_label = None + + if uploaded is not None: + query_img_bytes = uploaded.read() + query_img_label = uploaded.name + elif pick != "— Select an image —": + fp = Path(IMAGE_FOLDER) / pick + if fp.exists(): + query_img_bytes = fp.read_bytes() + query_img_label = fp.name + + return query_img_bytes, query_img_label + +def render_search_results(results, search_time: float): + """Fixed results display with proper image loading""" + st.markdown("### 🎯 Search Results") + + if not results: + st.warning("🤷 No similar images found. 
Make sure the Qdrant collection is populated!") + return + + # Results header with metrics + col1, col2, col3 = st.columns(3) + with col1: + st.metric("Results Found", len(results)) + with col2: + st.metric("Search Time", f"{search_time:.2f}s") + with col3: + try: + best_score = max([r.score if hasattr(r, 'score') and r.score is not None else 0 for r in results]) + except: + best_score = 0 + st.metric("Best Match", f"{best_score:.3f}") + + st.markdown("---") + + # Results grid with fixed image loading + num_cols = min(3, len(results)) + cols = st.columns(num_cols) + + for i, result in enumerate(results): + try: + # Handle different Qdrant response formats + if hasattr(result, 'payload') and result.payload: + filename = result.payload.get("filename", "unknown") + elif hasattr(result, 'metadata') and result.metadata: + filename = result.metadata.get("filename", "unknown") + else: + filename = "unknown" + + # Get score safely + score = getattr(result, "score", 0) if hasattr(result, "score") else 0 + + # Build image path + img_path = Path(IMAGE_FOLDER) / filename + + with cols[i % num_cols]: + st.markdown(f''' +
+                <div>
+                    #{i+1} Match<br>
+                    Score: {score:.4f}<br>
+                    File: {filename}
+                </div>
+ ''', unsafe_allow_html=True) + + if img_path.exists(): + try: + img = Image.open(img_path) + st.image(img, use_container_width=True, caption=f"Score: {score:.4f}") + + # Additional metadata if available + with st.expander("📄 Details"): + st.write(f"**Filename:** `{filename}`") + st.write(f"**Similarity Score:** `{score:.6f}`") + st.write(f"**File Size:** `{img_path.stat().st_size / 1024:.1f} KB`") + st.write(f"**Dimensions:** `{img.size[0]}x{img.size[1]}`") + except Exception as img_error: + st.error(f"❌ Error loading image: {img_error}") + else: + st.error(f"❌ Image file not found: {filename}") + + except Exception as e: + st.error(f"Error processing result {i}: {e}") + +# ------------------------ Main App ------------------------ +def main(): + st.set_page_config( + page_title="Image Similarity Search", + page_icon="🔎", + layout="wide", + initial_sidebar_state="expanded" + ) + + # Load custom CSS + load_custom_css() + + # Header + render_header() + + # Sidebar + with st.sidebar: + st.markdown("## ⚙️ Configuration") + render_system_status() + st.markdown("---") + model_name = render_model_selector() + + st.markdown("---") + st.markdown("### 🎚️ Search Settings") + top_k = st.slider( + "Number of results:", + min_value=1, + max_value=20, + value=6, + help="How many similar images to return" + ) + + st.markdown("---") + st.markdown("### 📡 Connection Info") + st.code(f""" +Host: {QDRANT_HOST}:{QDRANT_PORT} +Triton: {TRITON_URL} +Collection: {QDRANT_COLLECTION} + """) + + # Main content + col_search, col_results = st.columns([1, 2]) + + with col_search: + query_img_bytes, query_img_label = render_search_interface() + + # Show query image preview + if query_img_bytes: + st.markdown("### 🖼️ Query Image") + query_img = Image.open(io.BytesIO(query_img_bytes)) + st.image(query_img, caption=f"Query: {query_img_label}", use_container_width=True) + + # Search button + search_button = st.button( + "🔍 Find Similar Images", + type="primary", + use_container_width=True + ) + else: + st.info("👆 Upload an image or select one from the dataset to begin searching") + search_button = False + + with col_results: + if search_button and query_img_bytes: + try: + with st.spinner("🔄 Computing embeddings and searching..."): + start_time = time.time() + + # Progress bar for better UX + progress = st.progress(0) + progress.progress(25, "Computing image embedding...") + + emb = embed_image_bytes(query_img_bytes, model_name) + progress.progress(75, "Searching similar images...") + + results = qdrant_search(emb, top_k=top_k) + progress.progress(100, "Complete!") + + search_time = time.time() - start_time + progress.empty() + + render_search_results(results, search_time) + + except Exception as e: + st.error("❌ Search failed!") + st.exception(e) + + # Helpful troubleshooting + st.markdown("### 🔧 Troubleshooting") + triton_ok, qdrant_ok = check_system_status() + st.write(f"**Triton/OVMS:** {'✅ OK' if triton_ok else '❌ Failed'}") + st.write(f"**Qdrant:** {'✅ OK' if qdrant_ok else '❌ Failed'}") + + st.markdown(""" + **Common Issues:** + - Check if Triton/OVMS server is running + - Verify Qdrant database is accessible + - Ensure the collection exists and has data + - Confirm the model name matches your deployment + - Check IMAGE_FOLDER path contains the indexed images + """) + else: + st.markdown(""" +
+            <div>
+                <h3>🎯 Ready to Search</h3>
+                <p>Upload an image or select from dataset to find visually similar images</p>
+            </div>
+ """, unsafe_allow_html=True) + +if __name__ == "__main__": + main() \ No newline at end of file