diff --git a/README.md b/README.md
index 097d9d95d..69451a057 100644
--- a/README.md
+++ b/README.md
@@ -27,18 +27,18 @@ Ultra performant data transformation framework for AI, with core engine written
 ⭐ Drop a star to help us grow!
-
+
-[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
-[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
-[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
-[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
-[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
-[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
-[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
-[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
+[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
+[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
+[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
+[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
+[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
+[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
+[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
+[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
 [中文](https://readme-i18n.com/cocoindex-io/cocoindex?lang=zh)
-
+

@@ -183,6 +183,7 @@ It defines an index flow like this:
 | [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
 | [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
 | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
+| [Face Recognition](examples/face_recognition) | Recognize faces in images and build an embedding index |
 | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
 
 More coming and stay tuned 👀!
diff --git a/examples/face_recognition/.env b/examples/face_recognition/.env
new file mode 100644
index 000000000..335f30600
--- /dev/null
+++ b/examples/face_recognition/.env
@@ -0,0 +1,2 @@
+# Postgres database address for cocoindex
+COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
diff --git a/examples/face_recognition/README.md b/examples/face_recognition/README.md
new file mode 100644
index 000000000..5d313dd42
--- /dev/null
+++ b/examples/face_recognition/README.md
@@ -0,0 +1,51 @@
+# Recognize faces in images and build an embedding index
+[![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)
+
+
+In this example, we recognize faces in images and build an embedding index for them.
+
+We appreciate a star ⭐ at [CocoIndex Github](https://github.com/cocoindex-io/cocoindex) if this is helpful.
+
+## Steps
+### Indexing Flow
+
+1. We will ingest a list of images.
+2. For each image, we:
+   - Extract faces from the image.
+   - Compute an embedding for each face.
+3. We will export the following table to Postgres with PGVector:
+   - Filename, rect, and embedding for each face.
+
+
+## Prerequisites
+
+1. [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
+
+2. Install dependencies:
+
+    ```bash
+    pip install -e .
+    ```
+
+## Run
+
+Update the index, which will also set up the tables on the first run:
+
+```bash
+cocoindex update --setup main.py
+```
+
+You can also run the command with `-L`, which will watch for file changes and update the index automatically:
+
+```bash
+cocoindex update --setup -L main.py
+```
+
+## CocoInsight
+You can use CocoInsight (currently in free beta) to troubleshoot index generation and understand the data lineage of the pipeline. It connects to your local CocoIndex server with zero pipeline data retention. Run the following command to start the server for CocoInsight:
+
+```
+cocoindex server -ci main.py
+```
+
+Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight).
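The example README above covers building the index but stops short of querying it. The following is a minimal sketch of a similarity search against the exported table; it assumes CocoIndex's default `<flow name>__<target name>` table naming (so `facerecognition__face_embeddings`) and the `psycopg`/`pgvector` client packages, none of which are pinned or guaranteed by this example.

```python
import os

import face_recognition
import numpy as np
import psycopg
from pgvector.psycopg import register_vector


def find_similar_faces(query_image_path: str, top_k: int = 5) -> list[tuple[str, float]]:
    """Embed the first face found in the query image and return the closest indexed faces."""
    image = face_recognition.load_image_file(query_image_path)
    encodings = face_recognition.face_encodings(image)
    if not encodings:
        return []
    query_embedding = np.asarray(encodings[0], dtype=np.float32)

    # COCOINDEX_DATABASE_URL matches the value in .env; the table name below is an
    # assumption based on CocoIndex's default "<flow name>__<target name>" convention.
    with psycopg.connect(os.environ["COCOINDEX_DATABASE_URL"]) as conn:
        register_vector(conn)
        rows = conn.execute(
            "SELECT filename, embedding <-> %s AS distance "
            "FROM facerecognition__face_embeddings ORDER BY distance LIMIT %s",
            (query_embedding, top_k),
        ).fetchall()
    return [(filename, float(distance)) for filename, distance in rows]
```

The `<->` operator is pgvector's Euclidean distance, which matches how `face_recognition` encodings are normally compared.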
diff --git a/examples/face_recognition/images/Carter_welcomes_Reagan.jpg b/examples/face_recognition/images/Carter_welcomes_Reagan.jpg
new file mode 100644
index 000000000..b24785729
Binary files /dev/null and b/examples/face_recognition/images/Carter_welcomes_Reagan.jpg differ
diff --git a/examples/face_recognition/images/Solvay_conference_1927.jpg b/examples/face_recognition/images/Solvay_conference_1927.jpg
new file mode 100644
index 000000000..ce85a45a8
Binary files /dev/null and b/examples/face_recognition/images/Solvay_conference_1927.jpg differ
diff --git a/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg b/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg
new file mode 100644
index 000000000..6cb91f4c5
Binary files /dev/null and b/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg differ
diff --git a/examples/face_recognition/images/einplanck3.jpg b/examples/face_recognition/images/einplanck3.jpg
new file mode 100644
index 000000000..9bec9401d
Binary files /dev/null and b/examples/face_recognition/images/einplanck3.jpg differ
diff --git a/examples/face_recognition/main.py b/examples/face_recognition/main.py
new file mode 100644
index 000000000..ef241265f
--- /dev/null
+++ b/examples/face_recognition/main.py
@@ -0,0 +1,115 @@
+import cocoindex
+import io
+import dataclasses
+import datetime
+import typing
+
+import face_recognition
+from PIL import Image
+import numpy as np
+
+
+@dataclasses.dataclass
+class ImageRect:
+    top: int
+    left: int
+    bottom: int
+    right: int
+
+
+@dataclasses.dataclass
+class FaceBase:
+    """A face in an image."""
+
+    rect: ImageRect
+    image: bytes
+
+
+MAX_IMAGE_WIDTH = 1280
+
+
+@cocoindex.op.function(cache=True, behavior_version=1, gpu=True)
+def extract_faces(content: bytes) -> list[FaceBase]:
+    """Detect faces in an image and return a cropped image for each face."""
+    orig_img = Image.open(io.BytesIO(content)).convert("RGB")
+
+    # The detection model is slow on large images, so downscale to at most MAX_IMAGE_WIDTH.
+    if orig_img.width > MAX_IMAGE_WIDTH:
+        ratio = orig_img.width * 1.0 / MAX_IMAGE_WIDTH
+        img = orig_img.resize(
+            (MAX_IMAGE_WIDTH, int(orig_img.height / ratio)),
+            resample=Image.Resampling.BICUBIC,
+        )
+    else:
+        ratio = 1.0
+        img = orig_img
+
+    # Detect face locations in the (possibly resized) image.
+    locs = face_recognition.face_locations(np.array(img), model="cnn")
+
+    faces: list[FaceBase] = []
+    for top, right, bottom, left in locs:
+        rect = ImageRect(
+            left=int(left * ratio),
+            top=int(top * ratio),
+            right=int(right * ratio),
+            bottom=int(bottom * ratio),
+        )
+
+        # Crop the face from the original full-resolution image and encode it as PNG.
+        buf = io.BytesIO()
+        orig_img.crop((rect.left, rect.top, rect.right, rect.bottom)).save(
+            buf, format="PNG"
+        )
+        face = buf.getvalue()
+        faces.append(FaceBase(rect, face))
+
+    return faces
+
+
+@cocoindex.op.function(cache=True, behavior_version=1, gpu=True)
+def extract_face_embedding(
+    face: bytes,
+) -> cocoindex.Vector[cocoindex.Float32, typing.Literal[128]]:
+    """Compute the 128-dimensional embedding of a face image."""
+    img = Image.open(io.BytesIO(face)).convert("RGB")
+    embedding = face_recognition.face_encodings(
+        np.array(img),
+        known_face_locations=[(0, img.width - 1, img.height - 1, 0)],
+    )[0]
+    return embedding
+
+
+@cocoindex.flow_def(name="FaceRecognition")
+def face_recognition_flow(
+    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
+) -> None:
+    """
+    Define an example flow that detects faces in images, embeds them, and exports the embeddings to a vector database.
+ """ + data_scope["images"] = flow_builder.add_source( + cocoindex.sources.LocalFile(path="images", binary=True), + refresh_interval=datetime.timedelta(seconds=10), + ) + + face_embeddings = data_scope.add_collector() + + with data_scope["images"].row() as image: + # Extract faces + image["faces"] = image["content"].transform(extract_faces) + + with image["faces"].row() as face: + face["embedding"] = face["image"].transform(extract_face_embedding) + + # Collect embeddings + face_embeddings.collect( + filename=image["filename"], + rect=face["rect"], + embedding=face["embedding"], + ) + + face_embeddings.export( + "face_embeddings", + cocoindex.targets.Postgres(), + primary_key_fields=["filename", "rect"], + ) diff --git a/examples/face_recognition/pyproject.toml b/examples/face_recognition/pyproject.toml new file mode 100644 index 000000000..5fdd96e44 --- /dev/null +++ b/examples/face_recognition/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "cocoindex-face-recognition-example" +version = "0.1.0" +description = "Build index for papers with both metadata and content embeddings" +requires-python = ">=3.11" +dependencies = [ + "cocoindex>=0.1.67", + "face-recognition>=1.3.0", + "pillow>=10.0.0", + "numpy>=1.26.0", +] + +[tool.setuptools] +packages = []