cocoindex-io
diff --git a/‎README.md‎
Lines changed: 11 additions & 10 deletions b/‎README.md‎
Lines changed: 11 additions & 10 deletions
diff --git a/‎examples/face_recognition/.env‎
Lines changed: 2 additions & 0 deletions b/‎examples/face_recognition/.env‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎examples/face_recognition/README.md‎
Lines changed: 51 additions & 0 deletions b/‎examples/face_recognition/README.md‎
Lines changed: 51 additions & 0 deletions
diff --git a/‎examples/face_recognition/images/Carter_welcomes_Reagan.jpg‎
146 KB b/‎examples/face_recognition/images/Carter_welcomes_Reagan.jpg‎
146 KB
diff --git a/‎examples/face_recognition/images/Solvay_conference_1927.jpg‎
786 KB b/‎examples/face_recognition/images/Solvay_conference_1927.jpg‎
786 KB
diff --git a/‎examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg‎
173 KB b/‎examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg‎
173 KB
diff --git a/‎examples/face_recognition/images/einplanck3.jpg‎
36.9 KB b/‎examples/face_recognition/images/einplanck3.jpg‎
36.9 KB
diff --git a/‎examples/face_recognition/main.py‎
Lines changed: 115 additions & 0 deletions b/‎examples/face_recognition/main.py‎
Lines changed: 115 additions & 0 deletions
diff --git a/‎examples/face_recognition/pyproject.toml‎
Lines changed: 14 additions & 0 deletions b/‎examples/face_recognition/pyproject.toml‎
Lines changed: 14 additions & 0 deletions
@@ -27,18 +27,18 @@ Ultra performant data transformation framework for AI, with core engine written
 ⭐ Drop a star to help us grow!
 
 <div align="center">
-    
+
 <!-- Keep these links. Translations will automatically update with the README. -->
-[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) | 
-[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) | 
-[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) | 
-[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) | 
-[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) | 
-[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) | 
-[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) | 
-[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) | 
+[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
+[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
+[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
+[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
+[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
+[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
+[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
+[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
 [中文](https://readme-i18n.com/cocoindex-io/cocoindex?lang=zh)
-    
+
 </div>
 
 </br>
@@ -183,6 +183,7 @@ It defines an index flow like this:
 | [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
 | [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
 | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
+| [Face Recognition](examples/face_recognition) | Recognize faces in images and build an embedding index |
 | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
 
 More coming and stay tuned 👀!
 
@@ -0,0 +1,2 @@
+# Postgres database address for cocoindex
+COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
@@ -0,0 +1,51 @@
+# Recognize faces in images and build an embedding index
+[![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)
+
+
+In this example, we will recognize faces in images and build an embedding index.
+
+We appreciate a star ⭐ at [CocoIndex Github](https://github.com/cocoindex-io/cocoindex) if this is helpful.
+
+## Steps
+### Indexing Flow
+
+1. We will ingest a list of images.
+2. For each image, we:
+   - Extract faces from the image.
+   - Compute embeddings for each face.
+3. We will export to the following tables in Postgres with PGVector:
+   - Filename, rect, embedding for each face.
+
+
+## Prerequisite
+
+1.  [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
+
+2.  Install dependencies:
+
+    ```bash
+    pip install -e .
+    ```
+
+## Run
+
+Update the index, which will also set up the tables the first time it runs:
+
+```bash
+cocoindex update --setup main.py
+```
+
+You can also run the command with `-L`, which will watch for file changes and update the index automatically.
+
+```bash
+cocoindex update --setup -L main.py
+```
+
+## CocoInsight
+You can use CocoInsight (free beta now) to troubleshoot the index generation and understand the data lineage of the pipeline. It just connects to your local CocoIndex server, with zero pipeline data retention. Run the following command to start CocoInsight:
+
+```
+cocoindex server -ci main.py
+```
+
+Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight).
@@ -0,0 +1,115 @@
+import cocoindex
+import io
+import dataclasses
+import datetime
+import typing
+
+import face_recognition
+from PIL import Image
+import numpy as np
+
+
+@dataclasses.dataclass
+class ImageRect:
+    top: int
+    left: int
+    bottom: int
+    right: int
+
+
+@dataclasses.dataclass
+class FaceBase:
+    """A face in an image."""
+
+    rect: ImageRect
+    image: bytes
+
+
+MAX_IMAGE_WIDTH = 1280
+
+
+@cocoindex.op.function(cache=True, behavior_version=1, gpu=True)
+def extract_faces(content: bytes) -> list[FaceBase]:
+    """Detect faces in an image and return each face's bounding box and cropped PNG bytes."""
+    orig_img = Image.open(io.BytesIO(content)).convert("RGB")
+
+    # The model is too slow on large images, so we resize them if too large.
+    if orig_img.width > MAX_IMAGE_WIDTH:
+        ratio = orig_img.width * 1.0 / MAX_IMAGE_WIDTH
+        img = orig_img.resize(
+            (MAX_IMAGE_WIDTH, int(orig_img.height / ratio)),
+            resample=Image.Resampling.BICUBIC,
+        )
+    else:
+        ratio = 1.0
+        img = orig_img
+
+    # Extract face locations.
+    locs = face_recognition.face_locations(np.array(img), model="cnn")
+
+    faces: list[FaceBase] = []
+    for top, right, bottom, left in locs:
+        rect = ImageRect(
+            left=int(left * ratio),
+            top=int(top * ratio),
+            right=int(right * ratio),
+            bottom=int(bottom * ratio),
+        )
+
+        # Crop the face and save it as a PNG.
+        buf = io.BytesIO()
+        orig_img.crop((rect.left, rect.top, rect.right, rect.bottom)).save(
+            buf, format="PNG"
+        )
+        face = buf.getvalue()
+        faces.append(FaceBase(rect, face))
+
+    return faces
+
+
+@cocoindex.op.function(cache=True, behavior_version=1, gpu=True)
+def extract_face_embedding(
+    face: bytes,
+) -> cocoindex.Vector[cocoindex.Float32, typing.Literal[128]]:
+    """Extract the embedding of a face."""
+    img = Image.open(io.BytesIO(face)).convert("RGB")
+    embedding = face_recognition.face_encodings(
+        np.array(img),
+        known_face_locations=[(0, img.width - 1, img.height - 1, 0)],
+    )[0]
+    return embedding
+
+
+@cocoindex.flow_def(name="FaceRecognition")
+def face_recognition_flow(
+    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
+) -> None:
+    """
+    Define an example flow that extracts faces from images and exports their embeddings to Postgres.
+    """
+    data_scope["images"] = flow_builder.add_source(
+        cocoindex.sources.LocalFile(path="images", binary=True),
+        refresh_interval=datetime.timedelta(seconds=10),
+    )
+
+    face_embeddings = data_scope.add_collector()
+
+    with data_scope["images"].row() as image:
+        # Extract faces
+        image["faces"] = image["content"].transform(extract_faces)
+
+        with image["faces"].row() as face:
+            face["embedding"] = face["image"].transform(extract_face_embedding)
+
+            # Collect embeddings
+            face_embeddings.collect(
+                filename=image["filename"],
+                rect=face["rect"],
+                embedding=face["embedding"],
+            )
+
+    face_embeddings.export(
+        "face_embeddings",
+        cocoindex.targets.Postgres(),
+        primary_key_fields=["filename", "rect"],
+    )
@@ -0,0 +1,14 @@
+[project]
+name = "cocoindex-face-recognition-example"
+version = "0.1.0"
+description = "Recognize faces in images and build an embedding index"
+requires-python = ">=3.11"
+dependencies = [
+    "cocoindex>=0.1.67",
+    "face-recognition>=1.3.0",
+    "pillow>=10.0.0",
+    "numpy>=1.26.0",
+]
+
+[tool.setuptools]
+packages = []
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+# Postgres database address for cocoindex`
	`2`	`+COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex`