diff --git a/README.md b/README.md
index 097d9d95d..69451a057 100644
--- a/README.md
+++ b/README.md
@@ -27,18 +27,18 @@ Ultra performant data transformation framework for AI, with core engine written
⭐ Drop a star to help us grow!
-
+
-[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
-[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
-[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
-[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
-[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
-[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
-[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
-[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
+[Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
+[English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
+[Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
+[français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
+[日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
+[한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
+[Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
+[Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
[中文](https://readme-i18n.com/cocoindex-io/cocoindex?lang=zh)
-
+
@@ -183,6 +183,7 @@ It defines an index flow like this:
| [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
| [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
| [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
+| [Face Recognition](examples/face_recognition) | Recognize faces in images and build an embedding index |
| [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
More coming and stay tuned 👀!
diff --git a/examples/face_recognition/.env b/examples/face_recognition/.env
new file mode 100644
index 000000000..335f30600
--- /dev/null
+++ b/examples/face_recognition/.env
@@ -0,0 +1,2 @@
+# Postgres database address for cocoindex
+COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
diff --git a/examples/face_recognition/README.md b/examples/face_recognition/README.md
new file mode 100644
index 000000000..5d313dd42
--- /dev/null
+++ b/examples/face_recognition/README.md
@@ -0,0 +1,51 @@
+# Recognize faces in images and build an embedding index
+[![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)
+
+
+In this example, we will recognize faces in images and build an embedding index.
+
+We appreciate a star ⭐ at [CocoIndex GitHub](https://github.com/cocoindex-io/cocoindex) if this is helpful.
+
+## Steps
+### Indexing Flow
+
+1. We will ingest a list of images.
+2. For each image, we:
+ - Extract faces from the image.
+ - Compute embeddings for each face.
+3. We will export to the following tables in Postgres with PGVector:
+ - Filename, rect, embedding for each face.
+
+
+## Prerequisite
+
+1. [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
+
+2. Install dependencies:
+
+ ```bash
+ pip install -e .
+ ```
+
+## Run
+
+Update the index, which will also set up the tables the first time:
+
+```bash
+cocoindex update --setup main.py
+```
+
+You can also run the command with `-L`, which will watch for file changes and update the index automatically.
+
+```bash
+cocoindex update --setup -L main.py
+```
+
+## CocoInsight
+You can use CocoInsight (free beta for now) to troubleshoot the index generation and understand the data lineage of the pipeline. It just connects to your local CocoIndex server, with zero pipeline data retention. Run the following command to start CocoInsight:
+
+```
+cocoindex server -ci main.py
+```
+
+Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight).
diff --git a/examples/face_recognition/images/Carter_welcomes_Reagan.jpg b/examples/face_recognition/images/Carter_welcomes_Reagan.jpg
new file mode 100644
index 000000000..b24785729
Binary files /dev/null and b/examples/face_recognition/images/Carter_welcomes_Reagan.jpg differ
diff --git a/examples/face_recognition/images/Solvay_conference_1927.jpg b/examples/face_recognition/images/Solvay_conference_1927.jpg
new file mode 100644
index 000000000..ce85a45a8
Binary files /dev/null and b/examples/face_recognition/images/Solvay_conference_1927.jpg differ
diff --git a/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg b/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg
new file mode 100644
index 000000000..6cb91f4c5
Binary files /dev/null and b/examples/face_recognition/images/Steve_Jobs_and_Bill_Gates_(522695099).jpg differ
diff --git a/examples/face_recognition/images/einplanck3.jpg b/examples/face_recognition/images/einplanck3.jpg
new file mode 100644
index 000000000..9bec9401d
Binary files /dev/null and b/examples/face_recognition/images/einplanck3.jpg differ
diff --git a/examples/face_recognition/main.py b/examples/face_recognition/main.py
new file mode 100644
index 000000000..ef241265f
--- /dev/null
+++ b/examples/face_recognition/main.py
@@ -0,0 +1,115 @@
+import cocoindex
+import io
+import dataclasses
+import datetime
+import typing
+
+import face_recognition
+from PIL import Image
+import numpy as np
+
+
+@dataclasses.dataclass
+class ImageRect:  # Rectangle within an image, given by its four edges.
+ top: int  # pixel coordinate of the top edge
+ left: int  # pixel coordinate of the left edge
+ bottom: int  # pixel coordinate of the bottom edge
+ right: int  # pixel coordinate of the right edge
+
+
+@dataclasses.dataclass
+class FaceBase:
+ """A face detected in an image: its bounding box and its cropped image bytes."""
+
+ rect: ImageRect  # bounding box of the face
+ image: bytes  # encoded crop of the face (extract_faces writes PNG)
+
+
+MAX_IMAGE_WIDTH = 1280  # images wider than this (in pixels) are downscaled before face detection
+
+
+@cocoindex.op.function(cache=True, behavior_version=1, gpu=True)
+def extract_faces(content: bytes) -> list[FaceBase]:
+ """Detect faces in an image and return each one's bounding box and PNG crop."""
+ orig_img = Image.open(io.BytesIO(content)).convert("RGB")
+
+ # The model is too slow on large images, so we resize them if too large.
+ if orig_img.width > MAX_IMAGE_WIDTH:
+ ratio = orig_img.width * 1.0 / MAX_IMAGE_WIDTH  # original width / resized width
+ img = orig_img.resize(
+ (MAX_IMAGE_WIDTH, int(orig_img.height / ratio)),
+ resample=Image.Resampling.BICUBIC,
+ )
+ else:
+ ratio = 1.0
+ img = orig_img
+
+ # Extract face locations from the (possibly downscaled) image.
+ locs = face_recognition.face_locations(np.array(img), model="cnn")
+
+ faces: list[FaceBase] = []
+ for top, right, bottom, left in locs:
+ rect = ImageRect(  # scale the box back to original-image coordinates
+ left=int(left * ratio),
+ top=int(top * ratio),
+ right=int(right * ratio),
+ bottom=int(bottom * ratio),
+ )
+
+ # Crop the face from the original (not resized) image and save it as a PNG.
+ buf = io.BytesIO()
+ orig_img.crop((rect.left, rect.top, rect.right, rect.bottom)).save(
+ buf, format="PNG"
+ )
+ face = buf.getvalue()
+ faces.append(FaceBase(rect, face))
+
+ return faces
+
+
+@cocoindex.op.function(cache=True, behavior_version=1, gpu=True)
+def extract_face_embedding(
+ face: bytes,
+) -> cocoindex.Vector[cocoindex.Float32, typing.Literal[128]]:
+ """Compute the embedding of a face from the encoded bytes of its cropped image."""
+ img = Image.open(io.BytesIO(face)).convert("RGB")
+ embedding = face_recognition.face_encodings(
+ np.array(img),
+ known_face_locations=[(0, img.width - 1, img.height - 1, 0)],  # whole image as one (top, right, bottom, left) box
+ )[0]
+ return embedding
+
+
+@cocoindex.flow_def(name="FaceRecognition")
+def face_recognition_flow(
+ flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
+) -> None:
+ """
+ Define a flow that extracts faces from local images and exports their embeddings to Postgres.
+ """
+ data_scope["images"] = flow_builder.add_source(
+ cocoindex.sources.LocalFile(path="images", binary=True),
+ refresh_interval=datetime.timedelta(seconds=10),
+ )
+
+ face_embeddings = data_scope.add_collector()
+
+ with data_scope["images"].row() as image:
+ # Extract faces from the image content
+ image["faces"] = image["content"].transform(extract_faces)
+
+ with image["faces"].row() as face:
+ face["embedding"] = face["image"].transform(extract_face_embedding)
+
+ # Collect one entry per face: source filename, bounding box and embedding
+ face_embeddings.collect(
+ filename=image["filename"],
+ rect=face["rect"],
+ embedding=face["embedding"],
+ )
+
+ face_embeddings.export(
+ "face_embeddings",
+ cocoindex.targets.Postgres(),
+ primary_key_fields=["filename", "rect"],
+ )
diff --git a/examples/face_recognition/pyproject.toml b/examples/face_recognition/pyproject.toml
new file mode 100644
index 000000000..5fdd96e44
--- /dev/null
+++ b/examples/face_recognition/pyproject.toml
@@ -0,0 +1,14 @@
+[project]
+name = "cocoindex-face-recognition-example"
+version = "0.1.0"
+description = "Recognize faces in images and build an embedding index"
+requires-python = ">=3.11"
+dependencies = [
+ "cocoindex>=0.1.67",
+ "face-recognition>=1.3.0",
+ "pillow>=10.0.0",
+ "numpy>=1.26.0",
+]
+
+[tool.setuptools]
+packages = []