Skip to content

Commit eb4bfc6

Browse files
authored
simplify the fast api example & upgrade query handling (#537)
1 parent e19822b commit eb4bfc6

File tree

9 files changed

+682
-184
lines changed

9 files changed

+682
-184
lines changed
Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
1-
COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@coco_db:5432/cocoindex
1+
# for docker
2+
COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@coco_db:5436/cocoindex
3+
4+
# For local testing
5+
# COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
Lines changed: 62 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,65 @@
1-
## Run cocoindex docker container with a simple query endpoint via fastapi
2-
In this example, we provide a simple docker container using docker compose to build pgvector17 along with a simple python fastapi script than runs a simple query endpoint. This example uses the code from the code embedding example.
1+
## Run docker container with a simple query endpoint via fastapi
32

4-
## How to run
5-
Edit the sample code directory to include the code you want to query over in
6-
```sample_code/```
3+
In this example, we build an index of text embeddings from local Markdown files and provide a simple query endpoint via FastAPI.
4+
We provide a simple Docker container, built with Docker Compose, that runs pgvector17 along with a simple Python FastAPI script.
75

8-
Edit the configuration code from the file ```src/cocoindex_funs.py``` line 23 to 25.
6+
We appreciate a star ⭐ at [CocoIndex GitHub](https://github.com/cocoindex-io/cocoindex) if this is helpful.
97

10-
Finally build the docker container via: ```docker compose up``` while inside the directory of the example.
8+
9+
## Run locally without docker
10+
11+
In the `.env` file, use the local Postgres URL:
12+
13+
```
14+
# For local testing
15+
COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
16+
```
17+
18+
- Install dependencies:
19+
20+
```bash
21+
pip install -e .
22+
```
23+
24+
- Setup:
25+
26+
```bash
27+
cocoindex setup main.py
28+
```
29+
30+
- Update index:
31+
32+
```bash
33+
cocoindex update main.py
34+
```
35+
36+
- Run:
37+
38+
```bash
39+
uvicorn main:fastapi_app --reload --host 0.0.0.0 --port 8000
40+
```
41+
42+
## Query the endpoint
43+
44+
```bash
45+
curl "http://localhost:8000/search?q=model&limit=3"
46+
```
47+
48+
49+
## Run Docker
50+
51+
In the `.env` file, use the Docker Postgres URL:
52+
53+
```
54+
COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@coco_db:5436/cocoindex
55+
```
56+
57+
Build the docker container via:
58+
```bash
59+
docker compose up --build
60+
```
61+
62+
Test the endpoint:
63+
```bash
64+
curl "http://localhost:8080/search?q=model&limit=3"
65+
```

examples/fastapi_server_docker/compose.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@ services:
66
POSTGRES_USER: cocoindex
77
POSTGRES_PASSWORD: cocoindex
88
POSTGRES_DB: cocoindex
9+
POSTGRES_PORT: 5436
910
ports:
10-
- "5432:5432"
11+
- "5436:5436"
12+
command: postgres -p 5436
1113

1214
coco_api:
1315
build:

examples/fastapi_server_docker/dockerfile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@ FROM python:3.11-slim
22

33
WORKDIR /app
44

5+
# Install PostgreSQL client libraries
6+
RUN apt-get update && apt-get install -y \
7+
libpq-dev \
8+
gcc \
9+
&& rm -rf /var/lib/apt/lists/*
10+
511
COPY requirements.txt .
612

713
RUN pip install -r requirements.txt

examples/fastapi_server_docker/files/1810.04805v2.md

Lines changed: 530 additions & 0 deletions
Large diffs are not rendered by default.

examples/fastapi_server_docker/main.py

Lines changed: 73 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,82 @@
11
import cocoindex
22
import uvicorn
3-
4-
from fastapi import FastAPI
53
from dotenv import load_dotenv
4+
from fastapi import FastAPI, Query
5+
from psycopg_pool import ConnectionPool
6+
import os
7+
8+
@cocoindex.transform_flow()
def text_to_embedding(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:
    """
    Turn a text slice into its embedding vector.

    Both the indexing flow and the query path go through this transform, so
    stored vectors and query vectors come from the same model.
    """
    embedder = cocoindex.functions.SentenceTransformerEmbed(
        model="sentence-transformers/all-MiniLM-L6-v2")
    return text.transform(embedder)
17+
18+
@cocoindex.flow_def(name="MarkdownEmbeddingFastApiExample")
19+
def markdown_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope):
20+
"""
21+
Define an example flow that embeds markdown files into a vector database.
22+
"""
23+
data_scope["documents"] = flow_builder.add_source(
24+
cocoindex.sources.LocalFile(path="files"))
25+
doc_embeddings = data_scope.add_collector()
26+
27+
with data_scope["documents"].row() as doc:
28+
doc["chunks"] = doc["content"].transform(
29+
cocoindex.functions.SplitRecursively(),
30+
language="markdown", chunk_size=2000, chunk_overlap=500)
31+
32+
with doc["chunks"].row() as chunk:
33+
chunk["embedding"] = text_to_embedding(chunk["text"])
34+
doc_embeddings.collect(
35+
filename=doc["filename"],
36+
location=chunk["location"],
37+
text=chunk["text"],
38+
embedding=chunk["embedding"]
39+
)
40+
41+
doc_embeddings.export(
42+
"doc_embeddings",
43+
cocoindex.storages.Postgres(),
44+
primary_key_fields=["filename", "location"],
45+
vector_indexes=[
46+
cocoindex.VectorIndexDef(
47+
field_name="embedding",
48+
metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
649

7-
from src.cocoindex_funs import code_embedding_flow, code_to_embedding
50+
def search(pool: ConnectionPool, query: str, top_k: int = 5):
    """
    Return the indexed chunks closest to `query`.

    Args:
        pool: psycopg connection pool used to run the SQL query.
        query: Free-text query; embedded with `text_to_embedding`.
        top_k: Maximum number of rows to return. Must not be negative.

    Returns:
        A list of dicts with keys "filename", "text" and "score", ordered by
        ascending distance. "score" is 1.0 minus the `<=>` distance.

    Raises:
        ValueError: If `top_k` is negative.
    """
    # Reject an invalid limit up front: Postgres refuses a negative LIMIT, and
    # failing here avoids computing an embedding for a query that cannot run.
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    # Get the table name for the export target of markdown_embedding_flow above.
    table_name = cocoindex.utils.get_target_storage_default_name(
        markdown_embedding_flow, "doc_embeddings")
    # Evaluate the transform flow defined above with the input query, to get the embedding.
    query_vector = text_to_embedding.eval(query)
    # Run the query and get the results. The table name is interpolated because
    # identifiers cannot be bound as parameters; the values are bound.
    with pool.connection() as conn:
        with conn.cursor() as cur:
            cur.execute(f"""
                SELECT filename, text, embedding <=> %s::vector AS distance
                FROM {table_name} ORDER BY distance LIMIT %s
            """, (query_vector, top_k))
            return [
                {"filename": row[0], "text": row[1], "score": 1.0 - row[2]}
                for row in cur.fetchall()
            ]
866

967
fastapi_app = FastAPI()
10-
11-
query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
12-
name="SemanticsSearch",
13-
flow=code_embedding_flow,
14-
target_name="code_embeddings",
15-
query_transform_flow=code_to_embedding,
16-
default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY
17-
)
18-
19-
@fastapi_app.get("/query")
20-
def query_endpoint(string: str):
21-
results, _ = query_handler.search(string, 10)
22-
return results
68+
69+
@fastapi_app.on_event("startup")
def startup_event():
    """Load `.env`, initialize cocoindex, and open the shared connection pool."""
    load_dotenv()
    cocoindex.init()
    # Initialize database connection pool; kept on app state so request
    # handlers share one pool.
    fastapi_app.state.pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))


@fastapi_app.on_event("shutdown")
def shutdown_event():
    """Close the connection pool opened at startup so it is not left open on exit."""
    pool = getattr(fastapi_app.state, "pool", None)
    if pool is not None:
        pool.close()
75+
76+
@fastapi_app.get("/search")
77+
def search_endpoint(q: str = Query(..., description="Search query"), limit: int = Query(5, description="Number of results")):
78+
results = search(fastapi_app.state.pool, q, limit)
79+
return {"results": results}
2380

2481
if __name__ == "__main__":
2582
load_dotenv()

examples/fastapi_server_docker/requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,6 @@ cocoindex>=0.1.42
22
python-dotenv>=1.0.1
33
fastapi==0.115.12
44
fastapi-cli==0.0.7
5-
uvicorn==0.34.2
5+
uvicorn==0.34.2
6+
psycopg==3.2.6
7+
psycopg_pool==3.2.6

examples/fastapi_server_docker/sample_code/main.py

Lines changed: 0 additions & 113 deletions
This file was deleted.

examples/fastapi_server_docker/src/cocoindex_funs.py

Lines changed: 0 additions & 45 deletions
This file was deleted.

0 commit comments

Comments
 (0)