
Commit 40a86dc

examples: simplify postgres source example to be a single flow (#938)
1 parent d03a639 commit 40a86dc

File tree

4 files changed: +6 -112 lines changed

- examples/postgres_source/.env.example (deleted)
- examples/postgres_source/README.md
- examples/postgres_source/main.py
- examples/postgres_source/prepare_source_data.sql


examples/postgres_source/.env.example

Lines changed: 0 additions & 22 deletions
This file was deleted.

examples/postgres_source/README.md

Lines changed: 4 additions & 5 deletions
@@ -3,14 +3,13 @@
 [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)

 This example demonstrates how to use Postgres tables as the source for CocoIndex.
-It reads structured data from existing PostgreSQL tables, performs calculations, generates embeddings, and stores them in a separate CocoIndex table.
+It reads structured product data from existing PostgreSQL tables, performs calculations, generates embeddings, and stores them in a separate CocoIndex table.

 We appreciate a star ⭐ at [CocoIndex Github](https://github.com/cocoindex-io/cocoindex) if this is helpful.

-This example contains two flows:
+This example contains one flow:

-1. `postgres_message_indexing_flow`: Read from a simpler table `source_messages` (single primary key), and generate embeddings for the `message` column.
-2. `postgres_product_indexing_flow`: Read from a more complex table `source_products` (composite primary key), compute additional fields and generates embeddings.
+`postgres_product_indexing_flow`: Read from a table `source_products` (composite primary key), compute additional fields like total value and full description, then generate embeddings for semantic search.


 ## Prerequisites
@@ -25,7 +24,7 @@ Before running the example, you need to:

 2. Follow the [CocoIndex PostgreSQL setup guide](https://cocoindex.io/docs/getting_started/quickstart) to install and configure PostgreSQL with pgvector extension.

-3. Create source tables `source_messages` and `source_products` with sample data:
+3. Create source table `source_products` with sample data:

 ```bash
 $ psql "postgres://cocoindex:cocoindex@localhost/cocoindex" -f ./prepare_source_data.sql
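
Since the updated README describes generating embeddings for semantic search, here is an illustrative query against the table the flow exports, using pgvector directly. This is a sketch, not part of the commit: the table name `product_index`, the `product_name` column, the `APP_DATABASE_URL` environment variable, and the search text are placeholders, and the embedding model is assumed to match the SentenceTransformer model used elsewhere in this example.

```python
# Hypothetical semantic-search query against the table exported by the flow.
# Placeholders (not from this commit): table "product_index", column
# "product_name", env var APP_DATABASE_URL; the model is assumed to be the
# same sentence-transformers/all-MiniLM-L6-v2 used in this example.
import os

import psycopg
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
query_vec = model.encode("rugged waterproof hiking boots").tolist()
# pgvector accepts the text form "[v1,v2,...]" for vector literals.
vec_literal = "[" + ",".join(str(v) for v in query_vec) + "]"

with psycopg.connect(os.environ["APP_DATABASE_URL"]) as conn:
    # "<=>" is pgvector's cosine-distance operator, matching the
    # COSINE_SIMILARITY vector index the flow creates on export.
    rows = conn.execute(
        "SELECT product_name, embedding <=> %s::vector AS distance "
        "FROM product_index ORDER BY distance LIMIT 5",
        (vec_literal,),
    ).fetchall()
    for name, distance in rows:
        print(f"{distance:.3f}  {name}")
```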

examples/postgres_source/main.py

Lines changed: 1 addition & 53 deletions
@@ -2,58 +2,6 @@
 import os


-@cocoindex.flow_def(name="PostgresMessageIndexing")
-def postgres_message_indexing_flow(
-    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
-) -> None:
-    """
-    Define a flow that reads data from a PostgreSQL table, generates embeddings,
-    and stores them in another PostgreSQL table with pgvector.
-    """
-
-    data_scope["messages"] = flow_builder.add_source(
-        cocoindex.sources.Postgres(
-            table_name="source_messages",
-            # Optional. Use the default CocoIndex database if not specified.
-            database=cocoindex.add_transient_auth_entry(
-                cocoindex.sources.DatabaseConnectionSpec(
-                    url=os.getenv("SOURCE_DATABASE_URL"),
-                )
-            ),
-            # Optional.
-            ordinal_column="created_at",
-        )
-    )
-
-    indexed_messages = data_scope.add_collector()
-    with data_scope["messages"].row() as message_row:
-        # Use the indexing column for embedding generation
-        message_row["embedding"] = message_row["message"].transform(
-            cocoindex.functions.SentenceTransformerEmbed(
-                model="sentence-transformers/all-MiniLM-L6-v2"
-            )
-        )
-        # Collect the data - include key columns and content
-        indexed_messages.collect(
-            id=message_row["id"],
-            author=message_row["author"],
-            message=message_row["message"],
-            embedding=message_row["embedding"],
-        )
-
-    indexed_messages.export(
-        "output",
-        cocoindex.targets.Postgres(),
-        primary_key_fields=["id"],
-        vector_indexes=[
-            cocoindex.VectorIndexDef(
-                field_name="embedding",
-                metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
-            )
-        ],
-    )
-
-
 @cocoindex.op.function()
 def calculate_total_value(
     price: float,
@@ -76,7 +24,7 @@ def postgres_product_indexing_flow(
     flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
 ) -> None:
     """
-    Define a flow that reads data from a PostgreSQL table, generates embeddings,
+    Define a flow that reads product data from a PostgreSQL table, generates embeddings,
     and stores them in another PostgreSQL table with pgvector.
     """
     data_scope["products"] = flow_builder.add_source(
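
The hunks above delete the message flow entirely and leave only the product flow, whose body falls mostly outside the shown context. For orientation, here is a minimal sketch of how such a single flow can be assembled from the same API calls that appear in the deleted code. The column names (`product_category`, `product_name`, `description`, `price`, `amount`), the derived fields, and the multi-argument `transform` call are assumptions for illustration, not the actual contents of the remaining main.py.

```python
# Sketch only: reconstructed from API calls visible in the deleted code above.
# Column names, derived fields, and the multi-argument transform are assumed.
import os

import cocoindex


@cocoindex.op.function()
def calculate_total_value(price: float, amount: int) -> float:
    # Assumed body: total inventory value for one product row.
    return price * amount


@cocoindex.flow_def(name="PostgresProductIndexing")
def postgres_product_indexing_flow(
    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
) -> None:
    """Read product rows, derive fields, embed, and export to pgvector."""
    data_scope["products"] = flow_builder.add_source(
        cocoindex.sources.Postgres(
            table_name="source_products",
            # Optional: falls back to the default CocoIndex database.
            database=cocoindex.add_transient_auth_entry(
                cocoindex.sources.DatabaseConnectionSpec(
                    url=os.getenv("SOURCE_DATABASE_URL"),
                )
            ),
        )
    )

    indexed_products = data_scope.add_collector()
    with data_scope["products"].row() as product:
        # Assumed calling convention: extra args after the op function are
        # forwarded as its remaining parameters.
        product["total_value"] = product["price"].transform(
            calculate_total_value, product["amount"]
        )
        product["embedding"] = product["description"].transform(
            cocoindex.functions.SentenceTransformerEmbed(
                model="sentence-transformers/all-MiniLM-L6-v2"
            )
        )
        indexed_products.collect(
            product_category=product["product_category"],
            product_name=product["product_name"],
            total_value=product["total_value"],
            embedding=product["embedding"],
        )

    indexed_products.export(
        "output",
        cocoindex.targets.Postgres(),
        # Composite primary key, mirroring the source_products schema.
        primary_key_fields=["product_category", "product_name"],
        vector_indexes=[
            cocoindex.VectorIndexDef(
                field_name="embedding",
                metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
            )
        ],
    )
```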

examples/postgres_source/prepare_source_data.sql

Lines changed: 1 addition & 32 deletions
@@ -1,38 +1,7 @@
 -- Usage: run with psql from your shell, for example:
 -- $ psql "postgres://cocoindex:cocoindex@localhost/cocoindex" -f ./prepare_source_data.sql
 -- ========================================
--- Simple schema: source_messages (single primary key)
--- ========================================
-DROP TABLE IF EXISTS source_messages CASCADE;
-CREATE TABLE source_messages (
-    id uuid NOT NULL PRIMARY KEY DEFAULT gen_random_uuid(),
-    author text NOT NULL,
-    message text NOT NULL,
-    created_at timestamp DEFAULT CURRENT_TIMESTAMP
-);
-INSERT INTO source_messages (author, message)
-VALUES (
-    'Jane Smith',
-    'Hello world! This is a test message.'
-),
-(
-    'John Doe',
-    'PostgreSQL source integration is working great!'
-),
-(
-    'Jane Smith',
-    'CocoIndex makes database processing so much easier.'
-),
-(
-    'John Doe',
-    'Embeddings and vector search are powerful tools.'
-),
-(
-    'John Doe',
-    'Natural language processing meets database technology.'
-) ON CONFLICT DO NOTHING;
--- ========================================
--- Multiple schema: source_products (composite primary key)
+-- Product schema: source_products (composite primary key)
 -- ========================================
 DROP TABLE IF EXISTS source_products CASCADE;
 CREATE TABLE source_products (
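
After loading the sample data, a quick check that the remaining source table is in place before running the flow can look like the sketch below. It assumes psycopg (v3) is installed and reuses the connection string from the psql command in the script's header.

```python
# Sanity check: confirm source_products exists and contains sample rows.
# Assumes psycopg (v3) is installed; connection string copied from the
# psql command shown in prepare_source_data.sql.
import psycopg

SOURCE_URL = "postgres://cocoindex:cocoindex@localhost/cocoindex"

with psycopg.connect(SOURCE_URL) as conn:
    (count,) = conn.execute("SELECT count(*) FROM source_products").fetchone()
    print(f"source_products has {count} rows")
```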

0 commit comments
