examples: update examples for query handler

georgeh0 · georgeh0 · commit 8fc19a4fdffd · 2025-09-18T16:29:37.000-07:00
diff --git a/examples/code_embedding/main.py b/examples/code_embedding/main.py
@@ -2,6 +2,7 @@
 from psycopg_pool import ConnectionPool
 from pgvector.psycopg import register_vector
 from typing import Any
+import functools
 import cocoindex
 import os
 from numpy.typing import NDArray
@@ -84,52 +85,74 @@ def code_embedding_flow(
     )
 
 
-def search(pool: ConnectionPool, query: str, top_k: int = 5) -> list[dict[str, Any]]:
+@functools.cache
+def connection_pool() -> ConnectionPool:
+    """
+    Get a connection pool to the database.
+    """
+    return ConnectionPool(os.environ["COCOINDEX_DATABASE_URL"])
+
+
+TOP_K = 5
+
+
+# Declaring it as a query handler, so that you can easily run queries in CocoInsight.
+@code_embedding_flow.query_handler(
+    result_fields=cocoindex.QueryHandlerResultFields(
+        embedding=["embedding"], score="score"
+    )
+)
+def search(query: str) -> cocoindex.QueryOutput:
     # Get the table name, for the export target in the code_embedding_flow above.
     table_name = cocoindex.utils.get_target_default_name(
         code_embedding_flow, "code_embeddings"
     )
     # Evaluate the transform flow defined above with the input query, to get the embedding.
     query_vector = code_to_embedding.eval(query)
     # Run the query and get the results.
-    with pool.connection() as conn:
+    with connection_pool().connection() as conn:
         register_vector(conn)
         with conn.cursor() as cur:
             cur.execute(
                 f"""
-                SELECT filename, code, embedding <=> %s AS distance, start, "end"
+                SELECT filename, code, embedding, embedding <=> %s AS distance, start, "end"
                 FROM {table_name} ORDER BY distance LIMIT %s
             """,
-                (query_vector, top_k),
+                (query_vector, TOP_K),
+            )
+            return cocoindex.QueryOutput(
+                query_info=cocoindex.QueryInfo(
+                    embedding=query_vector,
+                    similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
+                ),
+                results=[
+                    {
+                        "filename": row[0],
+                        "code": row[1],
+                        "embedding": row[2],
+                        "score": 1.0 - row[3],
+                        "start": row[4],
+                        "end": row[5],
+                    }
+                    for row in cur.fetchall()
+                ],
             )
-            return [
-                {
-                    "filename": row[0],
-                    "code": row[1],
-                    "score": 1.0 - row[2],
-                    "start": row[3],
-                    "end": row[4],
-                }
-                for row in cur.fetchall()
-            ]
 
 
 def _main() -> None:
     # Make sure the flow is built and up-to-date.
     stats = code_embedding_flow.update()
     print("Updated index: ", stats)
 
-    # Initialize the database connection pool.
-    pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
     # Run queries in a loop to demonstrate the query capabilities.
     while True:
         query = input("Enter search query (or Enter to quit): ")
         if query == "":
             break
         # Run the query function with the database connection pool and the query.
-        results = search(pool, query)
+        query_output = search(query)
         print("\nSearch results:")
-        for result in results:
+        for result in query_output.results:
             print(
                 f"[{result['score']:.3f}] {result['filename']} (L{result['start']['line']}-L{result['end']['line']})"
             )
diff --git a/examples/text_embedding/main.py b/examples/text_embedding/main.py
@@ -4,6 +4,7 @@
 from typing import Any
 import cocoindex
 import os
+import functools
 from numpy.typing import NDArray
 import numpy as np
 from datetime import timedelta
@@ -74,42 +75,65 @@ def text_embedding_flow(
     )
 
 
-def search(pool: ConnectionPool, query: str, top_k: int = 5) -> list[dict[str, Any]]:
+@functools.cache
+def connection_pool() -> ConnectionPool:
+    """
+    Get a connection pool to the database.
+    """
+    return ConnectionPool(os.environ["COCOINDEX_DATABASE_URL"])
+
+
+TOP_K = 5
+
+
+# Declaring it as a query handler, so that you can easily run queries in CocoInsight.
+@text_embedding_flow.query_handler(
+    result_fields=cocoindex.QueryHandlerResultFields(
+        embedding=["embedding"],
+        score="score",
+    ),
+)
+def search(query: str) -> cocoindex.QueryOutput:
     # Get the table name, for the export target in the text_embedding_flow above.
     table_name = cocoindex.utils.get_target_default_name(
         text_embedding_flow, "doc_embeddings"
     )
     # Evaluate the transform flow defined above with the input query, to get the embedding.
     query_vector = text_to_embedding.eval(query)
     # Run the query and get the results.
-    with pool.connection() as conn:
+    with connection_pool().connection() as conn:
         register_vector(conn)
         with conn.cursor() as cur:
             cur.execute(
                 f"""
                 SELECT filename, text, embedding <=> %s AS distance
                 FROM {table_name} ORDER BY distance LIMIT %s
             """,
-                (query_vector, top_k),
+                (query_vector, TOP_K),
             )
-            return [
+            results = [
                 {"filename": row[0], "text": row[1], "score": 1.0 - row[2]}
                 for row in cur.fetchall()
             ]
+            return cocoindex.QueryOutput(
+                results=results,
+                query_info=cocoindex.QueryInfo(
+                    embedding=query_vector,
+                    similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
+                ),
+            )
 
 
 def _main() -> None:
-    # Initialize the database connection pool.
-    pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
     # Run queries in a loop to demonstrate the query capabilities.
     while True:
         query = input("Enter search query (or Enter to quit): ")
         if query == "":
             break
         # Run the query function with the database connection pool and the query.
-        results = search(pool, query)
+        query_output = search(query)
         print("\nSearch results:")
-        for result in results:
+        for result in query_output.results:
             print(f"[{result['score']:.3f}] {result['filename']}")
             print(f"    {result['text']}")
             print("---")
diff --git a/examples/text_embedding_qdrant/main.py b/examples/text_embedding_qdrant/main.py
@@ -1,3 +1,4 @@
+import functools
 from dotenv import load_dotenv
 from qdrant_client import QdrantClient
 import cocoindex
@@ -61,32 +62,58 @@ def text_embedding_flow(
     )
 
 
-def _main() -> None:
-    # Initialize Qdrant client
-    client = QdrantClient(url=QDRANT_URL, prefer_grpc=True)
+@functools.cache
+def get_qdrant_client() -> QdrantClient:
+    return QdrantClient(url=QDRANT_URL, prefer_grpc=True)
+
+
+@text_embedding_flow.query_handler(
+    result_fields=cocoindex.QueryHandlerResultFields(
+        embedding=["embedding"],
+        score="score",
+    ),
+)
+def search(query: str) -> cocoindex.QueryOutput:
+    client = get_qdrant_client()
 
+    # Get the embedding for the query
+    query_embedding = text_to_embedding.eval(query)
+
+    search_results = client.search(
+        collection_name=QDRANT_COLLECTION,
+        query_vector=("text_embedding", query_embedding),
+        limit=10,
+    )
+    return cocoindex.QueryOutput(
+        results=[
+            {
+                "filename": result.payload["filename"],
+                "text": result.payload["text"],
+                "embedding": result.vector,
+                "score": result.score,
+            }
+            for result in search_results
+        ],
+        query_info=cocoindex.QueryInfo(
+            embedding=query_embedding,
+            similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
+        ),
+    )
+
+
+def _main() -> None:
     # Run queries in a loop to demonstrate the query capabilities.
     while True:
         query = input("Enter search query (or Enter to quit): ")
         if query == "":
             break
 
-        # Get the embedding for the query
-        query_embedding = text_to_embedding.eval(query)
-
-        search_results = client.search(
-            collection_name=QDRANT_COLLECTION,
-            query_vector=("text_embedding", query_embedding),
-            limit=10,
-        )
+        # Run the query handler with the query; it uses the cached Qdrant client internally.
+        query_output = search(query)
         print("\nSearch results:")
-        for result in search_results:
-            score = result.score
-            payload = result.payload
-            if payload is None:
-                continue
-            print(f"[{score:.3f}] {payload['filename']}")
-            print(f"    {payload['text']}")
+        for result in query_output.results:
+            print(f"[{result['score']:.3f}] {result['filename']}")
+            print(f"    {result['text']}")
             print("---")
         print()