cocoindex-io · badmonster0 · Apr 14, 2025 · Mar 20, 2025 · Mar 24, 2025 · Mar 24, 2025
diff --git a/Cargo.toml b/Cargo.toml
@@ -97,6 +97,7 @@ rustls = { version = "0.23.25" }
 http-body-util = "0.1.3"
 yaml-rust2 = "0.10.1"
 urlencoding = "2.1.3"
+qdrant-client = "1.13.0"
 uuid = { version = "1.16.0", features = ["serde", "v4", "v8"] }
 tokio-stream = "0.1.17"
 async-stream = "0.3.6"

diff --git a/docs/docs/ops/storages.md b/docs/docs/ops/storages.md
@@ -7,10 +7,41 @@ description: CocoIndex Built-in Storages
 
 ## Postgres
 
-`Postgres` exports data to Postgres database (with pgvector extension).
+Exports data to Postgres database (with pgvector extension).
 
 The spec takes the following fields:
 
 *   `database_url` (type: `str`, optional): The URL of the Postgres database to use as the internal storage, e.g. `postgres://cocoindex:cocoindex@localhost/cocoindex`. If unspecified, will use the same database as the [internal storage](/docs/core/basics#internal-storage).
 
 *   `table_name` (type: `str`, optional): The name of the table to store to. If unspecified, will generate a new automatically. We recommend specifying a name explicitly if you want to directly query the table. It can be omitted if you want to use CocoIndex's query handlers to query the table.
+
+## Qdrant
+
+Exports data to a [Qdrant](https://qdrant.tech/) collection.
+
+The spec takes the following fields:
+
+*   `collection_name` (type: `str`, required): The name of the collection to export the data to.
+
+*   `grpc_url` (type: `str`, optional): The [gRPC URL](https://qdrant.tech/documentation/interfaces/#grpc-interface) of the Qdrant instance. Defaults to `http://localhost:6334/`.
+
+*   `api_key` (type: `str`, optional): The API key to authenticate requests with.
+
+Before exporting, you must create a collection with a [vector name](https://qdrant.tech/documentation/concepts/vectors/#named-vectors) that matches the vector field name in CocoIndex, and set `setup_by_user=True` during export.
+
+Example:
+
+```python
+doc_embeddings.export(
+    "doc_embeddings",
+    cocoindex.storages.Qdrant(
+        collection_name="cocoindex",
+        grpc_url="https://xyz-example.cloud-region.cloud-provider.cloud.qdrant.io:6334/",
+        api_key="<your-api-key-here>",
+    ),
+    primary_key_fields=["id_field"],
+    setup_by_user=True,
+)
+```
+
+You can find an end-to-end example [here](https://github.com/cocoindex-io/cocoindex/tree/main/examples/text_embedding_qdrant).
diff --git a/examples/text_embedding_qdrant/.env b/examples/text_embedding_qdrant/.env
@@ -0,0 +1,2 @@
+# Postgres database address for cocoindex
+COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
diff --git a/examples/text_embedding_qdrant/README.md b/examples/text_embedding_qdrant/README.md
@@ -0,0 +1,69 @@
+## Description
+
+This example builds a vector index in Qdrant from local files.
+
+## Pre-requisites
+
+- [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.
+
+- Run Qdrant.
+
+```bash
+docker run -d -p 6334:6334 -p 6333:6333 qdrant/qdrant
+```
+
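+- [Create a collection](https://qdrant.tech/documentation/concepts/vectors/#named-vectors) to export the embeddings to. The vector name (`text_embedding`) must match the field collected in `main.py`, and the size (384) must match the output dimension of the embedding model (`all-MiniLM-L6-v2`).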
+
+```bash
+curl  -X PUT \
+  'http://localhost:6333/collections/cocoindex' \
+  --header 'Content-Type: application/json' \
+  --data-raw '{
+  "vectors": {
+    "text_embedding": {
+      "size": 384,
+      "distance": "Cosine"
+    }
+  }
+}'
+```
+
+You can view the collections and data with the Qdrant dashboard at <http://localhost:6333/dashboard>.
+
+## Run
+
+Install dependencies:
+
+```bash
+pip install -e .
+```
+
+Setup:
+
+```bash
+python main.py cocoindex setup
+```
+
+Update index:
+
+```bash
+python main.py cocoindex update
+```
+
+Run:
+
+```bash
+python main.py
+```
+
+## CocoInsight
+
+CocoInsight is in Early Access now (Free) 😊 You found us! Here is a quick 3-minute video tutorial about CocoInsight: [Watch on YouTube](https://youtu.be/ZnmyoHslBSc?si=pPLXWALztkA710r9).
+
+Run CocoInsight to understand your RAG data pipeline:
+
+```bash
+python main.py cocoindex server -c https://cocoindex.io
+```
+
+Then open the CocoInsight UI at [https://cocoindex.io/cocoinsight](https://cocoindex.io/cocoinsight).
diff --git a/examples/text_embedding_qdrant/main.py b/examples/text_embedding_qdrant/main.py
@@ -0,0 +1,90 @@
+from dotenv import load_dotenv
+
+import cocoindex
+
+
+def text_to_embedding(text: cocoindex.DataSlice) -> cocoindex.DataSlice:
+    """
+    Embed the text using a SentenceTransformer model.
+    This logic is shared between indexing and querying, so it is extracted into a function.
+    """
+    return text.transform(
+        cocoindex.functions.SentenceTransformerEmbed(
+            model="sentence-transformers/all-MiniLM-L6-v2"
+        )
+    )
+
+
+@cocoindex.flow_def(name="TextEmbedding")
+def text_embedding_flow(
+    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
+):
+    """
+    Define an example flow that splits local markdown files into chunks, embeds each chunk, and exports the rows to Qdrant.
+    """
+    data_scope["documents"] = flow_builder.add_source(
+        cocoindex.sources.LocalFile(path="markdown_files")
+    )
+
+    doc_embeddings = data_scope.add_collector()
+
+    with data_scope["documents"].row() as doc:
+        doc["chunks"] = doc["content"].transform(
+            cocoindex.functions.SplitRecursively(),
+            language="markdown",
+            chunk_size=2000,
+            chunk_overlap=500,
+        )
+
+        with doc["chunks"].row() as chunk:
+            chunk["embedding"] = text_to_embedding(chunk["text"])
+            doc_embeddings.collect(
+                id=cocoindex.GeneratedField.UUID,
+                filename=doc["filename"],
+                location=chunk["location"],
+                text=chunk["text"],
+                # 'text_embedding' must match the named vector the Qdrant collection was created with.
+                text_embedding=chunk["embedding"],
+            )
+
+    doc_embeddings.export(
+        "doc_embeddings",
+        cocoindex.storages.Qdrant(
+            collection_name="cocoindex", grpc_url="http://localhost:6334/"
+        ),
+        primary_key_fields=["id"],
+        setup_by_user=True,  # the Qdrant collection is created by the user before exporting
+    )
+
+
+query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
+    name="SemanticsSearch",
+    flow=text_embedding_flow,
+    target_name="doc_embeddings",  # same name as the export target defined in the flow
+    query_transform_flow=text_to_embedding,  # embed queries with the same function used for indexing
+    default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
+)
+
+
+@cocoindex.main_fn()
+def _run():
+    # Read queries interactively until empty input or Ctrl-C, printing the matches for each one.
+    while True:
+        try:
+            query = input("Enter search query (or Enter to quit): ")
+            if query == "":
+                break
+            results, _ = query_handler.search(query, 10, "text_embedding")  # presumably: result limit, then vector field name -- confirm against the handler API
+            print("\nSearch results:")
+            for result in results:
+                print(f"[{result.score:.3f}] {result.data['filename']}")
+                print(f"    {result.data['text']}")
+                print("---")
+            print()
+        except KeyboardInterrupt:
+            break
+
+
+if __name__ == "__main__":
+    load_dotenv(override=True)  # load settings from .env (e.g. COCOINDEX_DATABASE_URL), overriding values already in the environment
+    _run()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# Postgres database address for cocoindex
		COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex