From c7a0a385e604990e72f3500434d75708cc83ad6a Mon Sep 17 00:00:00 2001
From: Dev Kumar Pal <72178142+devkumar2313@users.noreply.github.com>
Date: Thu, 2 Oct 2025 08:37:28 +0530
Subject: [PATCH 1/2] custom_output_files: typing generics, dataclass spec, UTF-8 writes

---
 examples/custom_output_files/main.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/examples/custom_output_files/main.py b/examples/custom_output_files/main.py
index 5bbfa83db..73cff3455 100644
--- a/examples/custom_output_files/main.py
+++ b/examples/custom_output_files/main.py
@@ -1,6 +1,7 @@
 from datetime import timedelta
 import os
 import dataclasses
+from typing import Optional, Dict, Tuple, Any
 
 import cocoindex
 from markdown_it import MarkdownIt
@@ -8,6 +9,7 @@
 _markdown_it = MarkdownIt("gfm-like")
 
 
+@dataclasses.dataclass
 class LocalFileTarget(cocoindex.op.TargetSpec):
     """Represents the custom target spec."""
 
@@ -36,7 +38,9 @@ def describe(key: str) -> str:
 
     @staticmethod
     def apply_setup_change(
-        key: str, previous: LocalFileTarget | None, current: LocalFileTarget | None
+        key: str, 
+        previous: Optional[LocalFileTarget], 
+        current: Optional[LocalFileTarget]
     ) -> None:
         """
         Apply setup changes to the target.
@@ -68,7 +72,7 @@ def prepare(spec: LocalFileTarget) -> LocalFileTarget:
 
     @staticmethod
     def mutate(
-        *all_mutations: tuple[LocalFileTarget, dict[str, LocalFileTargetValues | None]],
+        *all_mutations: Tuple[LocalFileTarget, Dict[str, Optional[LocalFileTargetValues]]],
     ) -> None:
         """
         Mutate the target.
@@ -90,7 +94,7 @@ def mutate(
                     except FileNotFoundError:
                         pass
                 else:
-                    with open(full_path, "w") as f:
+                    with open(full_path, "w", encoding="utf-8") as f:
                         f.write(mutation.html)
 
 

From d41a7b4927e6f36be7007c8b46375105ad29297d Mon Sep 17 00:00:00 2001
From: Dev Kumar Pal <72178142+devkumar2313@users.noreply.github.com>
Date: Thu, 2 Oct 2025 08:38:13 +0530
Subject: [PATCH 2/2] multi_format_indexing: typing generics, rework flow and Qdrant key

---
 examples/multi_format_indexing/main.py | 39 +++++++++++---------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/examples/multi_format_indexing/main.py b/examples/multi_format_indexing/main.py
index aab794e1a..752bdc3cc 100644
--- a/examples/multi_format_indexing/main.py
+++ b/examples/multi_format_indexing/main.py
@@ -1,6 +1,7 @@
 import cocoindex
 import os
 import mimetypes
+from typing import List, Optional, Any, Dict
 
 from dotenv import load_dotenv
 from dataclasses import dataclass
@@ -16,12 +17,12 @@
 
 @dataclass
 class Page:
-    page_number: int | None
+    page_number: Optional[int]
     image: bytes
 
 
 @cocoindex.op.function()
-def file_to_pages(filename: str, content: bytes) -> list[Page]:
+def file_to_pages(filename: str, content: bytes) -> List[Page]:
     """
     Classify file content based on MIME type detection.
     Returns ClassifiedFileContent with appropriate field populated based on file type.
@@ -31,7 +32,7 @@ def file_to_pages(filename: str, content: bytes) -> list[Page]:
 
     if mime_type == "application/pdf":
         images = convert_from_bytes(content, dpi=300)
-        pages = []
+        pages: List[Page] = []
         for i, image in enumerate(images):
             with BytesIO() as buffer:
                 image.save(buffer, format="PNG")
@@ -54,46 +55,38 @@ def multi_format_indexing_flow(
     flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
 ) -> None:
     """
-    Define an example flow that embeds files into a vector database.
+    Define an example flow that embeds pages of multi-format files into a vector database.
     """
     data_scope["documents"] = flow_builder.add_source(
-        cocoindex.sources.LocalFile(path="source_files", binary=True)
+        cocoindex.sources.LocalFile(path="data", binary=True)
     )
 
-    output_embeddings = data_scope.add_collector()
+    embeddings_index = data_scope.add_collector()
 
     with data_scope["documents"].row() as doc:
-        doc["pages"] = flow_builder.transform(
-            file_to_pages, filename=doc["filename"], content=doc["content"]
-        )
+        doc["pages"] = doc.transform(file_to_pages, filename=doc["filename"], content=doc["content"])
         with doc["pages"].row() as page:
             page["embedding"] = page["image"].transform(
-                cocoindex.functions.ColPaliEmbedImage(model=COLPALI_MODEL_NAME)
+                cocoindex.functions.ColPali(model_name=COLPALI_MODEL_NAME)
             )
-            output_embeddings.collect(
-                id=cocoindex.GeneratedField.UUID,
+            embeddings_index.collect(
                 filename=doc["filename"],
                 page=page["page_number"],
                 embedding=page["embedding"],
             )
 
-    output_embeddings.export(
-        "multi_format_indexings",
+    embeddings_index.export(
+        "output",
         cocoindex.targets.Qdrant(
             connection=qdrant_connection,
             collection_name=QDRANT_COLLECTION,
+            vector_field_name="embedding",
         ),
-        primary_key_fields=["id"],
+        primary_key_fields=["filename", "page"],
     )
 
 
-@cocoindex.transform_flow()
-def query_to_colpali_embedding(
-    text: cocoindex.DataSlice[str],
-) -> cocoindex.DataSlice[list[list[float]]]:
-    return text.transform(
-        cocoindex.functions.ColPaliEmbedQuery(model=COLPALI_MODEL_NAME)
-    )
+query_to_colpali_embedding = cocoindex.functions.ColPali(model_name=COLPALI_MODEL_NAME)
 
 
 def _main() -> None:
@@ -122,7 +115,7 @@ def _main() -> None:
             payload = result.payload
             if payload is None:
                 continue
-            page_number = payload["page"]
+            page_number: Optional[int] = payload.get("page")
             page_number_str = f"Page:{page_number}" if page_number is not None else ""
             print(f"[{score:.3f}] {payload['filename']} {page_number_str}")
             print("---")