Merged
36 changes: 18 additions & 18 deletions docs/docs/ops/functions.md
@@ -189,49 +189,49 @@ Input data:

Return: *Vector[Float32, N]*, where *N* is the dimension of the embedding vector determined by the model.

## ColPali Functions

ColPali functions enable multimodal document retrieval using ColVision models. These functions support all models available in the [colpali-engine library](https://github.com/illuin-tech/colpali), including:

- **ColPali models** (colpali-*): PaliGemma-based, best for general document retrieval
- **ColQwen2 models** (colqwen-*): Qwen2-VL-based, excellent for multilingual text (29+ languages) and general vision
- **ColSmol models** (colsmol-*): Lightweight, good for resource-constrained environments
- Any future ColVision models supported by colpali-engine

These models use late interaction between image patch embeddings and text token embeddings for retrieval.

:::note Optional Dependency Required

These functions require the `colpali-engine` library, which is an optional dependency. Install CocoIndex with:

```bash
pip install 'cocoindex[colpali]'
```
:::

### ColPaliEmbedImage

`ColPaliEmbedImage` embeds images using ColVision multimodal models.

The spec takes the following fields:

* `model` (`str`): Any ColVision model name supported by colpali-engine (e.g., "vidore/colpali-v1.2", "vidore/colqwen2.5-v0.2", "vidore/colsmol-v1.0"). See the [complete list of supported models](https://github.com/illuin-tech/colpali#list-of-colvision-models).

Input data:

* `img_bytes` (*Bytes*): The image data in bytes format.

Return: *Vector[Vector[Float32, N]]*, where *N* is the hidden dimension determined by the model. This returns a multi-vector format with variable patches and fixed hidden dimension.
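For intuition, the multi-vector return value maps naturally onto a 2-D array: a variable number of patch vectors, each of fixed length *N*. A minimal sketch with NumPy (the patch count and dimension below are illustrative placeholders, not values produced by any particular model):

```python
import numpy as np

# Illustrative shapes only: the real patch count varies per model and image,
# and N is the hidden dimension the model reports.
n_patches, hidden_dim = 64, 128

# Vector[Vector[Float32, N]]: variable outer length (patches),
# fixed inner length N (the model's hidden dimension).
image_emb = (
    np.random.default_rng(0)
    .normal(size=(n_patches, hidden_dim))
    .astype(np.float32)
)

assert image_emb.shape == (n_patches, hidden_dim)
assert image_emb.dtype == np.float32
```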

### ColPaliEmbedQuery

`ColPaliEmbedQuery` embeds text queries using ColVision multimodal models.

This produces query embeddings compatible with ColVision image embeddings for late interaction scoring (MaxSim).
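For intuition, late interaction (MaxSim) scores a query against an image by matching each query token to its most similar image patch and summing those best matches. A minimal NumPy sketch (not CocoIndex API; shapes and the helper name `maxsim_score` are illustrative):

```python
import numpy as np

def maxsim_score(query_emb: np.ndarray, image_emb: np.ndarray) -> float:
    """MaxSim: for each query token, take its best-matching patch, then sum.

    query_emb: [n_query_tokens, dim], image_emb: [n_patches, dim].
    """
    # L2-normalize rows so dot products are cosine similarities.
    q = query_emb / np.linalg.norm(query_emb, axis=1, keepdims=True)
    d = image_emb / np.linalg.norm(image_emb, axis=1, keepdims=True)
    sim = q @ d.T                        # [n_query_tokens, n_patches]
    return float(sim.max(axis=1).sum())  # best patch per token, summed

rng = np.random.default_rng(0)
query = rng.normal(size=(12, 128)).astype(np.float32)   # e.g. 12 query tokens
image = rng.normal(size=(200, 128)).astype(np.float32)  # e.g. 200 patches
score = maxsim_score(query, image)
```

Since each per-token maximum cosine similarity is at most 1, the score is bounded by the number of query tokens; higher scores mean a better query/image match.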

The spec takes the following fields:

* `model` (`str`): Any ColVision model name supported by colpali-engine (e.g., "vidore/colpali-v1.2", "vidore/colqwen2.5-v0.2", "vidore/colsmol-v1.0"). See the [complete list of supported models](https://github.com/illuin-tech/colpali#list-of-colvision-models).

Input data:

14 changes: 14 additions & 0 deletions examples/image_search/README.md
@@ -68,7 +68,21 @@ export OLLAMA_MODEL="gemma3" # Optional, for caption generation

- Configure model (optional):
```sh
# All ColVision models supported by colpali-engine are available
# See https://github.com/illuin-tech/colpali#list-of-colvision-models for the complete list

# ColPali models (colpali-*) - PaliGemma-based, best for general document retrieval
export COLPALI_MODEL="vidore/colpali-v1.2" # Default model
export COLPALI_MODEL="vidore/colpali-v1.3" # Latest version

# ColQwen2 models (colqwen-*) - Qwen2-VL-based, excellent for multilingual text (29+ languages) and general vision
export COLPALI_MODEL="vidore/colqwen2-v1.0"
export COLPALI_MODEL="vidore/colqwen2.5-v0.2" # Latest Qwen2.5 model

# ColSmol models (colsmol-*) - Lightweight, good for resource-constrained environments
export COLPALI_MODEL="vidore/colSmol-256M"

# Any other ColVision models from https://github.com/illuin-tech/colpali are supported
```

- Run ColPali Backend:
4 changes: 2 additions & 2 deletions examples/patient_intake_extraction/README.md
@@ -1,9 +1,9 @@
# Extract structured data from patient intake forms with LLM
[![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)
We appreciate a star ⭐ at [CocoIndex Github](https://github.com/cocoindex-io/cocoindex) if this is helpful.


This repo shows how to use LLMs to extract structured data from patient intake forms in different formats, such as PDF and Docx.
CocoIndex supports multiple [sources](https://cocoindex.io/docs/ops/sources) and [LLM models](https://cocoindex.io/docs/ai/llm) natively.

![Structured Data From Patient Intake Forms](https://github.com/user-attachments/assets/1f6afb69-d26d-4a08-8774-13982d6aec1e)
97 changes: 78 additions & 19 deletions python/cocoindex/functions.py
@@ -116,19 +116,62 @@ def __call__(self, text: str) -> NDArray[np.float32]:
def _get_colpali_model_and_processor(model_name: str) -> ColPaliModelInfo:
"""Get or load ColPali model and processor, with caching."""
try:
from colpali_engine.models import (  # type: ignore[import-untyped]
ColPali,
ColPaliProcessor,
ColQwen2,
ColQwen2Processor,
ColQwen2_5,
ColQwen2_5_Processor,
ColIdefics3,
ColIdefics3Processor,
)
from colpali_engine.utils.torch_utils import get_torch_device # type: ignore[import-untyped]
import torch
except ImportError as e:
raise ImportError(
"ColVision models are not available. Make sure cocoindex is installed with ColPali support."
) from e

device = get_torch_device("auto")

# Manual model detection based on model name
model_name_lower = model_name.lower()

try:
if "qwen2.5" in model_name_lower:
model = ColQwen2_5.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
device_map=device,
).eval()
processor = ColQwen2_5_Processor.from_pretrained(model_name)
elif "qwen2" in model_name_lower:
model = ColQwen2.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
device_map=device,
).eval()
processor = ColQwen2Processor.from_pretrained(model_name)
elif "colsmol" in model_name_lower or "smol" in model_name_lower:
# ColSmol models use Idefics3 architecture
model = ColIdefics3.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
device_map=device,
).eval()
processor = ColIdefics3Processor.from_pretrained(model_name)
else:
# Default to ColPali
model = ColPali.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
device_map=device,
).eval()
processor = ColPaliProcessor.from_pretrained(model_name)

except Exception as e:
raise RuntimeError(f"Failed to load model {model_name}: {e}") from e

# Get dimension from the actual model
dimension = _detect_colpali_dimension(model, processor, device)
@@ -167,17 +167,25 @@ def _detect_colpali_dimension(model: Any, processor: Any, device: Any) -> int:

class ColPaliEmbedImage(op.FunctionSpec):
"""
`ColPaliEmbedImage` embeds images using ColVision multimodal models.

Supports ALL models available in the colpali-engine library, including:
- ColPali models (colpali-*): PaliGemma-based, best for general document retrieval
- ColQwen2 models (colqwen-*): Qwen2-VL-based, excellent for multilingual text (29+ languages) and general vision
- ColSmol models (colsmol-*): Lightweight, good for resource-constrained environments
- Any future ColVision models supported by colpali-engine

These models use late interaction between image patch embeddings and text token
embeddings for retrieval.

Args:
model: Any ColVision model name supported by colpali-engine
(e.g., "vidore/colpali-v1.2", "vidore/colqwen2.5-v0.2", "vidore/colsmol-v1.0")
See https://github.com/illuin-tech/colpali for the complete list of supported models.

Note:
This function requires the optional colpali-engine dependency.
Install it with: pip install 'cocoindex[colpali]'
"""

model: str
@@ -189,7 +189,7 @@ class ColPaliEmbedImage(op.FunctionSpec):
behavior_version=1,
)
class ColPaliEmbedImageExecutor:
"""Executor for ColVision image embedding (ColPali, ColQwen2, ColSmol, etc.)."""

spec: ColPaliEmbedImage
_model_info: ColPaliModelInfo
@@ -209,7 +209,7 @@ def __call__(self, img_bytes: bytes) -> Any:
import io
except ImportError as e:
raise ImportError(
"Required dependencies (PIL, torch) are missing for ColVision image embedding."
) from e

model = self._model_info.model
@@ -235,17 +235,25 @@ def __call__(self, img_bytes: bytes) -> Any:

class ColPaliEmbedQuery(op.FunctionSpec):
"""
`ColPaliEmbedQuery` embeds text queries using ColVision multimodal models.

Supports ALL models available in the colpali-engine library, including:
- ColPali models (colpali-*): PaliGemma-based, best for general document retrieval
- ColQwen2 models (colqwen-*): Qwen2-VL-based, excellent for multilingual text (29+ languages) and general vision
- ColSmol models (colsmol-*): Lightweight, good for resource-constrained environments
- Any future ColVision models supported by colpali-engine

This produces query embeddings compatible with ColVision image embeddings
for late interaction scoring (MaxSim).

Args:
model: Any ColVision model name supported by colpali-engine
(e.g., "vidore/colpali-v1.2", "vidore/colqwen2.5-v0.2", "vidore/colsmol-v1.0")
See https://github.com/illuin-tech/colpali for the complete list of supported models.

Note:
This function requires the optional colpali-engine dependency.
Install it with: pip install 'cocoindex[colpali]'
"""

model: str
@@ -257,7 +257,7 @@ class ColPaliEmbedQuery(op.FunctionSpec):
behavior_version=1,
)
class ColPaliEmbedQueryExecutor:
"""Executor for ColVision query embedding (ColPali, ColQwen2, ColSmol, etc.)."""

spec: ColPaliEmbedQuery
_model_info: ColPaliModelInfo
@@ -275,7 +275,7 @@ def __call__(self, query: str) -> Any:
import torch
except ImportError as e:
raise ImportError(
"Required dependencies (torch) are missing for ColVision query embedding."
) from e

model = self._model_info.model