@@ -99,3 +99,132 @@ def __call__(self, text: str) -> NDArray[np.float32]:
         assert self._model is not None
         result: NDArray[np.float32] = self._model.encode(text, convert_to_numpy=True)
         return result
+
+
+# ColPali model cache for ColPali embedding functions
+_colpali_model_cache = {}
+
+
+def get_colpali_model(model: str):
+    """Get or load ColPali model and processor."""
+    global _colpali_model_cache
+    if model not in _colpali_model_cache:
+        try:
+            from colpali_engine.models import ColPali, ColPaliProcessor
+        except ImportError as e:
+            raise ImportError(
+                "ColPali is not available. Make sure cocoindex is installed with ColPali support."
+            ) from e
+
+        model_instance = ColPali.from_pretrained(model)
+        processor_instance = ColPaliProcessor.from_pretrained(model)
+
+        # Try to get the dimension from the FastEmbed API first
+        output_dim = None
+        try:
+            from fastembed import LateInteractionMultimodalEmbedding
+
+            # Use the standard FastEmbed ColPali model for dimension detection;
+            # all ColPali variants should have the same embedding dimension.
+            standard_colpali_model = "Qdrant/colpali-v1.3-fp16"
+
+            # Try to find the model in FastEmbed's supported models
+            supported_models = LateInteractionMultimodalEmbedding.list_supported_models()
+            for supported_model in supported_models:
+                if supported_model["model"] == standard_colpali_model:
+                    output_dim = supported_model["dim"]
+                    break
+
+        except Exception:
+            # FastEmbed API failed; will fall back to the model config
+            pass
+
+        # Fall back to the model config if the FastEmbed API failed
+        if output_dim is None:
+            if hasattr(model_instance, "config"):
+                # Try different config attributes that might contain the hidden dimension
+                if hasattr(model_instance.config, "hidden_size"):
+                    output_dim = model_instance.config.hidden_size
+                elif hasattr(model_instance.config, "text_config") and hasattr(model_instance.config.text_config, "hidden_size"):
+                    output_dim = model_instance.config.text_config.hidden_size
+                elif hasattr(model_instance.config, "vision_config") and hasattr(model_instance.config.vision_config, "hidden_size"):
+                    output_dim = model_instance.config.vision_config.hidden_size
+                else:
+                    raise ValueError(f"Could not find hidden_size in model config for {model}. Config attributes: {dir(model_instance.config)}")
+            else:
+                raise ValueError(f"Model {model} has no config attribute. Model attributes: {dir(model_instance)}")
+
+        _colpali_model_cache[model] = {
+            "model": model_instance,
+            "processor": processor_instance,
+            "dimension": output_dim,
+        }
+    return _colpali_model_cache[model]["model"], _colpali_model_cache[model]["processor"], _colpali_model_cache[model]["dimension"]
+
+
+def get_colpali_dimension(model: str) -> int:
+    """Get the output dimension for a ColPali model."""
+    _, _, dimension = get_colpali_model(model)
+    return dimension
+
+
+def colpali_embed_image(img_bytes: bytes, model: str) -> list[list[float]]:
+    """Embed image using ColPali model, returning multi-vector format."""
+    try:
+        from PIL import Image
+        import torch
+        import io
+    except ImportError as e:
+        raise ImportError(
+            "Required dependencies (PIL, torch) are missing for ColPali image embedding."
+        ) from e
+
+    colpali_model, processor, expected_dim = get_colpali_model(model)
+    pil_image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
+    inputs = processor.process_images([pil_image])
+    with torch.no_grad():
+        embeddings = colpali_model(**inputs)
+
+    # Return multi-vector format: [patches, hidden_dim]
+    if len(embeddings.shape) != 3:
+        raise ValueError(f"Expected 3D tensor [batch, patches, hidden_dim], got shape {embeddings.shape}")
+
+    # Keep patch-level embeddings: [batch, patches, hidden_dim] -> [patches, hidden_dim]
+    patch_embeddings = embeddings[0]  # Remove batch dimension
+
+    # Convert to list of lists: [[patch1_embedding], [patch2_embedding], ...]
+    result = []
+    for patch in patch_embeddings:
+        result.append(patch.cpu().numpy().tolist())
+
+    return result
+
+
+def colpali_embed_query(query: str, model: str) -> list[list[float]]:
+    """Embed query using ColPali model, returning multi-vector format."""
+    try:
+        import torch
+        import numpy as np
+    except ImportError as e:
+        raise ImportError(
+            "Required dependencies (torch, numpy) are missing for ColPali query embedding."
+        ) from e
+
+    colpali_model, processor, target_dimension = get_colpali_model(model)
+    inputs = processor.process_queries([query])
+    with torch.no_grad():
+        embeddings = colpali_model(**inputs)
+
+    # Return multi-vector format: [tokens, hidden_dim]
+    if len(embeddings.shape) != 3:
+        raise ValueError(f"Expected 3D tensor [batch, tokens, hidden_dim], got shape {embeddings.shape}")
+
+    # Keep token-level embeddings: [batch, tokens, hidden_dim] -> [tokens, hidden_dim]
+    token_embeddings = embeddings[0]  # Remove batch dimension
+
+    # Convert to list of lists: [[token1_embedding], [token2_embedding], ...]
+    result = []
+    for token in token_embeddings:
+        result.append(token.cpu().numpy().tolist())
+
+    return result
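
For reference, a minimal usage sketch of the new helpers; the checkpoint name, image path, and query text below are illustrative assumptions, not part of this change:

    # Hypothetical example: placeholder checkpoint, image path, and query.
    model_name = "vidore/colpali-v1.2"
    dim = get_colpali_dimension(model_name)  # per-vector hidden size

    with open("page.png", "rb") as f:
        image_vectors = colpali_embed_image(f.read(), model_name)
    query_vectors = colpali_embed_query("total revenue by quarter", model_name)

    # Both outputs are multi-vector: one hidden_dim-sized vector per patch/token.
    print(dim, len(image_vectors), len(query_vectors))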