Skip to content

Commit f240642

Browse files
fix: colmodernvbert tests and query processing
1 parent 9b0f4cb commit f240642

File tree

2 files changed

+18
-12
lines changed

2 files changed

+18
-12
lines changed

fastembed/late_interaction_multimodal/colmodernvbert.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ class ColModernVBERT(LateInteractionMultimodalEmbeddingBase, OnnxMultimodalModel
4343
VISUAL_PROMPT_PREFIX = (
4444
"<|begin_of_text|>User:<image>Describe the image.<end_of_utterance>\nAssistant:"
4545
)
46+
QUERY_AUGMENTATION_TOKEN = "<end_of_utterance>"
4647

4748
def __init__(
4849
self,
@@ -187,7 +188,9 @@ def _post_process_onnx_text_output(
187188
return output.model_output
188189

189190
def tokenize(self, documents: list[str], **kwargs: Any) -> list[Encoding]:
190-
encoded = self.tokenizer.encode_batch(documents) # type: ignore[union-attr]
191+
# Add query augmentation tokens (matching process_queries logic from colpali-engine)
192+
augmented_queries = [doc + self.QUERY_AUGMENTATION_TOKEN * 10 for doc in documents]
193+
encoded = self.tokenizer.encode_batch(augmented_queries) # type: ignore[union-attr]
191194
return encoded
192195

193196
def _preprocess_onnx_image_input(

tests/test_late_interaction_multimodal.py

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,13 @@
2323
),
2424
"Qdrant/colmodernvbert": np.array(
2525
[
26-
[0.2256, -0.0503, 0.0254, -0.011, -0.0786, 0.2152, -0.0961],
27-
[-0.0028, -0.0484, -0.0724, -0.0724, -0.0977, 0.0308, -0.0236],
28-
[0.0035, -0.1075, -0.0877, -0.0207, -0.0828, -0.0294, -0.0253],
29-
[0.0021, -0.0797, -0.0605, -0.0008, -0.0837, 0.0015, -0.0846],
30-
[-0.0473, -0.0594, -0.0553, -0.0014, -0.0712, 0.0158, -0.0546],
31-
[-0.1009, -0.082, -0.0684, -0.1385, -0.0469, -0.0606, -0.0323],
32-
[-0.0624, 0.006, -0.0498, -0.0127, -0.1115, 0.0076, -0.0888],
26+
[0.11614, -0.15793, -0.11194, 0.0688, 0.08001, 0.10575, -0.07871],
27+
[0.10094, -0.13301, -0.12069, 0.10932, 0.04645, 0.09884, 0.04048],
28+
[0.13106, -0.18613, -0.13469, 0.10566, 0.03659, 0.07712, -0.03916],
29+
[0.09754, -0.09596, -0.04839, 0.14991, 0.05692, 0.10569, -0.08349],
30+
[0.02576, -0.15651, -0.09977, 0.09707, 0.13412, 0.09994, -0.09931],
31+
[-0.06741, -0.1787, -0.19677, -0.07618, 0.13102, -0.02131, -0.02437],
32+
[-0.02776, -0.10187, -0.13793, 0.03835, 0.04766, 0.04701, -0.15635],
3333
]
3434
),
3535
}
@@ -48,10 +48,13 @@
4848
),
4949
"Qdrant/colmodernvbert": np.array(
5050
[
51-
[0.0541, 0.0677, 0.0392, 0.1494, 0.1855, 0.0275, -0.1835, -0.1025, -0.1204, -0.0835],
52-
[-0.0515, -0.1328, 0.0298, -0.0574, 0.0829, -0.0836, 0.0888, 0.0138, 0.0741, 0.0293],
53-
[-0.1114, -0.0506, 0.0666, -0.1064, -0.0229, -0.0486, -0.007, 0.0932, 0.0054, 0.1113],
54-
[0.2317, -0.0518, 0.0248, -0.0075, -0.078, 0.2073, -0.0912, -0.0622, -0.0203, 0.093]
51+
[0.05, 0.06557, 0.04026, 0.14981, 0.1842, 0.0263, -0.18706],
52+
[-0.05664, -0.14028, 0.00649, -0.02849, 0.09034, -0.01494, 0.10693],
53+
[-0.10147, -0.00716, 0.09084, -0.08236, -0.01849, -0.00972, -0.00461],
54+
[-0.1233, -0.10814, -0.02337, -0.00329, 0.05984, 0.09934, 0.09846],
55+
[-0.07053, -0.13119, -0.06487, 0.01508, 0.07459, 0.07655, 0.14821],
56+
[0.00526, -0.13842, -0.05837, -0.02721, 0.13009, 0.05076, 0.17962],
57+
[0.00924, -0.14383, -0.03057, -0.03691, 0.11718, 0.037, 0.13344],
5558
]
5659
),
5760
}

0 commit comments

Comments
 (0)