|
15 | 15 | import org.elasticsearch.common.util.LazyInitializable; |
16 | 16 | import org.elasticsearch.core.Nullable; |
17 | 17 | import org.elasticsearch.core.TimeValue; |
| 18 | +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; |
18 | 19 | import org.elasticsearch.inference.ChunkedInference; |
19 | 20 | import org.elasticsearch.inference.ChunkingSettings; |
20 | 21 | import org.elasticsearch.inference.InferenceServiceConfiguration; |
@@ -349,19 +350,18 @@ public Model updateModelWithEmbeddingDetails(Model model, int embeddingSize) { |
349 | 350 | } |
350 | 351 |
|
351 | 352 | /** |
352 | | - * Return the default similarity measure for the embedding type. |
353 | | - * Cohere embeddings are normalized to unit vectors therefor Dot |
354 | | - * Product similarity can be used and is the default for all Cohere |
355 | | - * models. |
| 353 | + * Returns the default similarity measure for the embedding type. |
| 354 | + * Cohere embeddings are expected to be normalized to unit vectors, but due to floating point precision issues, |
| 355 | + * our check ({@link DenseVectorFieldMapper#isNotUnitVector(float)}) often fails. |
| 356 | + * Therefore, cosine similarity is the default for every embedding type except bit and binary, which use L2 norm. |
356 | 357 | * |
357 | | - * @return The default similarity. |
| 358 | + * @return The default similarity measure. |
358 | 359 | */ |
359 | 360 | static SimilarityMeasure defaultSimilarity(CohereEmbeddingType embeddingType) { |
360 | 361 | if (embeddingType == CohereEmbeddingType.BIT || embeddingType == CohereEmbeddingType.BINARY) { |
361 | 362 | return SimilarityMeasure.L2_NORM; |
362 | 363 | } |
363 | | - |
364 | | - return SimilarityMeasure.DOT_PRODUCT; |
| 364 | + return SimilarityMeasure.COSINE; |
365 | 365 | } |
366 | 366 |
|
367 | 367 | @Override |
|
0 commit comments