diff --git a/docs/changelog/132959.yaml b/docs/changelog/132959.yaml new file mode 100644 index 0000000000000..1e1923c3beaf9 --- /dev/null +++ b/docs/changelog/132959.yaml @@ -0,0 +1,5 @@ +pr: 132959 +summary: Adds the `v_hamming` function for calculating the Hamming distance between two dense vectors +area: ES|QL +type: feature +issues: [132056] diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md new file mode 100644 index 0000000000000..1f200140065f7 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Calculates the Hamming distance between two dense vectors. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md new file mode 100644 index 0000000000000..6a8a5b38350e6 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md @@ -0,0 +1,24 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Example** + +```esql + from colors + | eval similarity = v_hamming(rgb_vector, [0, 255, 255]) + | sort similarity desc, color asc +``` + +| color:text | similarity:double | +| --- | --- | +| red | 24.0 | +| orange | 20.0 | +| gold | 18.0 | +| indigo | 18.0 | +| bisque | 17.0 | +| maroon | 17.0 | +| pink | 17.0 | +| salmon | 17.0 | +| black | 16.0 | +| firebrick | 16.0 | + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md new file mode 100644 index 0000000000000..65354c6380f64 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md @@ -0,0 +1,27 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +## `V_HAMMING` [esql-v_hamming] +```{applies_to} +stack: preview 9.2.0 +serverless: preview +``` + +**Syntax** + +:::{image} ../../../images/functions/v_hamming.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/v_hamming.md +::: + +:::{include} ../description/v_hamming.md +::: + +:::{include} ../types/v_hamming.md +::: + +:::{include} ../examples/v_hamming.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md new file mode 100644 index 0000000000000..6fe93636f0764 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Parameters** + +`left` +: First dense_vector to use to calculate the Hamming distance + +`right` +: Second dense_vector to use to calculate the Hamming distance + diff --git a/docs/reference/query-languages/esql/images/functions/v_hamming.svg b/docs/reference/query-languages/esql/images/functions/v_hamming.svg new file mode 100644 index 0000000000000..1fb76c406cb08 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/v_hamming.svg @@ -0,0 +1 @@ +V_HAMMING(left,right) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json b/docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json new file mode 100644 index 0000000000000..51e3660ae8650 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json @@ -0,0 +1,12 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "v_hamming", + "description" : "Calculates the Hamming distance between two dense vectors.", + "signatures" : [ ], + "examples" : [ + " from colors\n | eval similarity = v_hamming(rgb_vector, [0, 255, 255])\n | sort similarity desc, color asc" + ], + "preview" : true, + "snapshot_only" : true +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md b/docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md new file mode 100644 index 0000000000000..8de48ee0292ca --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### V HAMMING +Calculates the Hamming distance between two dense vectors. + +```esql + from colors + | eval similarity = v_hamming(rgb_vector, [0, 255, 255]) + | sort similarity desc, color asc +``` diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec new file mode 100644 index 0000000000000..a7e8815139567 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec @@ -0,0 +1,103 @@ + # Tests for hamming similarity function + + similarityWithVectorField + required_capability: hamming_vector_similarity_function + +// tag::vector-hamming[] + from colors + | eval similarity = v_hamming(rgb_vector, [0, 255, 255]) + | sort similarity desc, color asc +// end::vector-hamming[] + | limit 10 + | keep color, similarity + ; + +// tag::vector-hamming-result[] +color:text | similarity:double +red | 24.0 +orange | 20.0 +gold | 18.0 +indigo | 18.0 +bisque | 17.0 +maroon | 17.0 +pink | 17.0 +salmon | 17.0 +black | 16.0 +firebrick | 16.0 +// end::vector-hamming-result[] +; + +similarityAsPartOfExpression +required_capability: hamming_vector_similarity_function + +from colors +| eval score = round((1 + v_hamming(rgb_vector, [0, 255, 255]) / 2), 3) +| sort score desc, color asc +| limit 10 +| keep color, score +; + +color:text | score:double +red | 13.0 +orange | 11.0 +gold | 10.0 +indigo | 10.0 +bisque | 9.5 +maroon | 9.5 +pink | 9.5 +salmon | 9.5 +black | 9.0 +firebrick | 9.0 +; + +similarityWithLiteralVectors +required_capability: hamming_vector_similarity_function + +row a = 1 +| eval similarity = round(v_hamming([1, 2, 3], [0, 1, 2]), 3) +| keep similarity +; + +similarity:double +4.0 +; + +similarityWithStats +required_capability: hamming_vector_similarity_function + +from colors +| eval similarity = round(v_hamming(rgb_vector, [0, 255, 255]), 3) +| stats avg = round(avg(similarity), 3), min = min(similarity), max = max(similarity) +; + +avg:double | min:double | max:double +13.322 | 0.0 | 24.0 +; + +similarityWithNull +required_capability: hamming_vector_similarity_function +required_capability: vector_similarity_functions_support_null + +from colors +| eval similarity = v_hamming(rgb_vector, null) +| stats total_null = count(*) where similarity is null +; + +total_null:long +59 +; + +# TODO Need to implement a conversion function to convert a non-foldable row to a dense_vector +similarityWithRow-Ignore +required_capability: hamming_vector_similarity_function + +row vector = [1, 2, 3] +| eval similarity = round(v_hamming(vector, [0, 1, 2]), 3) +| sort similarity desc, color asc +| limit 10 +| keep color, similarity +; + +similarity:double +0.978 +; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java index 2d85e3bd7f93c..4ab018d3eac11 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java @@ -24,6 +24,7 @@ import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.expression.function.vector.Hamming; import org.elasticsearch.xpack.esql.expression.function.vector.L1Norm; import org.elasticsearch.xpack.esql.expression.function.vector.L2Norm; import org.elasticsearch.xpack.esql.expression.function.vector.VectorSimilarityFunction.SimilarityEvaluatorFunction; @@ -56,6 +57,9 @@ public static Iterable parameters() throws Exception { if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { params.add(new Object[] { "v_l2_norm", (SimilarityEvaluatorFunction) L2Norm::calculateSimilarity }); } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + params.add(new Object[] { "v_hamming", (SimilarityEvaluatorFunction) Hamming::calculateSimilarity }); + } return params; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 6d88b479b59f4..3480212fb5b06 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1374,7 +1374,12 @@ public enum Cap { /** * Support null elements on vector similarity functions */ - VECTOR_SIMILARITY_FUNCTIONS_SUPPORT_NULL; + VECTOR_SIMILARITY_FUNCTIONS_SUPPORT_NULL, + + /** + * Support for vector Hamming distance. + */ + HAMMING_VECTOR_SIMILARITY_FUNCTION(Build.current().isSnapshot()); private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 0202661acf76d..9d6372702d842 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -186,6 +186,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.util.Delay; import org.elasticsearch.xpack.esql.expression.function.vector.CosineSimilarity; import org.elasticsearch.xpack.esql.expression.function.vector.DotProduct; +import org.elasticsearch.xpack.esql.expression.function.vector.Hamming; import org.elasticsearch.xpack.esql.expression.function.vector.Knn; import org.elasticsearch.xpack.esql.expression.function.vector.L1Norm; import org.elasticsearch.xpack.esql.expression.function.vector.L2Norm; @@ -507,7 +508,8 @@ private static FunctionDefinition[][] snapshotFunctions() { def(DotProduct.class, DotProduct::new, "v_dot_product"), def(L1Norm.class, L1Norm::new, "v_l1_norm"), def(L2Norm.class, L2Norm::new, "v_l2_norm"), - def(Magnitude.class, Magnitude::new, "v_magnitude") } }; + def(Magnitude.class, Magnitude::new, "v_magnitude"), + def(Hamming.class, Hamming::new, "v_hamming") } }; } public EsqlFunctionRegistry snapshotRegistry() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java new file mode 100644 index 0000000000000..981304415a128 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.vector; + +import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.function.scalar.BinaryScalarFunction; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; + +import java.io.IOException; + +public class Hamming extends VectorSimilarityFunction { + + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Hamming", Hamming::new); + static final SimilarityEvaluatorFunction SIMILARITY_FUNCTION = Hamming::calculateSimilarity; + + @FunctionInfo( + returnType = "double", + preview = true, + description = "Calculates the Hamming distance between two dense vectors.", + examples = { @Example(file = "vector-hamming", tag = "vector-hamming") }, + appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.PREVIEW, version = "9.2.0") } + ) + public Hamming( + Source source, + @Param( + name = "left", + type = { "dense_vector" }, + description = "First dense_vector to use to calculate the Hamming distance" + ) Expression left, + @Param( + name = "right", + type = { "dense_vector" }, + description = "Second dense_vector to use to calculate the Hamming distance" + ) Expression right + ) { + super(source, left, right); + } + + private Hamming(StreamInput in) throws IOException { + super(in); + } + + @Override + protected SimilarityEvaluatorFunction getSimilarityFunction() { + return SIMILARITY_FUNCTION; + } + + @Override + protected BinaryScalarFunction replaceChildren(Expression newLeft, Expression newRight) { + return new Hamming(source(), newLeft, newRight); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Hamming::new, left(), right()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + public static float calculateSimilarity(float[] leftScratch, float[] rightScratch) { + byte[] a = new byte[leftScratch.length]; + byte[] b = new byte[rightScratch.length]; + for (int i = 0; i < leftScratch.length; i++) { + a[i] = (byte) leftScratch[i]; + } + for (int i = 0; i < leftScratch.length; i++) { + b[i] = (byte) rightScratch[i]; + } + return VectorUtil.xorBitCount(a, b); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java index a0897792482d8..f4353c28476d2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java @@ -45,6 +45,9 @@ public static List getNamedWritables() { if (EsqlCapabilities.Cap.MAGNITUDE_SCALAR_VECTOR_FUNCTION.isEnabled()) { entries.add(Magnitude.ENTRY); } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + entries.add(Hamming.ENTRY); + } return Collections.unmodifiableList(entries); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 7307285ec37a7..e2f786b94ed5b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2390,6 +2390,13 @@ public void testDenseVectorImplicitCastingSimilarityFunctions() { ); checkDenseVectorImplicitCastingSimilarityFunction("v_l2_norm(float_vector, [1, 2, 3])", List.of(1f, 2f, 3f)); } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkDenseVectorImplicitCastingSimilarityFunction( + "v_hamming(byte_vector, [0.342, 0.164, 0.234])", + List.of(0.342f, 0.164f, 0.234f) + ); + checkDenseVectorImplicitCastingSimilarityFunction("v_hamming(byte_vector, [1, 2, 3])", List.of(1f, 2f, 3f)); + } } private void checkDenseVectorImplicitCastingSimilarityFunction(String similarityFunction, List expectedElems) { @@ -2422,6 +2429,9 @@ public void testNoDenseVectorFailsSimilarityFunction() { if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { checkNoDenseVectorFailsSimilarityFunction("v_l2_norm([0, 1, 2], 0.342)"); } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkNoDenseVectorFailsSimilarityFunction("v_hamming([0, 1, 2], 0.342)"); + } } private void checkNoDenseVectorFailsSimilarityFunction(String similarityFunction) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index c43f875327872..077d1fc7300aa 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -2318,6 +2318,10 @@ public void testVectorSimilarityFunctionsNullArgs() throws Exception { if (EsqlCapabilities.Cap.MAGNITUDE_SCALAR_VECTOR_FUNCTION.isEnabled()) { checkVectorFunctionsNullArgs("v_magnitude(null)"); } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkVectorFunctionsNullArgs("v_hamming(null, vector)"); + checkVectorFunctionsNullArgs("v_hamming(vector, null)"); + } } private void checkVectorFunctionsNullArgs(String functionInvocation) throws Exception { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/HammingSimilarityTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/HammingSimilarityTests.java new file mode 100644 index 0000000000000..203c0171dc5f4 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/HammingSimilarityTests.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.vector; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.FunctionName; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.List; +import java.util.function.Supplier; + +@FunctionName("v_hamming") +public class HammingSimilarityTests extends AbstractVectorSimilarityFunctionTestCase { + + public HammingSimilarityTests(@Name("TestCase") Supplier testCaseSupplier) { + super(testCaseSupplier); + } + + @ParametersFactory + public static Iterable parameters() { + return similarityParameters(Hamming.class.getSimpleName(), Hamming.SIMILARITY_FUNCTION); + } + + protected EsqlCapabilities.Cap capability() { + return EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION; + } + + @Override + protected Expression build(Source source, List args) { + return new Hamming(source, args.get(0), args.get(1)); + } +}