From 3cd12e99272562ae021a77ef20953033b03edc56 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Thu, 14 Aug 2025 18:05:32 -0400 Subject: [PATCH 1/8] Implement v_hamming --- .../functions/description/v_hamming.md | 6 + .../_snippets/functions/examples/v_hamming.md | 24 ++++ .../_snippets/functions/layout/v_hamming.md | 27 +++++ .../functions/parameters/v_hamming.md | 10 ++ .../esql/images/functions/v_hamming.svg | 1 + .../definition/functions/v_hamming.json | 12 ++ .../esql/kibana/docs/functions/v_hamming.md | 10 ++ .../main/resources/vector-hamming.csv-spec | 103 ++++++++++++++++++ .../vector/VectorSimilarityFunctionsIT.java | 4 + .../xpack/esql/action/EsqlCapabilities.java | 7 +- .../function/EsqlFunctionRegistry.java | 4 +- .../expression/function/vector/Hamming.java | 88 +++++++++++++++ .../function/vector/VectorWritables.java | 3 + .../xpack/esql/analysis/AnalyzerTests.java | 10 ++ .../xpack/esql/analysis/VerifierTests.java | 4 + .../vector/HammingSimilarityTests.java | 42 +++++++ 16 files changed, 353 insertions(+), 2 deletions(-) create mode 100644 docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md create mode 100644 docs/reference/query-languages/esql/images/functions/v_hamming.svg create mode 100644 docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json create mode 100644 docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/HammingSimilarityTests.java diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md new file mode 100644 index 0000000000000..65c393cd69067 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Calculates the hamming distance between two dense_vectors. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md new file mode 100644 index 0000000000000..e3f6b039222f0 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md @@ -0,0 +1,24 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Example** + +```esql + from colors + | eval similarity = v_hamming(rgb_vector, [0, 255, 255]) + | sort similarity desc, color asc +``` + +| color:text | similarity:double | +| --- | --- | +| cyan | 1.0 | +| azure | 0.8333333134651184 | +| blue | 0.6666666865348816 | +| honeydew | 0.6666666865348816 | +| lime | 0.6666666865348816 | +| mint cream | 0.6666666865348816 | +| white | 0.6666666865348816 | +| thistle | 0.625 | +| lavender | 0.5833333134651184 | +| aqua marine | 0.5416666865348816 | + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md new file mode 100644 index 0000000000000..74c506e59e4d0 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md @@ -0,0 +1,27 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +## `V_HAMMING` [esql-v_hamming] +```{applies_to} +stack: development +serverless: preview +``` + +**Syntax** + +:::{image} ../../../images/functions/v_hamming.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/v_hamming.md +::: + +:::{include} ../description/v_hamming.md +::: + +:::{include} ../types/v_hamming.md +::: + +:::{include} ../examples/v_hamming.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md new file mode 100644 index 0000000000000..4b19f1b9ab19a --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Parameters** + +`left` +: first dense_vector to calculate hamming distance between + +`right` +: second dense_vector to calculate hamming distance between + diff --git a/docs/reference/query-languages/esql/images/functions/v_hamming.svg b/docs/reference/query-languages/esql/images/functions/v_hamming.svg new file mode 100644 index 0000000000000..1fb76c406cb08 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/v_hamming.svg @@ -0,0 +1 @@ +V_HAMMING(left,right) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json b/docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json new file mode 100644 index 0000000000000..2f2fa7136f7a7 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json @@ -0,0 +1,12 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "v_hamming", + "description" : "Calculates the hamming distance between two dense_vectors.", + "signatures" : [ ], + "examples" : [ + " from colors\n | eval similarity = v_hamming(rgb_vector, [0, 255, 255])\n | sort similarity desc, color asc" + ], + "preview" : true, + "snapshot_only" : true +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md b/docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md new file mode 100644 index 0000000000000..4c85c35d73814 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### V HAMMING +Calculates the hamming distance between two dense_vectors. + +```esql + from colors + | eval similarity = v_hamming(rgb_vector, [0, 255, 255]) + | sort similarity desc, color asc +``` diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec new file mode 100644 index 0000000000000..b7e96e8f2f305 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec @@ -0,0 +1,103 @@ + # Tests for hamming similarity function + + similarityWithVectorField + required_capability: hamming_vector_similarity_function + +// tag::vector-hamming[] + from colors + | eval similarity = v_hamming(rgb_vector, [0, 255, 255]) + | sort similarity desc, color asc +// end::vector-hamming[] + | limit 10 + | keep color, similarity + ; + +// tag::vector-hamming-result[] +color:text | similarity:double +cyan | 1.0 +azure | 0.8333333134651184 +blue | 0.6666666865348816 +honeydew | 0.6666666865348816 +lime | 0.6666666865348816 +mint cream | 0.6666666865348816 +white | 0.6666666865348816 +thistle | 0.625 +lavender | 0.5833333134651184 +aqua marine | 0.5416666865348816 +// end::vector-hamming-result[] +; + +similarityAsPartOfExpression +required_capability: hamming_vector_similarity_function + +from colors +| eval score = round((1 + v_hamming(rgb_vector, [0, 255, 255]) / 2), 3) +| sort score desc, color asc +| limit 10 +| keep color, score +; + +color:text | score:double +cyan | 1.5 +azure | 1.417 +blue | 1.333 +honeydew | 1.333 +lime | 1.333 +mint cream | 1.333 +white | 1.333 +thistle | 1.313 +lavender | 1.292 +aqua marine | 1.271 +; + +similarityWithLiteralVectors +required_capability: hamming_vector_similarity_function + +row a = 1 +| eval similarity = round(v_hamming([1, 2, 3], [0, 1, 2]), 3) +| keep similarity +; + +similarity:double +0.833 +; + +similarityWithStats +required_capability: hamming_vector_similarity_function + +from colors +| eval similarity = round(v_hamming(rgb_vector, [0, 255, 255]), 3) +| stats avg = round(avg(similarity), 3), min = min(similarity), max = max(similarity) +; + +avg:double | min:double | max:double +0.445 | 0.0 | 1.0 +; + +similarityWithNull +required_capability: hamming_vector_similarity_function +required_capability: vector_similarity_functions_support_null + +from colors +| eval similarity = v_hamming(rgb_vector, null) +| stats total_null = count(*) where similarity is null +; + +total_null:long +59 +; + +# TODO Need to implement a conversion function to convert a non-foldable row to a dense_vector +similarityWithRow-Ignore +required_capability: hamming_vector_similarity_function + +row vector = [1, 2, 3] +| eval similarity = round(v_hamming(vector, [0, 1, 2]), 3) +| sort similarity desc, color asc +| limit 10 +| keep color, similarity +; + +similarity:double +0.978 +; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java index 2d85e3bd7f93c..4ab018d3eac11 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java @@ -24,6 +24,7 @@ import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.expression.function.vector.Hamming; import org.elasticsearch.xpack.esql.expression.function.vector.L1Norm; import org.elasticsearch.xpack.esql.expression.function.vector.L2Norm; import org.elasticsearch.xpack.esql.expression.function.vector.VectorSimilarityFunction.SimilarityEvaluatorFunction; @@ -56,6 +57,9 @@ public static Iterable parameters() throws Exception { if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { params.add(new Object[] { "v_l2_norm", (SimilarityEvaluatorFunction) L2Norm::calculateSimilarity }); } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + params.add(new Object[] { "v_hamming", (SimilarityEvaluatorFunction) Hamming::calculateSimilarity }); + } return params; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 14a79f54646ba..1fce0d563739c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1364,7 +1364,12 @@ public enum Cap { /** * Support null elements on vector similarity functions */ - VECTOR_SIMILARITY_FUNCTIONS_SUPPORT_NULL; + VECTOR_SIMILARITY_FUNCTIONS_SUPPORT_NULL, + + /** + * Support for vector Hamming distance. + */ + HAMMING_VECTOR_SIMILARITY_FUNCTION(Build.current().isSnapshot()); private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 0eca67f625121..9c8a4b5fc5878 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -185,6 +185,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.util.Delay; import org.elasticsearch.xpack.esql.expression.function.vector.CosineSimilarity; import org.elasticsearch.xpack.esql.expression.function.vector.DotProduct; +import org.elasticsearch.xpack.esql.expression.function.vector.Hamming; import org.elasticsearch.xpack.esql.expression.function.vector.Knn; import org.elasticsearch.xpack.esql.expression.function.vector.L1Norm; import org.elasticsearch.xpack.esql.expression.function.vector.L2Norm; @@ -503,7 +504,8 @@ private static FunctionDefinition[][] snapshotFunctions() { def(CosineSimilarity.class, CosineSimilarity::new, "v_cosine"), def(DotProduct.class, DotProduct::new, "v_dot_product"), def(L1Norm.class, L1Norm::new, "v_l1_norm"), - def(L2Norm.class, L2Norm::new, "v_l2_norm") } }; + def(L2Norm.class, L2Norm::new, "v_l2_norm"), + def(Hamming.class, Hamming::new, "v_hamming") } }; } public EsqlFunctionRegistry snapshotRegistry() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java new file mode 100644 index 0000000000000..b8b56b70ffacc --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.vector; + +import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.function.scalar.BinaryScalarFunction; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; + +import java.io.IOException; + +public class Hamming extends VectorSimilarityFunction { + + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Hamming", Hamming::new); + static final SimilarityEvaluatorFunction SIMILARITY_FUNCTION = Hamming::calculateSimilarity; + + @FunctionInfo( + returnType = "double", + preview = true, + description = "Calculates the hamming distance between two dense_vectors.", + examples = { @Example(file = "vector-hamming", tag = "vector-hamming") }, + appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) } + ) + public Hamming( + Source source, + @Param( + name = "left", + type = { "dense_vector" }, + description = "first dense_vector to calculate hamming distance between" + ) Expression left, + @Param( + name = "right", + type = { "dense_vector" }, + description = "second dense_vector to calculate hamming distance between" + ) Expression right + ) { + super(source, left, right); + } + + private Hamming(StreamInput in) throws IOException { + super(in); + } + + @Override + protected SimilarityEvaluatorFunction getSimilarityFunction() { + return SIMILARITY_FUNCTION; + } + + @Override + protected BinaryScalarFunction replaceChildren(Expression newLeft, Expression newRight) { + return new Hamming(source(), newLeft, newRight); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Hamming::new, left(), right()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + public static float calculateSimilarity(float[] leftScratch, float[] rightScratch) { + byte[] a = new byte[leftScratch.length]; + byte[] b = new byte[rightScratch.length]; + for (int i = 0; i < leftScratch.length; i++) { + a[i] = (byte) leftScratch[i]; + } + for (int i = 0; i < leftScratch.length; i++) { + b[i] = (byte) rightScratch[i]; + } + return ((a.length * Byte.SIZE) - VectorUtil.xorBitCount(a, b)) / (float) (a.length * Byte.SIZE); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java index 4a1a2ec9386ae..182c2ba1efe34 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java @@ -42,6 +42,9 @@ public static List getNamedWritables() { if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { entries.add(L2Norm.ENTRY); } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + entries.add(Hamming.ENTRY); + } return Collections.unmodifiableList(entries); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index e040067458408..fb6d28f6306eb 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2389,6 +2389,13 @@ public void testDenseVectorImplicitCastingSimilarityFunctions() { ); checkDenseVectorImplicitCastingSimilarityFunction("v_l2_norm(float_vector, [1, 2, 3])", List.of(1f, 2f, 3f)); } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkDenseVectorImplicitCastingSimilarityFunction( + "v_hamming(byte_vector, [0.342, 0.164, 0.234])", + List.of(0.342f, 0.164f, 0.234f) + ); + checkDenseVectorImplicitCastingSimilarityFunction("v_hamming(byte_vector, [1, 2, 3])", List.of(1f, 2f, 3f)); + } } private void checkDenseVectorImplicitCastingSimilarityFunction(String similarityFunction, List expectedElems) { @@ -2421,6 +2428,9 @@ public void testNoDenseVectorFailsSimilarityFunction() { if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { checkNoDenseVectorFailsSimilarityFunction("v_l2_norm([0, 1, 2], 0.342)"); } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkNoDenseVectorFailsSimilarityFunction("v_hamming([0, 1, 2], 0.342)"); + } } private void checkNoDenseVectorFailsSimilarityFunction(String similarityFunction) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 37d6719ddccfc..77a303e10217d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -2499,6 +2499,10 @@ public void testVectorSimilarityFunctionsNullArgs() throws Exception { checkVectorSimilarityFunctionsNullArgs("v_l2_norm(null, vector)"); checkVectorSimilarityFunctionsNullArgs("v_l2_norm(vector, null)"); } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkVectorSimilarityFunctionsNullArgs("v_hamming(null, vector)"); + checkVectorSimilarityFunctionsNullArgs("v_hamming(vector, null)"); + } } private void checkVectorSimilarityFunctionsNullArgs(String functionInvocation) throws Exception { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/HammingSimilarityTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/HammingSimilarityTests.java new file mode 100644 index 0000000000000..203c0171dc5f4 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/HammingSimilarityTests.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.vector; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.FunctionName; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.List; +import java.util.function.Supplier; + +@FunctionName("v_hamming") +public class HammingSimilarityTests extends AbstractVectorSimilarityFunctionTestCase { + + public HammingSimilarityTests(@Name("TestCase") Supplier testCaseSupplier) { + super(testCaseSupplier); + } + + @ParametersFactory + public static Iterable parameters() { + return similarityParameters(Hamming.class.getSimpleName(), Hamming.SIMILARITY_FUNCTION); + } + + protected EsqlCapabilities.Cap capability() { + return EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION; + } + + @Override + protected Expression build(Source source, List args) { + return new Hamming(source, args.get(0), args.get(1)); + } +} From ed5a7a8524439e026cba18e321f18244d9485a06 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Fri, 15 Aug 2025 09:02:57 -0400 Subject: [PATCH 2/8] Fix merge --- .../org/elasticsearch/xpack/esql/analysis/VerifierTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 605122d0c592c..077d1fc7300aa 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -2319,8 +2319,8 @@ public void testVectorSimilarityFunctionsNullArgs() throws Exception { checkVectorFunctionsNullArgs("v_magnitude(null)"); } if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkVectorSimilarityFunctionsNullArgs("v_hamming(null, vector)"); - checkVectorSimilarityFunctionsNullArgs("v_hamming(vector, null)"); + checkVectorFunctionsNullArgs("v_hamming(null, vector)"); + checkVectorFunctionsNullArgs("v_hamming(vector, null)"); } } From 1c38f156ccabb53e8e13676404568a4818ba32bc Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Fri, 15 Aug 2025 09:04:32 -0400 Subject: [PATCH 3/8] Update docs/changelog/132959.yaml --- docs/changelog/132959.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/132959.yaml diff --git a/docs/changelog/132959.yaml b/docs/changelog/132959.yaml new file mode 100644 index 0000000000000..62a85002e82bd --- /dev/null +++ b/docs/changelog/132959.yaml @@ -0,0 +1,5 @@ +pr: 132959 +summary: Implement `v_hamming` +area: ES|QL +type: feature +issues: [] From d303e296739b73142d65ddf1eeb954aedf0df124 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Fri, 15 Aug 2025 13:28:04 -0400 Subject: [PATCH 4/8] Add issue --- docs/changelog/132959.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/132959.yaml b/docs/changelog/132959.yaml index 62a85002e82bd..f9b16945825a5 100644 --- a/docs/changelog/132959.yaml +++ b/docs/changelog/132959.yaml @@ -2,4 +2,4 @@ pr: 132959 summary: Implement `v_hamming` area: ES|QL type: feature -issues: [] +issues: [132056] From d3ac81bf020530cb018ec10ecdc5866a419b5b4f Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Mon, 18 Aug 2025 08:37:09 -0400 Subject: [PATCH 5/8] Update docs/changelog/132959.yaml Co-authored-by: Liam Thompson --- docs/changelog/132959.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/132959.yaml b/docs/changelog/132959.yaml index f9b16945825a5..1e1923c3beaf9 100644 --- a/docs/changelog/132959.yaml +++ b/docs/changelog/132959.yaml @@ -1,5 +1,5 @@ pr: 132959 -summary: Implement `v_hamming` +summary: Adds the `v_hamming` function for calculating the Hamming distance between two dense vectors area: ES|QL type: feature issues: [132056] From 20c74956d9dee7741b97d24605b4d7ddb41b9412 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Mon, 18 Aug 2025 08:37:25 -0400 Subject: [PATCH 6/8] Update x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java Co-authored-by: Liam Thompson --- .../xpack/esql/expression/function/vector/Hamming.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java index b8b56b70ffacc..b3cf4a65aafee 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java @@ -32,7 +32,7 @@ public class Hamming extends VectorSimilarityFunction { preview = true, description = "Calculates the hamming distance between two dense_vectors.", examples = { @Example(file = "vector-hamming", tag = "vector-hamming") }, - appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) } + appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.PREVIEW, version = "9.2.0") } ) public Hamming( Source source, From eb2c822ca081d4fd7a14138c441a0338c8685c4a Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Mon, 18 Aug 2025 08:37:53 -0400 Subject: [PATCH 7/8] Update x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java Co-authored-by: Liam Thompson --- .../xpack/esql/expression/function/vector/Hamming.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java index b3cf4a65aafee..b0ae6b60f81f5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java @@ -30,7 +30,7 @@ public class Hamming extends VectorSimilarityFunction { @FunctionInfo( returnType = "double", preview = true, - description = "Calculates the hamming distance between two dense_vectors.", + description = "Calculates the Hamming distance between two dense vectors.", examples = { @Example(file = "vector-hamming", tag = "vector-hamming") }, appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.PREVIEW, version = "9.2.0") } ) From d7cc1533a10c751786f2c720d405c58cfb1c0fde Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Mon, 18 Aug 2025 09:50:33 -0400 Subject: [PATCH 8/8] cr feedback --- .../functions/description/v_hamming.md | 2 +- .../_snippets/functions/examples/v_hamming.md | 20 ++++---- .../_snippets/functions/layout/v_hamming.md | 2 +- .../functions/parameters/v_hamming.md | 4 +- .../definition/functions/v_hamming.json | 2 +- .../esql/kibana/docs/functions/v_hamming.md | 2 +- .../main/resources/vector-hamming.csv-spec | 48 +++++++++---------- .../expression/function/vector/Hamming.java | 6 +-- 8 files changed, 43 insertions(+), 43 deletions(-) diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md index 65c393cd69067..1f200140065f7 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md +++ b/docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md @@ -2,5 +2,5 @@ **Description** -Calculates the hamming distance between two dense_vectors. +Calculates the Hamming distance between two dense vectors. diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md index e3f6b039222f0..6a8a5b38350e6 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md @@ -10,15 +10,15 @@ | color:text | similarity:double | | --- | --- | -| cyan | 1.0 | -| azure | 0.8333333134651184 | -| blue | 0.6666666865348816 | -| honeydew | 0.6666666865348816 | -| lime | 0.6666666865348816 | -| mint cream | 0.6666666865348816 | -| white | 0.6666666865348816 | -| thistle | 0.625 | -| lavender | 0.5833333134651184 | -| aqua marine | 0.5416666865348816 | +| red | 24.0 | +| orange | 20.0 | +| gold | 18.0 | +| indigo | 18.0 | +| bisque | 17.0 | +| maroon | 17.0 | +| pink | 17.0 | +| salmon | 17.0 | +| black | 16.0 | +| firebrick | 16.0 | diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md index 74c506e59e4d0..65354c6380f64 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md @@ -2,7 +2,7 @@ ## `V_HAMMING` [esql-v_hamming] ```{applies_to} -stack: development +stack: preview 9.2.0 serverless: preview ``` diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md index 4b19f1b9ab19a..6fe93636f0764 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md @@ -3,8 +3,8 @@ **Parameters** `left` -: first dense_vector to calculate hamming distance between +: First dense_vector to use to calculate the Hamming distance `right` -: second dense_vector to calculate hamming distance between +: Second dense_vector to use to calculate the Hamming distance diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json b/docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json index 2f2fa7136f7a7..51e3660ae8650 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", "type" : "scalar", "name" : "v_hamming", - "description" : "Calculates the hamming distance between two dense_vectors.", + "description" : "Calculates the Hamming distance between two dense vectors.", "signatures" : [ ], "examples" : [ " from colors\n | eval similarity = v_hamming(rgb_vector, [0, 255, 255])\n | sort similarity desc, color asc" diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md b/docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md index 4c85c35d73814..8de48ee0292ca 100644 --- a/docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md +++ b/docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md @@ -1,7 +1,7 @@ % This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. ### V HAMMING -Calculates the hamming distance between two dense_vectors. +Calculates the Hamming distance between two dense vectors. ```esql from colors diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec index b7e96e8f2f305..a7e8815139567 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec @@ -13,17 +13,17 @@ ; // tag::vector-hamming-result[] -color:text | similarity:double -cyan | 1.0 -azure | 0.8333333134651184 -blue | 0.6666666865348816 -honeydew | 0.6666666865348816 -lime | 0.6666666865348816 -mint cream | 0.6666666865348816 -white | 0.6666666865348816 -thistle | 0.625 -lavender | 0.5833333134651184 -aqua marine | 0.5416666865348816 +color:text | similarity:double +red | 24.0 +orange | 20.0 +gold | 18.0 +indigo | 18.0 +bisque | 17.0 +maroon | 17.0 +pink | 17.0 +salmon | 17.0 +black | 16.0 +firebrick | 16.0 // end::vector-hamming-result[] ; @@ -37,17 +37,17 @@ from colors | keep color, score ; -color:text | score:double -cyan | 1.5 -azure | 1.417 -blue | 1.333 -honeydew | 1.333 -lime | 1.333 -mint cream | 1.333 -white | 1.333 -thistle | 1.313 -lavender | 1.292 -aqua marine | 1.271 +color:text | score:double +red | 13.0 +orange | 11.0 +gold | 10.0 +indigo | 10.0 +bisque | 9.5 +maroon | 9.5 +pink | 9.5 +salmon | 9.5 +black | 9.0 +firebrick | 9.0 ; similarityWithLiteralVectors @@ -59,7 +59,7 @@ row a = 1 ; similarity:double -0.833 +4.0 ; similarityWithStats @@ -71,7 +71,7 @@ from colors ; avg:double | min:double | max:double -0.445 | 0.0 | 1.0 +13.322 | 0.0 | 24.0 ; similarityWithNull diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java index b0ae6b60f81f5..981304415a128 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Hamming.java @@ -39,12 +39,12 @@ public Hamming( @Param( name = "left", type = { "dense_vector" }, - description = "first dense_vector to calculate hamming distance between" + description = "First dense_vector to use to calculate the Hamming distance" ) Expression left, @Param( name = "right", type = { "dense_vector" }, - description = "second dense_vector to calculate hamming distance between" + description = "Second dense_vector to use to calculate the Hamming distance" ) Expression right ) { super(source, left, right); @@ -83,6 +83,6 @@ public static float calculateSimilarity(float[] leftScratch, float[] rightScratc for (int i = 0; i < leftScratch.length; i++) { b[i] = (byte) rightScratch[i]; } - return ((a.length * Byte.SIZE) - VectorUtil.xorBitCount(a, b)) / (float) (a.length * Byte.SIZE); + return VectorUtil.xorBitCount(a, b); } }