Skip to content

Commit bcfd399

Browse files
Implement v_hamming (#132959)
Implements #132056
1 parent fe44a4d commit bcfd399

File tree

17 files changed

+358
-2
lines changed

17 files changed

+358
-2
lines changed

docs/changelog/132959.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132959
2+
summary: Adds the `v_hamming` function for calculating the Hamming distance between two dense vectors
3+
area: ES|QL
4+
type: feature
5+
issues: [132056]

docs/reference/query-languages/esql/_snippets/functions/description/v_hamming.md

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/examples/v_hamming.md

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/layout/v_hamming.md

Lines changed: 27 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/parameters/v_hamming.md

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/images/functions/v_hamming.svg

Lines changed: 1 addition & 0 deletions
Loading

docs/reference/query-languages/esql/kibana/definition/functions/v_hamming.json

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/kibana/docs/functions/v_hamming.md

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# Tests for the Hamming distance function (v_hamming)
2+
3+
similarityWithVectorField
4+
required_capability: hamming_vector_similarity_function
5+
6+
// tag::vector-hamming[]
7+
from colors
8+
| eval similarity = v_hamming(rgb_vector, [0, 255, 255])
9+
| sort similarity desc, color asc
10+
// end::vector-hamming[]
11+
| limit 10
12+
| keep color, similarity
13+
;
14+
15+
// tag::vector-hamming-result[]
16+
color:text | similarity:double
17+
red | 24.0
18+
orange | 20.0
19+
gold | 18.0
20+
indigo | 18.0
21+
bisque | 17.0
22+
maroon | 17.0
23+
pink | 17.0
24+
salmon | 17.0
25+
black | 16.0
26+
firebrick | 16.0
27+
// end::vector-hamming-result[]
28+
;
29+
30+
similarityAsPartOfExpression
31+
required_capability: hamming_vector_similarity_function
32+
33+
from colors
34+
| eval score = round((1 + v_hamming(rgb_vector, [0, 255, 255]) / 2), 3)
35+
| sort score desc, color asc
36+
| limit 10
37+
| keep color, score
38+
;
39+
40+
color:text | score:double
41+
red | 13.0
42+
orange | 11.0
43+
gold | 10.0
44+
indigo | 10.0
45+
bisque | 9.5
46+
maroon | 9.5
47+
pink | 9.5
48+
salmon | 9.5
49+
black | 9.0
50+
firebrick | 9.0
51+
;
52+
53+
similarityWithLiteralVectors
54+
required_capability: hamming_vector_similarity_function
55+
56+
row a = 1
57+
| eval similarity = round(v_hamming([1, 2, 3], [0, 1, 2]), 3)
58+
| keep similarity
59+
;
60+
61+
similarity:double
62+
4.0
63+
;
64+
65+
similarityWithStats
66+
required_capability: hamming_vector_similarity_function
67+
68+
from colors
69+
| eval similarity = round(v_hamming(rgb_vector, [0, 255, 255]), 3)
70+
| stats avg = round(avg(similarity), 3), min = min(similarity), max = max(similarity)
71+
;
72+
73+
avg:double | min:double | max:double
74+
13.322 | 0.0 | 24.0
75+
;
76+
77+
similarityWithNull
78+
required_capability: hamming_vector_similarity_function
79+
required_capability: vector_similarity_functions_support_null
80+
81+
from colors
82+
| eval similarity = v_hamming(rgb_vector, null)
83+
| stats total_null = count(*) where similarity is null
84+
;
85+
86+
total_null:long
87+
59
88+
;
89+
90+
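# TODO: implement a conversion function that converts a non-foldable row value to a dense_vector, then enable this test. NOTE(review): before enabling, fix the query (it sorts/keeps a `color` column that `row` does not define) and the expected value (0.978 does not match the 4.0 that the same vectors produce in similarityWithLiteralVectors).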
91+
similarityWithRow-Ignore
92+
required_capability: hamming_vector_similarity_function
93+
94+
row vector = [1, 2, 3]
95+
| eval similarity = round(v_hamming(vector, [0, 1, 2]), 3)
96+
| sort similarity desc, color asc
97+
| limit 10
98+
| keep color, similarity
99+
;
100+
101+
similarity:double
102+
0.978
103+
;

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.elasticsearch.xpack.esql.EsqlTestUtils;
2525
import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase;
2626
import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
27+
import org.elasticsearch.xpack.esql.expression.function.vector.Hamming;
2728
import org.elasticsearch.xpack.esql.expression.function.vector.L1Norm;
2829
import org.elasticsearch.xpack.esql.expression.function.vector.L2Norm;
2930
import org.elasticsearch.xpack.esql.expression.function.vector.VectorSimilarityFunction.SimilarityEvaluatorFunction;
@@ -56,6 +57,9 @@ public static Iterable<Object[]> parameters() throws Exception {
5657
if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) {
5758
params.add(new Object[] { "v_l2_norm", (SimilarityEvaluatorFunction) L2Norm::calculateSimilarity });
5859
}
60+
if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) {
61+
params.add(new Object[] { "v_hamming", (SimilarityEvaluatorFunction) Hamming::calculateSimilarity });
62+
}
5963

6064
return params;
6165
}

0 commit comments

Comments
 (0)