Skip to content

Commit 1be0364

Browse files
committed
Change the verification process so that ToDenseVector is no longer added implicitly; arguments are translated directly into dense_vector literals instead
1 parent b67e121 commit 1be0364

File tree

13 files changed

+165
-58
lines changed

13 files changed

+165
-58
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ required_capability: knn_function_v3
308308
required_capability: to_dense_vector_function
309309

310310
from colors metadata _score
311-
| eval query = [0, 120, 0]
311+
| eval query = to_dense_vector([0, 120, 0])
312312
| where knn(rgb_vector, query, 10)
313313
| sort _score desc, color asc
314314
| keep color, rgb_vector
@@ -333,7 +333,6 @@ required_capability: knn_function_v3
333333
required_capability: to_dense_vector_function
334334

335335
from colors metadata _score
336-
| eval query = [0, 120, 0]
337336
| where knn(rgb_vector, "007800", 10)
338337
| sort _score desc, color asc
339338
| keep color, rgb_vector

x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,12 @@ similarityWithRow
9494
required_capability: cosine_vector_similarity_function
9595
required_capability: to_dense_vector_function
9696

97-
row vector = [1, 2, 3]
97+
row vector = to_dense_vector([1, 2, 3])
9898
| eval similarity = round(v_cosine(vector, [0, 1, 2]), 3)
9999
;
100100

101-
vector: integer | similarity:double
102-
[1, 2, 3] | 0.978
101+
vector: dense_vector | similarity:double
102+
[1.0, 2.0, 3.0] | 0.978
103103
;
104104

105105
similarityWithVectorField
@@ -108,7 +108,7 @@ required_capability: to_dense_vector_function
108108

109109
from colors
110110
| where color != "black"
111-
| eval query = [0, 255, 255]
111+
| eval query = to_dense_vector([0, 255, 255])
112112
| eval similarity = v_cosine(rgb_vector, query)
113113
| sort similarity desc, color asc
114114
| limit 10

x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,20 +92,20 @@ similarityWithRow
9292
required_capability: dot_product_vector_similarity_function
9393
required_capability: to_dense_vector_function
9494

95-
row vector = [1, 2, 3]
95+
row vector = to_dense_vector([1, 2, 3])
9696
| eval similarity = round(v_dot_product(vector, [0, 1, 2]), 3)
9797
;
9898

99-
vector: integer | similarity:double
100-
[1, 2, 3] | 4.5
99+
vector: dense_vector | similarity:double
100+
[1.0, 2.0, 3.0] | 4.5
101101
;
102102

103103
similarityWithVectorField
104104
required_capability: dot_product_vector_similarity_function
105105
required_capability: to_dense_vector_function
106106

107107
from colors
108-
| eval query = [0, 255, 255]
108+
| eval query = to_dense_vector([0, 255, 255])
109109
| eval similarity = v_dot_product(rgb_vector, query)
110110
| sort similarity desc, color asc
111111
| limit 10

x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,20 +91,20 @@ similarityWithRow
9191
required_capability: hamming_vector_similarity_function
9292
required_capability: to_dense_vector_function
9393

94-
row vector = [1, 2, 3]
94+
row vector = to_dense_vector([1, 2, 3])
9595
| eval similarity = round(v_hamming(vector, [0, 1, 2]), 3)
9696
;
9797

98-
vector: integer | similarity:double
99-
[1, 2, 3] | 4.0
98+
vector: dense_vector | similarity:double
99+
[1.0, 2.0, 3.0] | 4.0
100100
;
101101

102102
similarityWithVectorField
103103
required_capability: hamming_vector_similarity_function
104104
required_capability: to_dense_vector_function
105105

106106
from colors
107-
| eval query = [0, 255, 255]
107+
| eval query = to_dense_vector([0, 255, 255])
108108
| eval similarity = v_hamming(rgb_vector, query)
109109
| sort similarity desc, color asc
110110
| limit 10

x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,20 +91,20 @@ similarityWithRow
9191
required_capability: l1_norm_vector_similarity_function
9292
required_capability: to_dense_vector_function
9393

94-
row vector = [1, 2, 3]
94+
row vector = to_dense_vector([1, 2, 3])
9595
| eval similarity = round(v_l1_norm(vector, [0, 1, 2]), 3)
9696
;
9797

98-
vector: integer | similarity:double
99-
[1, 2, 3] | 3.0
98+
vector: dense_vector | similarity:double
99+
[1.0, 2.0, 3.0] | 3.0
100100
;
101101

102102
similarityWithVectorField
103103
required_capability: l1_norm_vector_similarity_function
104104
required_capability: to_dense_vector_function
105105

106106
from colors
107-
| eval query = [0, 255, 255]
107+
| eval query = to_dense_vector([0, 255, 255])
108108
| eval similarity = v_l1_norm(rgb_vector, query)
109109
| sort similarity desc, color asc
110110
| limit 10

x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,20 +91,20 @@ similarityWithRow
9191
required_capability: l2_norm_vector_similarity_function
9292
required_capability: to_dense_vector_function
9393

94-
row vector = [1, 2, 3]
94+
row vector = to_dense_vector([1, 2, 3])
9595
| eval similarity = round(v_l2_norm(vector, [0, 1, 2]), 3)
9696
;
9797

98-
vector: integer | similarity:double
99-
[1, 2, 3] | 1.732
98+
vector: dense_vector | similarity:double
99+
[1.0, 2.0, 3.0] | 1.732
100100
;
101101

102102
similarityWithVectorField
103103
required_capability: l2_norm_vector_similarity_function
104104
required_capability: to_dense_vector_function
105105

106106
from colors
107-
| eval query = [0, 255, 255]
107+
| eval query = to_dense_vector([0, 255, 255])
108108
| eval similarity = v_l2_norm(rgb_vector, query)
109109
| sort similarity desc, color asc
110110
| limit 10

x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,10 @@ magnitudeWithRow
9090
required_capability: magnitude_scalar_vector_function
9191
required_capability: to_dense_vector_function
9292

93-
row vector = [1, 2, 3]
93+
row vector = to_dense_vector([1, 2, 3])
9494
| eval magnitude = round(v_magnitude(vector), 3)
9595
;
9696

97-
vector: integer | magnitude:double
98-
[1, 2, 3] | 3.742
97+
vector: dense_vector | magnitude:double
98+
[1.0, 2.0, 3.0] | 3.742
9999
;

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,23 @@ public void testKnnWithLookupJoin() {
191191
);
192192
}
193193

194+
195+
public void testKnnIncorrectCasting() {
196+
var query = String.format(Locale.ROOT, """
197+
FROM test
198+
| WHERE KNN(vector, "notcorrect", 5)
199+
""");
200+
201+
var error = expectThrows(VerificationException.class, () -> run(query));
202+
assertThat(
203+
error.getMessage(),
204+
containsString(
205+
"line 3:13: [KNN] function cannot operate on [lookup_vector], supplied by an index [test_lookup] in non-STANDARD "
206+
+ "mode [lookup]"
207+
)
208+
);
209+
}
210+
194211
@Before
195212
public void setup() throws IOException {
196213
assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled());

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@
7777
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FromAggregateMetricDouble;
7878
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToAggregateMetricDouble;
7979
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos;
80-
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDenseVector;
8180
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
8281
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
8382
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
@@ -1686,21 +1685,28 @@ private static Expression processVectorFunction(org.elasticsearch.xpack.esql.cor
16861685
floatVector = ((List<Number>) folded).stream().map(Number::floatValue).collect(Collectors.toList());
16871686
}
16881687
} else if (folded instanceof BytesRef hexString && arg.dataType() == KEYWORD) {
1689-
byte[] bytes = HexFormat.of().parseHex(hexString.utf8ToString());
1690-
floatVector = new ArrayList<>();
1691-
for (byte value : bytes) {
1692-
floatVector.add((float) value);
1688+
try {
1689+
byte[] bytes = HexFormat.of().parseHex(hexString.utf8ToString());
1690+
floatVector = new ArrayList<>();
1691+
for (byte value : bytes) {
1692+
floatVector.add((float) value);
1693+
}
1694+
} catch (IllegalArgumentException e) {
1695+
throw new VerificationException(
1696+
"Error in ["
1697+
+ vectorFunction.sourceText()
1698+
+ "] for argument ["
1699+
+ arg.sourceText()
1700+
+ "]; dense_vectors must be a hex-encoded string: "
1701+
+ e.getMessage()
1702+
);
16931703
}
16941704
}
16951705
if (floatVector != null) {
16961706
Literal denseVector = new Literal(arg.source(), floatVector, DataType.DENSE_VECTOR);
16971707
newArgs.add(denseVector);
16981708
continue;
16991709
}
1700-
} else if ((arg instanceof ToDenseVector == false) && (arg.dataType().isNumeric() || arg.dataType() == KEYWORD)) {
1701-
// add casting function if it's not already there
1702-
newArgs.add(new ToDenseVector(arg.source(), arg));
1703-
continue;
17041710
}
17051711
}
17061712
newArgs.add(arg);

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,37 @@
77

88
package org.elasticsearch.xpack.esql.expression.function.scalar.convert;
99

10+
import org.apache.lucene.util.BytesRef;
1011
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1112
import org.elasticsearch.common.io.stream.StreamInput;
1213
import org.elasticsearch.compute.ann.ConvertEvaluator;
14+
import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware;
1315
import org.elasticsearch.xpack.esql.capabilities.PostAnalysisVerificationAware;
16+
import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware;
17+
import org.elasticsearch.xpack.esql.common.Failure;
18+
import org.elasticsearch.xpack.esql.common.Failures;
1419
import org.elasticsearch.xpack.esql.core.expression.Expression;
20+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
1521
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
1622
import org.elasticsearch.xpack.esql.core.tree.Source;
1723
import org.elasticsearch.xpack.esql.core.type.DataType;
1824
import org.elasticsearch.xpack.esql.expression.function.Example;
1925
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
2026
import org.elasticsearch.xpack.esql.expression.function.Param;
27+
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
2128

2229
import java.io.IOException;
2330
import java.util.List;
2431
import java.util.Map;
32+
import java.util.function.BiConsumer;
2533

2634
import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR;
2735
import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE;
2836
import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER;
2937
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
3038
import static org.elasticsearch.xpack.esql.core.type.DataType.LONG;
3139

32-
public class ToDenseVector extends AbstractConvertFunction implements PostAnalysisVerificationAware {
40+
public class ToDenseVector extends AbstractConvertFunction implements PostAnalysisPlanVerificationAware {
3341
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
3442
Expression.class,
3543
"ToDenseVector",
@@ -60,6 +68,40 @@ public ToDenseVector(
6068
super(source, field);
6169
}
6270

71+
@Override
72+
public BiConsumer<LogicalPlan, Failures> postAnalysisPlanVerification() {
73+
return (lp, failures) -> {
74+
Expression arg = children().get(0);
75+
if (arg.foldable()) {
76+
Object fold = arg.fold(FoldContext.small());
77+
if ((fold instanceof List<?> list) && arg.dataType().isNumeric()) {
78+
if (list.size() <= 1) {
79+
failures.add(Failure.fail(
80+
this,
81+
"[" + sourceText() + "] requires at least two values to convert to a dense_vector"
82+
));
83+
}
84+
return;
85+
}
86+
if ((arg.dataType() == KEYWORD) && fold instanceof BytesRef bytesRef) {
87+
if (bytesRef.length == 0) {
88+
failures.add(Failure.fail(
89+
this,
90+
"["
91+
+ sourceText()
92+
+ "] must be a non-empty hexadecimal string"));
93+
}
94+
return;
95+
}
96+
failures.add(Failure.fail(
97+
this,
98+
"["
99+
+ sourceText()
100+
+ "] must be a multi-valued input of numbers or an hexadecimal string"));
101+
}
102+
};
103+
}
104+
63105
private ToDenseVector(StreamInput in) throws IOException {
64106
super(in);
65107
}

0 commit comments

Comments
 (0)