Skip to content

Commit d8c455c

Browse files
authored
ES|QL - Remove vectors from _source when applicable (#138013)
1 parent 70a79bc commit d8c455c

File tree

3 files changed

+43
-2
lines changed

3 files changed

+43
-2
lines changed

docs/changelog/138013.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 138013
2+
summary: ES|QL - Remove vectors from `_source` when applicable
3+
area: "ES|QL"
4+
type: enhancement
5+
issues: []

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/DenseVectorFieldTypeIT.java

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.elasticsearch.xcontent.XContentBuilder;
2121
import org.elasticsearch.xcontent.XContentFactory;
2222
import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase;
23+
import org.hamcrest.Matchers;
2324
import org.junit.Before;
2425

2526
import java.io.IOException;
@@ -40,6 +41,7 @@
4041
import static org.elasticsearch.index.mapper.SourceFieldMapper.Mode.SYNTHETIC;
4142
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
4243
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION;
44+
import static org.hamcrest.Matchers.hasKey;
4345

4446
public class DenseVectorFieldTypeIT extends AbstractEsqlIntegTestCase {
4547

@@ -68,7 +70,6 @@ private enum VectorSourceOptions {
6870
@ParametersFactory
6971
public static Iterable<Object[]> parameters() throws Exception {
7072
List<Object[]> params = new ArrayList<>();
71-
7273
for (ElementType elementType : List.of(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT)) {
7374
// Test all similarities
7475
for (DenseVectorFieldMapper.VectorSimilarity similarity : DenseVectorFieldMapper.VectorSimilarity.values()) {
@@ -156,7 +157,6 @@ public void testRetrieveDenseVectorFieldData() {
156157
try (var resp = run(query)) {
157158
List<List<Object>> valuesList = EsqlTestUtils.getValuesList(resp);
158159
assertEquals(valuesList.size(), indexedVectors.size());
159-
// print all values for debugging
160160
valuesList.forEach(value -> {
161161
assertEquals(2, value.size());
162162
Integer id = (Integer) value.get(0);
@@ -180,6 +180,33 @@ public void testRetrieveDenseVectorFieldData() {
180180
}
181181
}
182182

183+
/**
 * Runs {@code FROM test METADATA _source | KEEP _source} and checks the shape of every returned
 * {@code _source} map.
 * <p>
 * Each row must hold exactly one column (the {@code _source} map) with a non-null {@code id} entry.
 * The {@code vector} entry is required only when {@code sourceOptions} is
 * {@code INCLUDE_SOURCE_VECTORS} and {@code indexedVectors} holds a non-null vector for that id;
 * in every other case the map must contain exactly one entry.
 */
// "unchecked" is needed for the cast of the _source value to Map<String, Object> below.
@SuppressWarnings("unchecked")
184+
public void testDenseVectorsIncludedInSource() {
185+
var query = """
186+
FROM test METADATA _source
187+
| KEEP _source
188+
""";
189+
190+
try (var resp = run(query)) {
191+
List<List<Object>> valuesList = EsqlTestUtils.getValuesList(resp);
192+
// The query must return as many rows as there are entries in indexedVectors.
// NOTE(review): assertEquals conventionally takes (expected, actual); the arguments look swapped here,
// which would only affect the wording of a failure message - confirm and reorder if desired.
assertEquals(valuesList.size(), indexedVectors.size());
193+
valuesList.forEach(value -> {
194+
// KEEP _source leaves a single column per row.
assertEquals(1, value.size());
195+
Map<String, Object> source = (Map<String, Object>) value.get(0);
196+
assertThat(source, hasKey("id"));
197+
assertNotNull(source.get("id"));
198+
// Go through toString() so the id is parsed regardless of the concrete type stored in the map.
Integer id = Integer.valueOf(source.get("id").toString());
199+
// The vector must appear in _source only when the test is parameterized to include source vectors
// and a non-null vector was indexed for this id
200+
if (sourceOptions == VectorSourceOptions.INCLUDE_SOURCE_VECTORS && indexedVectors.get(id) != null) {
201+
assertThat(source, hasKey("vector"));
202+
assertNotNull(source.get("vector"));
203+
} else {
204+
// Otherwise the map must hold exactly one entry (the id asserted above), i.e. no vector key.
assertThat(source, Matchers.aMapWithSize(1));
205+
}
206+
});
207+
}
208+
}
209+
183210
public void testNonIndexedDenseVectorField() throws IOException {
184211
createIndexWithDenseVector("no_dense_vectors", 64);
185212

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
import org.elasticsearch.logging.LogManager;
6262
import org.elasticsearch.logging.Logger;
6363
import org.elasticsearch.search.fetch.StoredFieldsSpec;
64+
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
6465
import org.elasticsearch.search.internal.AliasFilter;
6566
import org.elasticsearch.search.lookup.SearchLookup;
6667
import org.elasticsearch.search.lookup.SourceFilter;
@@ -97,6 +98,7 @@
9798

9899
import static org.elasticsearch.common.lucene.search.Queries.newNonNestedFilter;
99100
import static org.elasticsearch.compute.lucene.LuceneSourceOperator.NO_LIMIT;
101+
import static org.elasticsearch.index.get.ShardGetService.maybeExcludeVectorFields;
100102

101103
public class EsPhysicalOperationProviders extends AbstractPhysicalOperationProviders {
102104
private static final Logger logger = LogManager.getLogger(EsPhysicalOperationProviders.class);
@@ -464,6 +466,13 @@ public String shardIdentifier() {
464466
/**
 * Creates the {@link SourceLoader} for this shard context, giving
 * {@code ShardGetService.maybeExcludeVectorFields} the chance to replace the source filter.
 *
 * @param sourcePaths paths used as the includes of the initial {@link SourceFilter};
 *                    when {@code null}, no initial filter is built
 * @return the loader returned by {@code ctx.newSourceLoader(filter, false)}, where {@code filter} is the
 *         filter produced by {@code maybeExcludeVectorFields} when that is non-null, otherwise the
 *         initial filter (which may itself be {@code null})
 */
@Override
465467
public SourceLoader newSourceLoader(Set<String> sourcePaths) {
466468
// Include-only filter built from the requested paths; stays null when no paths were given.
var filter = sourcePaths != null ? new SourceFilter(sourcePaths.toArray(new String[0]), null) : null;
469+
// Reuse the vector exclusion helper from ShardGetService, passing the current includes/excludes
// wrapped in a FetchSourceContext (or null when there is no filter)
470+
var fetchSourceContext = filter != null ? FetchSourceContext.of(true, null, filter.getIncludes(), filter.getExcludes()) : null;
471+
var result = maybeExcludeVectorFields(ctx.getMappingLookup(), ctx.getIndexSettings(), fetchSourceContext, null);
472+
// Second element of the returned pair; a non-null value replaces the filter built above.
// NOTE(review): presumably the returned filter still carries the includes passed in through
// fetchSourceContext - confirm against ShardGetService.maybeExcludeVectorFields.
var vectorFilter = result.v2();
473+
if (vectorFilter != null) {
474+
filter = vectorFilter;
475+
}
467476
return ctx.newSourceLoader(filter, false);
468477
}
469478

0 commit comments

Comments
 (0)