Skip to content

Commit 1a3bb97

Browse files
Adding support to exclude semantic_text subfields
1 parent 4d907ce commit 1a3bb97

File tree

3 files changed

+76
-1
lines changed

3 files changed

+76
-1
lines changed

server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@
1414
import org.elasticsearch.core.Nullable;
1515
import org.elasticsearch.index.IndexService;
1616
import org.elasticsearch.index.engine.Engine;
17+
import org.elasticsearch.index.mapper.KeywordFieldMapper;
1718
import org.elasticsearch.index.mapper.MappedFieldType;
1819
import org.elasticsearch.index.mapper.RuntimeField;
20+
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
21+
import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
1922
import org.elasticsearch.index.query.MatchAllQueryBuilder;
2023
import org.elasticsearch.index.query.QueryBuilder;
2124
import org.elasticsearch.index.query.SearchExecutionContext;
@@ -149,6 +152,18 @@ private FieldCapabilitiesIndexResponse doFetch(
149152
return new FieldCapabilitiesIndexResponse(shardId.getIndexName(), indexMappingHash, responseMap, true, indexMode);
150153
}
151154

155+
/**
156+
* Returns true if the field should be excluded from the field capabilities response.
157+
* This is used to exclude fields that are not useful for the user, such as
158+
* offset_source and inference chunk embeddings.
159+
*/
160+
private static boolean shouldExcludeField(MappedFieldType ft) {
161+
return ft.typeName().equals("offset_source")
162+
|| ((ft instanceof SparseVectorFieldMapper.SparseVectorFieldType
163+
|| ft instanceof DenseVectorFieldMapper.DenseVectorFieldType
164+
|| ft instanceof KeywordFieldMapper.KeywordFieldType) && ft.name().contains(".inference.chunks"));
165+
}
166+
152167
static Map<String, IndexFieldCapabilities> retrieveFieldCaps(
153168
SearchExecutionContext context,
154169
Predicate<String> fieldNameFilter,
@@ -173,7 +188,8 @@ static Map<String, IndexFieldCapabilities> retrieveFieldCaps(
173188
MappedFieldType ft = entry.getValue();
174189
if ((includeEmptyFields || ft.fieldHasValue(fieldInfos))
175190
&& (fieldPredicate.test(ft.name()) || context.isMetadataField(ft.name()))
176-
&& (filter == null || filter.test(ft))) {
191+
&& (filter == null || filter.test(ft))
192+
&& shouldExcludeField(ft) == false) {
177193
IndexFieldCapabilities fieldCap = new IndexFieldCapabilities(
178194
field,
179195
ft.familyTypeName(),

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,26 @@ setup:
148148
- not_exists: fields.dense_field
149149
- match: { fields.sparse_field.text.searchable: true }
150150

151+
---
"Field caps exclude chunks and embedding fields":
  - requires:
      cluster_features: "gte_v8.16.0"
      reason: field_caps support for semantic_text added in 8.16.0

  # Request every field, including those without values, so that internal
  # sub-fields would show up if they were not filtered out.
  - do:
      field_caps:
        include_empty_fields: true
        index: test-index
        fields: "*"

  - match: { indices: [ "test-index" ] }
  - exists: fields.sparse_field
  - exists: fields.dense_field
  # The "fields" object is keyed by the full field name, so dots that belong to
  # the field name must be escaped; otherwise the path walks nested keys that
  # never exist and the assertion passes vacuously.
  # NOTE(review): sub-field names assumed to be
  # <field>.inference.chunks.{embeddings,offset}; confirm against the mapper.
  - not_exists: fields.sparse_field\.inference\.chunks\.embeddings
  - not_exists: fields.sparse_field\.inference\.chunks\.offset
  - not_exists: fields.dense_field\.inference\.chunks\.embeddings
  - not_exists: fields.dense_field\.inference\.chunks\.offset
170+
151171
---
152172
"Indexes dense vector document":
153173
# Checks mapping is not updated until first doc arrives
@@ -359,3 +379,23 @@ setup:
359379
index: test-always-include-inference-id-index
360380

361381
- exists: test-always-include-inference-id-index.mappings.properties.semantic_field.inference_id
382+
383+
---
# Renamed: another test in this file is already called
# "Field caps exclude chunks and embedding fields", and test names must be
# unique within a YAML suite.
"Field caps exclude semantic_text chunk embeddings and offset fields":
  - requires:
      cluster_features: "gte_v8.16.0"
      reason: field_caps support for semantic_text added in 8.16.0

  # Request every field, including those without values, so that internal
  # sub-fields would show up if they were not filtered out.
  - do:
      field_caps:
        include_empty_fields: true
        index: test-index
        fields: "*"

  - match: { indices: [ "test-index" ] }
  - exists: fields.sparse_field
  - exists: fields.dense_field
  # Dots inside a field name are escaped so the path addresses the single key
  # "<field>.inference.chunks.<sub-field>" in the "fields" object instead of
  # descending through nested keys that can never exist.
  # NOTE(review): sub-field names assumed to be
  # <field>.inference.chunks.{embeddings,offset}; confirm against the mapper.
  - not_exists: fields.sparse_field\.inference\.chunks\.embeddings
  - not_exists: fields.sparse_field\.inference\.chunks\.offset
  - not_exists: fields.dense_field\.inference\.chunks\.embeddings
  - not_exists: fields.dense_field\.inference\.chunks\.offset

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,3 +307,22 @@ setup:
307307
another_field:
308308
type: keyword
309309

310+
---
"Field caps exclude chunks embedding and text fields":
  - requires:
      cluster_features: "gte_v8.16.0"
      reason: field_caps support for semantic_text added in 8.16.0

  # Request every field, including those without values, so that internal
  # sub-fields would show up if they were not filtered out.
  - do:
      field_caps:
        include_empty_fields: true
        index: test-index
        fields: "*"

  - match: { indices: [ "test-index" ] }
  - exists: fields.sparse_field
  - exists: fields.dense_field
  # Dots inside a field name are escaped so the path addresses the single key
  # "<field>.inference.chunks.<sub-field>" in the "fields" object; unescaped,
  # the path descends through nested keys that never exist and the assertion
  # would pass even if the sub-fields were returned.
  - not_exists: fields.sparse_field\.inference\.chunks\.embeddings
  - not_exists: fields.sparse_field\.inference\.chunks\.text
  - not_exists: fields.dense_field\.inference\.chunks\.embeddings
  - not_exists: fields.dense_field\.inference\.chunks\.text

0 commit comments

Comments
 (0)