Skip to content

Commit 132350b

Browse files
Initial native synthetic source for counted_keyword fields (#120078) (#120541)
Natively support synthetic source for the counted_keyword field type when the "synthetic_source_keep" mapping attribute is explicitly set to "none". Right now we don't have the logic in place to resolve the correct value of synthetic_source_keep when that value is inherited. Until that logic exists, we can only confidently use the doc_values implementation of synthetic source when synthetic_source_keep is explicitly set to "none" in the field's mapping parameters.
1 parent 8e00d7e commit 132350b

File tree

4 files changed

+336
-5
lines changed

4 files changed

+336
-5
lines changed

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ public Set<NodeFeature> getFeatures() {
5555
"mapper.constant_keyword.synthetic_source_write_fix"
5656
);
5757

58+
public static final NodeFeature COUNTED_KEYWORD_SYNTHETIC_SOURCE_NATIVE_SUPPORT = new NodeFeature(
59+
"mapper.counted_keyword.synthetic_source_native_support"
60+
);
61+
5862
public static final NodeFeature META_FETCH_FIELDS_ERROR_CODE_CHANGED = new NodeFeature("meta_fetch_fields_error_code_changed");
5963
public static final NodeFeature SPARSE_VECTOR_STORE_SUPPORT = new NodeFeature("mapper.sparse_vector.store_support");
6064

@@ -71,6 +75,7 @@ public Set<NodeFeature> getTestFeatures() {
7175
CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX,
7276
META_FETCH_FIELDS_ERROR_CODE_CHANGED,
7377
SPARSE_VECTOR_STORE_SUPPORT,
78+
COUNTED_KEYWORD_SYNTHETIC_SOURCE_NATIVE_SUPPORT,
7479
SourceFieldMapper.SYNTHETIC_RECOVERY_SOURCE,
7580
ObjectMapper.SUBOBJECTS_FALSE_MAPPING_UPDATE_FIX
7681
);

x-pack/plugin/mapper-counted-keyword/src/main/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapper.java

Lines changed: 106 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@
1313
import org.apache.lucene.index.DocValues;
1414
import org.apache.lucene.index.DocValuesType;
1515
import org.apache.lucene.index.IndexOptions;
16+
import org.apache.lucene.index.LeafReader;
1617
import org.apache.lucene.index.LeafReaderContext;
1718
import org.apache.lucene.index.SortedSetDocValues;
1819
import org.apache.lucene.index.TermsEnum;
1920
import org.apache.lucene.search.SortField;
2021
import org.apache.lucene.util.BytesRef;
22+
import org.elasticsearch.common.bytes.BytesArray;
2123
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
2224
import org.elasticsearch.common.io.stream.BytesStreamOutput;
2325
import org.elasticsearch.common.util.BigArrays;
@@ -35,6 +37,7 @@
3537
import org.elasticsearch.index.mapper.MappedFieldType;
3638
import org.elasticsearch.index.mapper.Mapper;
3739
import org.elasticsearch.index.mapper.MapperBuilderContext;
40+
import org.elasticsearch.index.mapper.SourceLoader;
3841
import org.elasticsearch.index.mapper.SourceValueFetcher;
3942
import org.elasticsearch.index.mapper.StringFieldType;
4043
import org.elasticsearch.index.mapper.TextSearchInfo;
@@ -46,6 +49,7 @@
4649
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
4750
import org.elasticsearch.search.sort.BucketedSort;
4851
import org.elasticsearch.search.sort.SortOrder;
52+
import org.elasticsearch.xcontent.XContentBuilder;
4953
import org.elasticsearch.xcontent.XContentParser;
5054

5155
import java.io.IOException;
@@ -72,7 +76,8 @@
7276
* 2 for each key (one per document), a <code>counted_terms</code> aggregation on a <code>counted_keyword</code> field will consider
7377
* the actual count and report a count of 3 for each key.</p>
7478
*
75-
* <p>Only regular source is supported; synthetic source won't work.</p>
79+
* <p>Synthetic source is supported, but uses the fallback "ignore source" infrastructure unless <code>synthetic_source_keep</code> is
80+
* explicitly set to <code>none</code> in the field mapping parameters.</p>
7681
*/
7782
public class CountedKeywordFieldMapper extends FieldMapper {
7883
public static final String CONTENT_TYPE = "counted_keyword";
@@ -309,6 +314,81 @@ public FieldMapper build(MapperBuilderContext context) {
309314
}
310315
}
311316

317+
private static class CountedKeywordFieldSyntheticSourceLoader extends SourceLoader.DocValuesBasedSyntheticFieldLoader {
318+
private final String keywordsFieldName;
319+
private final String countsFieldName;
320+
private final String leafName;
321+
322+
private SortedSetDocValues keywordsReader;
323+
private BinaryDocValues countsReader;
324+
private boolean hasValue;
325+
326+
CountedKeywordFieldSyntheticSourceLoader(String keywordsFieldName, String countsFieldName, String leafName) {
327+
this.keywordsFieldName = keywordsFieldName;
328+
this.countsFieldName = countsFieldName;
329+
this.leafName = leafName;
330+
}
331+
332+
@Override
333+
public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
334+
keywordsReader = leafReader.getSortedSetDocValues(keywordsFieldName);
335+
countsReader = leafReader.getBinaryDocValues(countsFieldName);
336+
337+
if (keywordsReader == null || countsReader == null) {
338+
return null;
339+
}
340+
341+
return docId -> {
342+
hasValue = keywordsReader.advanceExact(docId);
343+
if (hasValue == false) {
344+
return false;
345+
}
346+
347+
boolean countsHasValue = countsReader.advanceExact(docId);
348+
assert countsHasValue;
349+
350+
return true;
351+
};
352+
}
353+
354+
@Override
355+
public boolean hasValue() {
356+
return hasValue;
357+
}
358+
359+
@Override
360+
public void write(XContentBuilder b) throws IOException {
361+
if (hasValue == false) {
362+
return;
363+
}
364+
365+
int[] counts = new BytesArray(countsReader.binaryValue()).streamInput().readVIntArray();
366+
boolean singleValue = counts.length == 1 && counts[0] == 1;
367+
368+
if (singleValue) {
369+
b.field(leafName);
370+
} else {
371+
b.startArray(leafName);
372+
}
373+
374+
for (int i = 0; i < keywordsReader.docValueCount(); i++) {
375+
BytesRef currKeyword = keywordsReader.lookupOrd(keywordsReader.nextOrd());
376+
for (int j = 0; j < counts[i]; j++) {
377+
b.utf8Value(currKeyword.bytes, currKeyword.offset, currKeyword.length);
378+
}
379+
}
380+
381+
if (singleValue == false) {
382+
b.endArray();
383+
}
384+
}
385+
386+
@Override
387+
public String fieldName() {
388+
return keywordsFieldName;
389+
}
390+
}
391+
312392
public static TypeParser PARSER = new TypeParser((n, c) -> new CountedKeywordFieldMapper.Builder(n));
313393

314394
private final FieldType fieldType;
@@ -345,6 +425,11 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
345425
} else {
346426
throw new IllegalArgumentException("Encountered unexpected token [" + parser.currentToken() + "].");
347427
}
428+
429+
if (values.isEmpty()) {
430+
return;
431+
}
432+
348433
int i = 0;
349434
int[] counts = new int[values.size()];
350435
for (Map.Entry<String, Integer> value : values.entrySet()) {
@@ -358,13 +443,18 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
358443

359444
private void parseArray(DocumentParserContext context, SortedMap<String, Integer> values) throws IOException {
360445
XContentParser parser = context.parser();
446+
int arrDepth = 1;
361447
while (true) {
362448
XContentParser.Token token = parser.nextToken();
363449
if (token == XContentParser.Token.END_ARRAY) {
364-
return;
365-
}
366-
if (token == XContentParser.Token.VALUE_STRING) {
450+
arrDepth -= 1;
451+
if (arrDepth <= 0) {
452+
return;
453+
}
454+
} else if (token == XContentParser.Token.VALUE_STRING) {
367455
parseValue(parser, values);
456+
} else if (token == XContentParser.Token.START_ARRAY) {
457+
arrDepth += 1;
368458
} else if (token == XContentParser.Token.VALUE_NULL) {
369459
// ignore null values
370460
} else {
@@ -402,4 +492,16 @@ public FieldMapper.Builder getMergeBuilder() {
402492
protected String contentType() {
403493
return CONTENT_TYPE;
404494
}
495+
496+
@Override
497+
protected SyntheticSourceSupport syntheticSourceSupport() {
498+
var keepMode = sourceKeepMode();
499+
if (keepMode.isPresent() == false || keepMode.get() != SourceKeepMode.NONE) {
500+
return super.syntheticSourceSupport();
501+
}
502+
503+
var loader = new CountedKeywordFieldSyntheticSourceLoader(fullPath(), countFieldMapper.fullPath(), leafName());
504+
return new SyntheticSourceSupport.Native(loader);
505+
}
506+
405507
}

x-pack/plugin/mapper-counted-keyword/src/test/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapperTests.java

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,25 @@
1010
import org.apache.lucene.index.DocValuesType;
1111
import org.apache.lucene.index.IndexOptions;
1212
import org.apache.lucene.index.IndexableField;
13+
import org.elasticsearch.core.CheckedConsumer;
14+
import org.elasticsearch.core.Tuple;
1315
import org.elasticsearch.index.mapper.DocumentMapper;
1416
import org.elasticsearch.index.mapper.MappedFieldType;
1517
import org.elasticsearch.index.mapper.MapperTestCase;
1618
import org.elasticsearch.index.mapper.ParsedDocument;
1719
import org.elasticsearch.plugins.Plugin;
20+
import org.elasticsearch.search.lookup.SourceFilter;
21+
import org.elasticsearch.test.ESTestCase;
1822
import org.elasticsearch.xcontent.XContentBuilder;
1923
import org.junit.AssumptionViolatedException;
2024

2125
import java.io.IOException;
2226
import java.util.Collection;
2327
import java.util.Collections;
2428
import java.util.List;
29+
import java.util.stream.Stream;
30+
31+
import static org.hamcrest.Matchers.equalTo;
2532

2633
public class CountedKeywordFieldMapperTests extends MapperTestCase {
2734
@Override
@@ -64,9 +71,103 @@ protected Object generateRandomInputValue(MappedFieldType ft) {
6471
return randomBoolean() ? null : randomAlphaOfLengthBetween(1, 10);
6572
}
6673

74+
public void testSyntheticSourceSingleNullValue() throws IOException {
75+
DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
76+
b.startObject("field");
77+
minimalMapping(b);
78+
b.field("synthetic_source_keep", "none");
79+
b.endObject();
80+
})).documentMapper();
81+
82+
String expected = "{}";
83+
CheckedConsumer<XContentBuilder, IOException> buildInput = b -> {
84+
b.field("field");
85+
b.nullValue();
86+
};
87+
88+
assertThat(syntheticSource(mapper, buildInput), equalTo(expected));
89+
assertThat(syntheticSource(mapper, new SourceFilter(new String[] { "field" }, null), buildInput), equalTo(expected));
90+
assertThat(syntheticSource(mapper, new SourceFilter(null, new String[] { "field" }), buildInput), equalTo("{}"));
91+
}
92+
93+
public void testSyntheticSourceManyNullValue() throws IOException {
94+
DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
95+
b.startObject("field");
96+
minimalMapping(b);
97+
b.field("synthetic_source_keep", "none");
98+
b.endObject();
99+
})).documentMapper();
100+
101+
int nullCount = randomIntBetween(1, 5);
102+
103+
String expected = "{}";
104+
CheckedConsumer<XContentBuilder, IOException> buildInput = b -> {
105+
b.startArray("field");
106+
for (int i = 0; i < nullCount; i++) {
107+
b.nullValue();
108+
}
109+
b.endArray();
110+
};
111+
112+
assertThat(syntheticSource(mapper, buildInput), equalTo(expected));
113+
assertThat(syntheticSource(mapper, new SourceFilter(new String[] { "field" }, null), buildInput), equalTo(expected));
114+
assertThat(syntheticSource(mapper, new SourceFilter(null, new String[] { "field" }), buildInput), equalTo("{}"));
115+
}
116+
117+
@Override
118+
public void testSyntheticSourceKeepAll() throws IOException {
119+
// For now, native synthetic source is only supported when "synthetic_source_keep" mapping attribute is "none"
120+
}
121+
122+
@Override
123+
public void testSyntheticSourceKeepArrays() throws IOException {
124+
// For now, native synthetic source is only supported when "synthetic_source_keep" mapping attribute is "none"
125+
}
126+
127+
@Override
128+
public void testSyntheticSourceKeepNone() throws IOException {
129+
// For now, native synthetic source is only supported when "synthetic_source_keep" mapping attribute is "none"
130+
}
131+
67132
@Override
68133
protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
69-
throw new AssumptionViolatedException("not supported");
134+
return new SyntheticSourceSupport() {
135+
@Override
136+
public SyntheticSourceExample example(int maxValues) throws IOException {
137+
if (randomBoolean()) {
138+
Tuple<String, String> v = generateValue();
139+
return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping);
140+
}
141+
int maxNullValues = 5;
142+
List<Tuple<String, String>> values = randomList(1, maxValues, this::generateValue);
143+
List<String> in = Stream.concat(values.stream().map(Tuple::v1), randomList(0, maxNullValues, () -> (String) null).stream())
144+
.toList();
145+
146+
in = shuffledList(in);
147+
148+
List<String> outList = values.stream().map(Tuple::v2).sorted().toList();
149+
150+
Object out = outList.size() == 1 ? outList.get(0) : outList;
151+
return new SyntheticSourceExample(in, out, this::mapping);
152+
}
153+
154+
private Tuple<String, String> generateValue() {
155+
String v = ESTestCase.randomAlphaOfLength(5);
156+
return Tuple.tuple(v, v);
157+
}
158+
159+
private void mapping(XContentBuilder b) throws IOException {
160+
minimalMapping(b);
161+
// For now, native synthetic source is only supported when "synthetic_source_keep" is "none".
162+
// Once we implement true synthetic source support, we should remove this.
163+
b.field("synthetic_source_keep", "none");
164+
}
165+
166+
@Override
167+
public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
168+
return List.of();
169+
}
170+
};
70171
}
71172

72173
@Override

0 commit comments

Comments
 (0)