Skip to content

Commit aa6822e

Browse files
Initial native synthetic source for counted_keyword fields (#120078)
Natively support synthetic source for the counted_keyword field type if the "synthetic_source_keep" mapping attribute is "none". Right now we don't have the logic set up to get the correct value of synthetic_source_keep if the value is inherited. Until we get that set up, we can only confidently use the doc_values implementation of synthetic_source if the synthetic_source_keep is explicitly set to "none" in the mapping parameters.
1 parent 06e1621 commit aa6822e

File tree

4 files changed

+336
-5
lines changed

4 files changed

+336
-5
lines changed

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ public Set<NodeFeature> getFeatures() {
3232
"mapper.constant_keyword.synthetic_source_write_fix"
3333
);
3434

35+
public static final NodeFeature COUNTED_KEYWORD_SYNTHETIC_SOURCE_NATIVE_SUPPORT = new NodeFeature(
36+
"mapper.counted_keyword.synthetic_source_native_support"
37+
);
38+
3539
public static final NodeFeature META_FETCH_FIELDS_ERROR_CODE_CHANGED = new NodeFeature("meta_fetch_fields_error_code_changed");
3640
public static final NodeFeature SPARSE_VECTOR_STORE_SUPPORT = new NodeFeature("mapper.sparse_vector.store_support");
3741

@@ -49,6 +53,7 @@ public Set<NodeFeature> getTestFeatures() {
4953
CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX,
5054
META_FETCH_FIELDS_ERROR_CODE_CHANGED,
5155
SPARSE_VECTOR_STORE_SUPPORT,
56+
COUNTED_KEYWORD_SYNTHETIC_SOURCE_NATIVE_SUPPORT,
5257
SourceFieldMapper.SYNTHETIC_RECOVERY_SOURCE
5358
);
5459
}

x-pack/plugin/mapper-counted-keyword/src/main/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapper.java

Lines changed: 106 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@
1313
import org.apache.lucene.index.DocValues;
1414
import org.apache.lucene.index.DocValuesType;
1515
import org.apache.lucene.index.IndexOptions;
16+
import org.apache.lucene.index.LeafReader;
1617
import org.apache.lucene.index.LeafReaderContext;
1718
import org.apache.lucene.index.SortedSetDocValues;
1819
import org.apache.lucene.index.TermsEnum;
1920
import org.apache.lucene.search.SortField;
2021
import org.apache.lucene.util.BytesRef;
22+
import org.elasticsearch.common.bytes.BytesArray;
2123
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
2224
import org.elasticsearch.common.io.stream.BytesStreamOutput;
2325
import org.elasticsearch.common.util.BigArrays;
@@ -35,6 +37,7 @@
3537
import org.elasticsearch.index.mapper.MappedFieldType;
3638
import org.elasticsearch.index.mapper.Mapper;
3739
import org.elasticsearch.index.mapper.MapperBuilderContext;
40+
import org.elasticsearch.index.mapper.SourceLoader;
3841
import org.elasticsearch.index.mapper.SourceValueFetcher;
3942
import org.elasticsearch.index.mapper.StringFieldType;
4043
import org.elasticsearch.index.mapper.TextSearchInfo;
@@ -46,6 +49,7 @@
4649
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
4750
import org.elasticsearch.search.sort.BucketedSort;
4851
import org.elasticsearch.search.sort.SortOrder;
52+
import org.elasticsearch.xcontent.XContentBuilder;
4953
import org.elasticsearch.xcontent.XContentParser;
5054

5155
import java.io.IOException;
@@ -72,7 +76,8 @@
7276
* 2 for each key (one per document), a <code>counted_terms</code> aggregation on a <code>counted_keyword</code> field will consider
7377
* the actual count and report a count of 3 for each key.</p>
7478
*
75-
* <p>Only regular source is supported; synthetic source won't work.</p>
79+
* <p>Synthetic source is supported, but uses the fallback "ignore source" infrastructure unless <code>synthetic_source_keep</code> is
80+
* explicitly set to <code>none</code> in the field mapping parameters.</p>
7681
*/
7782
public class CountedKeywordFieldMapper extends FieldMapper {
7883
public static final String CONTENT_TYPE = "counted_keyword";
@@ -306,6 +311,81 @@ public FieldMapper build(MapperBuilderContext context) {
306311
}
307312
}
308313

314+
private static class CountedKeywordFieldSyntheticSourceLoader extends SourceLoader.DocValuesBasedSyntheticFieldLoader {
315+
private final String keywordsFieldName;
316+
private final String countsFieldName;
317+
private final String leafName;
318+
319+
private SortedSetDocValues keywordsReader;
320+
private BinaryDocValues countsReader;
321+
private boolean hasValue;
322+
323+
CountedKeywordFieldSyntheticSourceLoader(String keywordsFieldName, String countsFieldName, String leafName) {
324+
this.keywordsFieldName = keywordsFieldName;
325+
this.countsFieldName = countsFieldName;
326+
this.leafName = leafName;
327+
}
328+
329+
@Override
330+
public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
331+
keywordsReader = leafReader.getSortedSetDocValues(keywordsFieldName);
332+
countsReader = leafReader.getBinaryDocValues(countsFieldName);
333+
334+
if (keywordsReader == null || countsReader == null) {
335+
return null;
336+
}
337+
338+
return docId -> {
339+
hasValue = keywordsReader.advanceExact(docId);
340+
if (hasValue == false) {
341+
return false;
342+
}
343+
344+
boolean countsHasValue = countsReader.advanceExact(docId);
345+
assert countsHasValue;
346+
347+
return true;
348+
};
349+
}
350+
351+
@Override
352+
public boolean hasValue() {
353+
return hasValue;
354+
}
355+
356+
@Override
357+
public void write(XContentBuilder b) throws IOException {
358+
if (hasValue == false) {
359+
return;
360+
}
361+
362+
int[] counts = new BytesArray(countsReader.binaryValue()).streamInput().readVIntArray();
363+
boolean singleValue = counts.length == 1 && counts[0] == 1;
364+
365+
if (singleValue) {
366+
b.field(leafName);
367+
} else {
368+
b.startArray(leafName);
369+
}
370+
371+
for (int i = 0; i < keywordsReader.docValueCount(); i++) {
372+
BytesRef currKeyword = keywordsReader.lookupOrd(keywordsReader.nextOrd());
373+
for (int j = 0; j < counts[i]; j++) {
374+
b.utf8Value(currKeyword.bytes, currKeyword.offset, currKeyword.length);
375+
}
376+
}
377+
378+
if (singleValue == false) {
379+
b.endArray();
380+
}
381+
}
382+
383+
@Override
384+
public String fieldName() {
385+
return keywordsFieldName;
386+
}
387+
}
388+
309389
public static TypeParser PARSER = new TypeParser((n, c) -> new CountedKeywordFieldMapper.Builder(n));
310390

311391
private final FieldType fieldType;
@@ -342,6 +422,11 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
342422
} else {
343423
throw new IllegalArgumentException("Encountered unexpected token [" + parser.currentToken() + "].");
344424
}
425+
426+
if (values.isEmpty()) {
427+
return;
428+
}
429+
345430
int i = 0;
346431
int[] counts = new int[values.size()];
347432
for (Map.Entry<String, Integer> value : values.entrySet()) {
@@ -355,13 +440,18 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
355440

356441
private void parseArray(DocumentParserContext context, SortedMap<String, Integer> values) throws IOException {
357442
XContentParser parser = context.parser();
443+
int arrDepth = 1;
358444
while (true) {
359445
XContentParser.Token token = parser.nextToken();
360446
if (token == XContentParser.Token.END_ARRAY) {
361-
return;
362-
}
363-
if (token == XContentParser.Token.VALUE_STRING) {
447+
arrDepth -= 1;
448+
if (arrDepth <= 0) {
449+
return;
450+
}
451+
} else if (token == XContentParser.Token.VALUE_STRING) {
364452
parseValue(parser, values);
453+
} else if (token == XContentParser.Token.START_ARRAY) {
454+
arrDepth += 1;
365455
} else if (token == XContentParser.Token.VALUE_NULL) {
366456
// ignore null values
367457
} else {
@@ -399,4 +489,16 @@ public FieldMapper.Builder getMergeBuilder() {
399489
protected String contentType() {
400490
return CONTENT_TYPE;
401491
}
492+
493+
@Override
494+
protected SyntheticSourceSupport syntheticSourceSupport() {
495+
var keepMode = sourceKeepMode();
496+
if (keepMode.isPresent() == false || keepMode.get() != SourceKeepMode.NONE) {
497+
return super.syntheticSourceSupport();
498+
}
499+
500+
var loader = new CountedKeywordFieldSyntheticSourceLoader(fullPath(), countFieldMapper.fullPath(), leafName());
501+
return new SyntheticSourceSupport.Native(loader);
502+
}
503+
402504
}

x-pack/plugin/mapper-counted-keyword/src/test/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapperTests.java

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,25 @@
1010
import org.apache.lucene.index.DocValuesType;
1111
import org.apache.lucene.index.IndexOptions;
1212
import org.apache.lucene.index.IndexableField;
13+
import org.elasticsearch.core.CheckedConsumer;
14+
import org.elasticsearch.core.Tuple;
1315
import org.elasticsearch.index.mapper.DocumentMapper;
1416
import org.elasticsearch.index.mapper.MappedFieldType;
1517
import org.elasticsearch.index.mapper.MapperTestCase;
1618
import org.elasticsearch.index.mapper.ParsedDocument;
1719
import org.elasticsearch.plugins.Plugin;
20+
import org.elasticsearch.search.lookup.SourceFilter;
21+
import org.elasticsearch.test.ESTestCase;
1822
import org.elasticsearch.xcontent.XContentBuilder;
1923
import org.junit.AssumptionViolatedException;
2024

2125
import java.io.IOException;
2226
import java.util.Collection;
2327
import java.util.Collections;
2428
import java.util.List;
29+
import java.util.stream.Stream;
30+
31+
import static org.hamcrest.Matchers.equalTo;
2532

2633
public class CountedKeywordFieldMapperTests extends MapperTestCase {
2734
@Override
@@ -64,9 +71,103 @@ protected Object generateRandomInputValue(MappedFieldType ft) {
6471
return randomBoolean() ? null : randomAlphaOfLengthBetween(1, 10);
6572
}
6673

74+
public void testSyntheticSourceSingleNullValue() throws IOException {
75+
DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
76+
b.startObject("field");
77+
minimalMapping(b);
78+
b.field("synthetic_source_keep", "none");
79+
b.endObject();
80+
})).documentMapper();
81+
82+
String expected = "{}";
83+
CheckedConsumer<XContentBuilder, IOException> buildInput = b -> {
84+
b.field("field");
85+
b.nullValue();
86+
};
87+
88+
assertThat(syntheticSource(mapper, buildInput), equalTo(expected));
89+
assertThat(syntheticSource(mapper, new SourceFilter(new String[] { "field" }, null), buildInput), equalTo(expected));
90+
assertThat(syntheticSource(mapper, new SourceFilter(null, new String[] { "field" }), buildInput), equalTo("{}"));
91+
}
92+
93+
public void testSyntheticSourceManyNullValue() throws IOException {
94+
DocumentMapper mapper = createSytheticSourceMapperService(mapping(b -> {
95+
b.startObject("field");
96+
minimalMapping(b);
97+
b.field("synthetic_source_keep", "none");
98+
b.endObject();
99+
})).documentMapper();
100+
101+
int nullCount = randomIntBetween(1, 5);
102+
103+
String expected = "{}";
104+
CheckedConsumer<XContentBuilder, IOException> buildInput = b -> {
105+
b.startArray("field");
106+
for (int i = 0; i < nullCount; i++) {
107+
b.nullValue();
108+
}
109+
b.endArray();
110+
};
111+
112+
assertThat(syntheticSource(mapper, buildInput), equalTo(expected));
113+
assertThat(syntheticSource(mapper, new SourceFilter(new String[] { "field" }, null), buildInput), equalTo(expected));
114+
assertThat(syntheticSource(mapper, new SourceFilter(null, new String[] { "field" }), buildInput), equalTo("{}"));
115+
}
116+
117+
@Override
118+
public void testSyntheticSourceKeepAll() throws IOException {
119+
// Overridden with an empty body, so the inherited test of the same name does nothing for this mapper.
// For now, native synthetic source is only supported when "synthetic_source_keep" mapping attribute is "none"
120+
}
121+
122+
@Override
123+
public void testSyntheticSourceKeepArrays() throws IOException {
124+
// Overridden with an empty body, so the inherited test of the same name does nothing for this mapper.
// For now, native synthetic source is only supported when "synthetic_source_keep" mapping attribute is "none"
125+
}
126+
127+
@Override
128+
public void testSyntheticSourceKeepNone() throws IOException {
129+
// Overridden with an empty body, so the inherited test of the same name does nothing for this mapper.
// NOTE(review): the reason below is shared with the keep-all/keep-arrays overrides; confirm it is the
// intended justification for skipping the "none" case as well.
// For now, native synthetic source is only supported when "synthetic_source_keep" mapping attribute is "none"
130+
}
131+
67132
/**
 * Supplies randomized synthetic source examples for the counted_keyword field.
 * Every example uses a mapping with "synthetic_source_keep" set to "none"; no invalid examples are provided.
 */
// NOTE(review): this span comes from a diff view; the line after the "69-" marker appears to be the removed
// pre-change body and the lines after "+" markers its replacement. Confirm against the actual file.
@Override
68133
protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
69-
throw new AssumptionViolatedException("not supported");
134+
return new SyntheticSourceSupport() {
135+
@Override
136+
public SyntheticSourceExample example(int maxValues) throws IOException {
137+
// Randomly pick between a single scalar value and a multi-valued input.
if (randomBoolean()) {
138+
Tuple<String, String> v = generateValue();
139+
return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping);
140+
}
141+
int maxNullValues = 5;
142+
List<Tuple<String, String>> values = randomList(1, maxValues, this::generateValue);
143+
// Input is the generated values followed by zero to maxNullValues nulls (shuffled below).
List<String> in = Stream.concat(values.stream().map(Tuple::v1), randomList(0, maxNullValues, () -> (String) null).stream())
144+
.toList();
145+
146+
in = shuffledList(in);
147+
148+
// Expected output contains only the non-null values, in sorted order.
List<String> outList = values.stream().map(Tuple::v2).sorted().toList();
149+
150+
// A single remaining value is expected as a scalar rather than a one-element list.
Object out = outList.size() == 1 ? outList.get(0) : outList;
151+
return new SyntheticSourceExample(in, out, this::mapping);
152+
}
153+
154+
// Produces an (input, expected output) pair; both sides are the same random 5-letter string.
private Tuple<String, String> generateValue() {
155+
String v = ESTestCase.randomAlphaOfLength(5);
156+
return Tuple.tuple(v, v);
157+
}
158+
159+
private void mapping(XContentBuilder b) throws IOException {
160+
minimalMapping(b);
161+
// For now, synthetic source is only supported when "synthetic_source_keep" is "none".
162+
// Once we implement true synthetic source support, we should remove this.
163+
b.field("synthetic_source_keep", "none");
164+
}
165+
166+
@Override
167+
public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
168+
return List.of();
169+
}
170+
};
70171
}
71172

72173
@Override

0 commit comments

Comments
 (0)