Skip to content

Commit 13b743c

Browse files
Fix counted_keyword support in arrays of objects (#121558)
As a result of the randomized testing enabled in #121462, we found that documents containing arrays of objects with counted_keyword fields currently fail to parse. This PR fixes the issue by storing the counts in a custom doc-values field instead of Lucene's built-in BinaryDocValuesField. The custom CountsBinaryDocValuesField merges the counts when the same field receives values more than once within a document.
1 parent 9b6af6a commit 13b743c

File tree

3 files changed

+257
-166
lines changed

3 files changed

+257
-166
lines changed

x-pack/plugin/mapper-counted-keyword/src/main/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapper.java

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
package org.elasticsearch.xpack.countedkeyword;
99

10-
import org.apache.lucene.document.BinaryDocValuesField;
1110
import org.apache.lucene.document.FieldType;
1211
import org.apache.lucene.index.BinaryDocValues;
1312
import org.apache.lucene.index.DocValues;
@@ -19,6 +18,7 @@
1918
import org.apache.lucene.index.TermsEnum;
2019
import org.apache.lucene.search.SortField;
2120
import org.apache.lucene.util.BytesRef;
21+
import org.elasticsearch.ElasticsearchException;
2222
import org.elasticsearch.common.bytes.BytesArray;
2323
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
2424
import org.elasticsearch.common.io.stream.BytesStreamOutput;
@@ -31,6 +31,7 @@
3131
import org.elasticsearch.index.fielddata.plain.AbstractIndexOrdinalsFieldData;
3232
import org.elasticsearch.index.fielddata.plain.AbstractLeafOrdinalsFieldData;
3333
import org.elasticsearch.index.mapper.BinaryFieldMapper;
34+
import org.elasticsearch.index.mapper.CustomDocValuesField;
3435
import org.elasticsearch.index.mapper.DocumentParserContext;
3536
import org.elasticsearch.index.mapper.FieldMapper;
3637
import org.elasticsearch.index.mapper.KeywordFieldMapper;
@@ -434,15 +435,17 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
434435
return;
435436
}
436437

437-
int i = 0;
438-
int[] counts = new int[values.size()];
439-
for (Map.Entry<String, Integer> value : values.entrySet()) {
440-
context.doc().add(new KeywordFieldMapper.KeywordField(fullPath(), new BytesRef(value.getKey()), fieldType));
441-
counts[i++] = value.getValue();
438+
for (String value : values.keySet()) {
439+
context.doc().add(new KeywordFieldMapper.KeywordField(fullPath(), new BytesRef(value), fieldType));
440+
}
441+
CountsBinaryDocValuesField field = (CountsBinaryDocValuesField) context.doc().getByKey(countFieldMapper.fieldType().name());
442+
if (field == null) {
443+
field = new CountsBinaryDocValuesField(countFieldMapper.fieldType().name());
444+
field.add(values);
445+
context.doc().addWithKey(countFieldMapper.fieldType().name(), field);
446+
} else {
447+
field.add(values);
442448
}
443-
BytesStreamOutput streamOutput = new BytesStreamOutput();
444-
streamOutput.writeVIntArray(counts);
445-
context.doc().add(new BinaryDocValuesField(countFieldMapper.fullPath(), streamOutput.bytes().toBytesRef()));
446449
}
447450

448451
private void parseArray(DocumentParserContext context, SortedMap<String, Integer> values) throws IOException {
@@ -509,4 +512,37 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
509512
);
510513
}
511514

515+
private class CountsBinaryDocValuesField extends CustomDocValuesField {
516+
private final SortedMap<String, Integer> counts;
517+
518+
CountsBinaryDocValuesField(String name) {
519+
super(name);
520+
counts = new TreeMap<>();
521+
}
522+
523+
public void add(SortedMap<String, Integer> newCounts) {
524+
for (Map.Entry<String, Integer> currCount : newCounts.entrySet()) {
525+
this.counts.put(currCount.getKey(), this.counts.getOrDefault(currCount.getKey(), 0) + currCount.getValue());
526+
}
527+
}
528+
529+
@Override
530+
public BytesRef binaryValue() {
531+
try {
532+
int maxBytesPerVInt = 5;
533+
int bytesSize = (counts.size() + 1) * maxBytesPerVInt;
534+
BytesStreamOutput out = new BytesStreamOutput(bytesSize);
535+
int countsArr[] = new int[counts.size()];
536+
int i = 0;
537+
for (Integer currCount : counts.values()) {
538+
countsArr[i++] = currCount;
539+
}
540+
out.writeVIntArray(countsArr);
541+
return out.bytes().toBytesRef();
542+
} catch (IOException e) {
543+
throw new ElasticsearchException("Failed to get binary value", e);
544+
}
545+
}
546+
}
547+
512548
}

0 commit comments

Comments
 (0)