Skip to content

Commit ba3db3a

Browse files
Fix counted_keyword support in arrays of objects (#121558) (#121708)
As a result of the randomized testing enabled in #121462, we found that we currently fail to parse documents with arrays of objects containing counted_keyword fields. This PR fixes the issue by storing the counts in a custom doc values field instead of Lucene's built-in BinaryDocValuesField. The custom CountsBinaryDocValuesField sums the counts when the same field occurs multiple times in one document. (cherry picked from commit 13b743c)
1 parent d2b27d9 commit ba3db3a

File tree

3 files changed

+257
-166
lines changed

3 files changed

+257
-166
lines changed

x-pack/plugin/mapper-counted-keyword/src/main/java/org/elasticsearch/xpack/countedkeyword/CountedKeywordFieldMapper.java

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
package org.elasticsearch.xpack.countedkeyword;
99

10-
import org.apache.lucene.document.BinaryDocValuesField;
1110
import org.apache.lucene.document.FieldType;
1211
import org.apache.lucene.index.BinaryDocValues;
1312
import org.apache.lucene.index.DocValues;
@@ -19,6 +18,7 @@
1918
import org.apache.lucene.index.TermsEnum;
2019
import org.apache.lucene.search.SortField;
2120
import org.apache.lucene.util.BytesRef;
21+
import org.elasticsearch.ElasticsearchException;
2222
import org.elasticsearch.common.bytes.BytesArray;
2323
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
2424
import org.elasticsearch.common.io.stream.BytesStreamOutput;
@@ -31,6 +31,7 @@
3131
import org.elasticsearch.index.fielddata.plain.AbstractIndexOrdinalsFieldData;
3232
import org.elasticsearch.index.fielddata.plain.AbstractLeafOrdinalsFieldData;
3333
import org.elasticsearch.index.mapper.BinaryFieldMapper;
34+
import org.elasticsearch.index.mapper.CustomDocValuesField;
3435
import org.elasticsearch.index.mapper.DocumentParserContext;
3536
import org.elasticsearch.index.mapper.FieldMapper;
3637
import org.elasticsearch.index.mapper.KeywordFieldMapper;
@@ -437,15 +438,17 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
437438
return;
438439
}
439440

440-
int i = 0;
441-
int[] counts = new int[values.size()];
442-
for (Map.Entry<String, Integer> value : values.entrySet()) {
443-
context.doc().add(new KeywordFieldMapper.KeywordField(fullPath(), new BytesRef(value.getKey()), fieldType));
444-
counts[i++] = value.getValue();
441+
for (String value : values.keySet()) {
442+
context.doc().add(new KeywordFieldMapper.KeywordField(fullPath(), new BytesRef(value), fieldType));
443+
}
444+
CountsBinaryDocValuesField field = (CountsBinaryDocValuesField) context.doc().getByKey(countFieldMapper.fieldType().name());
445+
if (field == null) {
446+
field = new CountsBinaryDocValuesField(countFieldMapper.fieldType().name());
447+
field.add(values);
448+
context.doc().addWithKey(countFieldMapper.fieldType().name(), field);
449+
} else {
450+
field.add(values);
445451
}
446-
BytesStreamOutput streamOutput = new BytesStreamOutput();
447-
streamOutput.writeVIntArray(counts);
448-
context.doc().add(new BinaryDocValuesField(countFieldMapper.fullPath(), streamOutput.bytes().toBytesRef()));
449452
}
450453

451454
private void parseArray(DocumentParserContext context, SortedMap<String, Integer> values) throws IOException {
@@ -512,4 +515,37 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
512515
);
513516
}
514517

518+
private class CountsBinaryDocValuesField extends CustomDocValuesField {
519+
private final SortedMap<String, Integer> counts;
520+
521+
CountsBinaryDocValuesField(String name) {
522+
super(name);
523+
counts = new TreeMap<>();
524+
}
525+
526+
public void add(SortedMap<String, Integer> newCounts) {
527+
for (Map.Entry<String, Integer> currCount : newCounts.entrySet()) {
528+
this.counts.put(currCount.getKey(), this.counts.getOrDefault(currCount.getKey(), 0) + currCount.getValue());
529+
}
530+
}
531+
532+
@Override
533+
public BytesRef binaryValue() {
534+
try {
535+
int maxBytesPerVInt = 5;
536+
int bytesSize = (counts.size() + 1) * maxBytesPerVInt;
537+
BytesStreamOutput out = new BytesStreamOutput(bytesSize);
538+
int countsArr[] = new int[counts.size()];
539+
int i = 0;
540+
for (Integer currCount : counts.values()) {
541+
countsArr[i++] = currCount;
542+
}
543+
out.writeVIntArray(countsArr);
544+
return out.bytes().toBytesRef();
545+
} catch (IOException e) {
546+
throw new ElasticsearchException("Failed to get binary value", e);
547+
}
548+
}
549+
}
550+
515551
}

0 commit comments

Comments
 (0)