-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Store array offsets for keyword fields natively with synthetic source #113757
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c5580da
acf4d09
dca77d7
49efe26
f5e3d5a
59010c3
a5198ae
9b4aa5f
12d30c5
2ae8d83
fc0e627
a111f94
14c2ddd
ba9e513
007afd3
194b4ca
52c0db4
8cc5b46
dc9db8a
6e03aca
674f03e
acfaa55
0d90234
259d212
bf9ed2f
7bd3a15
d8e48c5
ae1ce9f
5e610ef
8f163eb
43a1375
cf2b9a3
893f555
61fd132
a428f11
962ac8a
1c77cfe
a785110
fa03f46
ccbf0cd
9664fa7
01cd313
7ed0857
012ac7f
4b4eaf4
64c6fe8
38f784a
b9535e1
470afad
80521c2
ab612ba
f21cce6
ca21c22
969139e
acf0aed
3d75e27
5487cf8
b89660a
ba0434b
4bcde0d
5b6b05c
37634b9
09c6a0e
60f45f2
bbee160
4e6265f
405edf4
7c7b3a3
cfe5b56
8049206
3fcb461
5b1f80b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| pr: 113757 | ||
| summary: Store array offsets for keyword fields natively with synthetic source instead of falling back to ignored source. | ||
| area: Mapping | ||
| type: enhancement | ||
| issues: [] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| package org.elasticsearch.index.mapper; | ||
|
|
||
| import org.apache.lucene.document.SortedDocValuesField; | ||
| import org.apache.lucene.util.BitUtil; | ||
| import org.elasticsearch.common.io.stream.BytesStreamOutput; | ||
| import org.elasticsearch.common.io.stream.StreamInput; | ||
|
|
||
| import java.io.IOException; | ||
| import java.util.ArrayList; | ||
| import java.util.HashMap; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.TreeMap; | ||
|
|
||
| public class FieldArrayContext { | ||
|
|
||
| private final Map<String, Offsets> offsetsPerField = new HashMap<>(); | ||
|
|
||
| void recordOffset(String field, String value) { | ||
| Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets()); | ||
| int nextOffset = arrayOffsets.currentOffset++; | ||
| var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>(2)); | ||
| offsets.add(nextOffset); | ||
| } | ||
|
|
||
| void recordNull(String field) { | ||
| Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets()); | ||
| int nextOffset = arrayOffsets.currentOffset++; | ||
| arrayOffsets.nullValueOffsets.add(nextOffset); | ||
| } | ||
|
|
||
| void maybeRecordEmptyArray(String field) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need this if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is invoked when END_ARRAY is encountered. At this time, there may be an entry for the |
||
| offsetsPerField.computeIfAbsent(field, k -> new Offsets()); | ||
| } | ||
|
|
||
| void addToLuceneDocument(DocumentParserContext context) throws IOException { | ||
| for (var entry : offsetsPerField.entrySet()) { | ||
| var fieldName = entry.getKey(); | ||
| var offset = entry.getValue(); | ||
|
|
||
| int currentOrd = 0; | ||
| // This array allows to retain the original ordering of elements in leaf arrays and retain duplicates. | ||
| int[] offsetToOrd = new int[offset.currentOffset]; | ||
| for (var offsetEntry : offset.valueToOffsets.entrySet()) { | ||
| for (var offsetAndLevel : offsetEntry.getValue()) { | ||
| offsetToOrd[offsetAndLevel] = currentOrd; | ||
| } | ||
| currentOrd++; | ||
| } | ||
| for (var nullOffset : offset.nullValueOffsets) { | ||
| offsetToOrd[nullOffset] = -1; | ||
| } | ||
|
|
||
| try (var streamOutput = new BytesStreamOutput()) { | ||
| // Could just use vint for array length, but this allows for decoding my_field: null as -1 | ||
| streamOutput.writeVInt(BitUtil.zigZagEncode(offsetToOrd.length)); | ||
| for (int ord : offsetToOrd) { | ||
| streamOutput.writeVInt(BitUtil.zigZagEncode(ord)); | ||
| } | ||
| context.doc().add(new SortedDocValuesField(fieldName, streamOutput.bytes().toBytesRef())); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| static int[] parseOffsetArray(StreamInput in) throws IOException { | ||
| int[] offsetToOrd = new int[BitUtil.zigZagDecode(in.readVInt())]; | ||
| for (int i = 0; i < offsetToOrd.length; i++) { | ||
| offsetToOrd[i] = BitUtil.zigZagDecode(in.readVInt()); | ||
| } | ||
| return offsetToOrd; | ||
| } | ||
|
|
||
| private static class Offsets { | ||
|
|
||
| int currentOffset; | ||
| // Need to use TreeMap here, so that we maintain the order in which each value (with offset) stored inserted, | ||
| // (which is in the same order the document gets parsed) so we store offsets in right order. This is the same | ||
| // order in what the values get stored in SortedSetDocValues. | ||
| final Map<String, List<Integer>> valueToOffsets = new TreeMap<>(); | ||
| final List<Integer> nullValueOffsets = new ArrayList<>(2); | ||
|
|
||
| } | ||
|
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we skip instead of changing the test?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, because the yaml tests come from 8.x. The
`yamlRestCompatTestTransform` task mutes or transforms tests before the API backwards compatibility is tested via the `yamlRestCompatTest` task.