|
| 1 | +/* |
| 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 3 | + * or more contributor license agreements. Licensed under the "Elastic License |
| 4 | + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side |
| 5 | + * Public License v 1"; you may not use this file except in compliance with, at |
| 6 | + * your election, the "Elastic License 2.0", the "GNU Affero General Public |
| 7 | + * License v3.0 only", or the "Server Side Public License, v 1". |
| 8 | + */ |
| 9 | + |
| 10 | +package org.elasticsearch.index.mapper; |
| 11 | + |
| 12 | +import org.apache.lucene.index.LeafReaderContext; |
| 13 | +import org.apache.lucene.index.SortedSetDocValues; |
| 14 | +import org.elasticsearch.search.fetch.StoredFieldsSpec; |
| 15 | +import org.elasticsearch.xcontent.XContentParser; |
| 16 | +import org.elasticsearch.xcontent.XContentParserConfiguration; |
| 17 | + |
| 18 | +import java.io.IOException; |
| 19 | +import java.util.ArrayList; |
| 20 | +import java.util.HashMap; |
| 21 | +import java.util.HashSet; |
| 22 | +import java.util.List; |
| 23 | +import java.util.Map; |
| 24 | +import java.util.Optional; |
| 25 | +import java.util.Set; |
| 26 | + |
| 27 | +/** |
| 28 | + * Block loader for fields that use fallback synthetic source implementation. |
| 29 | + * <br> |
| 30 | + * Usually fields have doc_values or stored fields and block loaders use them directly. In some cases neither is available |
| 31 | + * and we would fall back to (potentially synthetic) _source. However, in case of synthetic source, there is actually no need to |
| 32 | + * construct the entire _source. We know that there are no doc_values or stored fields, and therefore we will be using fallback synthetic |
| 33 | + * source. That is equivalent to just reading _ignored_source stored field directly and doing an in-place synthetic source just |
| 34 | + * for this field. |
| 35 | + * <br> |
| 36 | + * See {@link IgnoredSourceFieldMapper}. |
| 37 | + */ |
| 38 | +public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader { |
/** Field-specific converter that turns {@code _ignored_source} entries into block values. */
private final Reader<?> reader;

/** Dotted path of the field whose values this loader reads. */
private final String fieldName;

/**
 * Creates a loader for a single field.
 *
 * @param reader    field-specific converter handed to every row-stride reader created by this loader
 * @param fieldName dotted path of the field to load
 */
protected FallbackSyntheticSourceBlockLoader(Reader<?> reader, String fieldName) {
    this.fieldName = fieldName;
    this.reader = reader;
}
| 46 | + |
| 47 | + @Override |
| 48 | + public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException { |
| 49 | + return null; |
| 50 | + } |
| 51 | + |
| 52 | + @Override |
| 53 | + public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { |
| 54 | + return new IgnoredSourceRowStrideReader<>(fieldName, reader); |
| 55 | + } |
| 56 | + |
| 57 | + @Override |
| 58 | + public StoredFieldsSpec rowStrideStoredFieldSpec() { |
| 59 | + return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME)); |
| 60 | + } |
| 61 | + |
| 62 | + @Override |
| 63 | + public boolean supportsOrdinals() { |
| 64 | + return false; |
| 65 | + } |
| 66 | + |
| 67 | + @Override |
| 68 | + public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { |
| 69 | + throw new UnsupportedOperationException(); |
| 70 | + } |
| 71 | + |
| 72 | + private record IgnoredSourceRowStrideReader<T>(String fieldName, Reader<T> reader) implements RowStrideReader { |
| 73 | + @Override |
| 74 | + public void read(int docId, StoredFields storedFields, Builder builder) throws IOException { |
| 75 | + var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME); |
| 76 | + if (ignoredSource == null) { |
| 77 | + return; |
| 78 | + } |
| 79 | + |
| 80 | + Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = new HashMap<>(); |
| 81 | + |
| 82 | + // Contains name of the field and all its parents |
| 83 | + Set<String> fieldNames = new HashSet<>() { |
| 84 | + { |
| 85 | + add("_doc"); |
| 86 | + } |
| 87 | + }; |
| 88 | + |
| 89 | + var current = new StringBuilder(); |
| 90 | + for (String part : fieldName.split("\\.")) { |
| 91 | + if (current.isEmpty() == false) { |
| 92 | + current.append('.'); |
| 93 | + } |
| 94 | + current.append(part); |
| 95 | + fieldNames.add(current.toString()); |
| 96 | + } |
| 97 | + |
| 98 | + for (Object value : ignoredSource) { |
| 99 | + IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value); |
| 100 | + if (fieldNames.contains(nameValue.name())) { |
| 101 | + valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue); |
| 102 | + } |
| 103 | + } |
| 104 | + |
| 105 | + // TODO figure out how to handle XContentDataHelper#voidValue() |
| 106 | + |
| 107 | + var blockValues = new ArrayList<T>(); |
| 108 | + |
| 109 | + var leafFieldValue = valuesForFieldAndParents.get(fieldName); |
| 110 | + if (leafFieldValue != null) { |
| 111 | + readFromFieldValue(leafFieldValue, blockValues); |
| 112 | + } else { |
| 113 | + readFromParentValue(valuesForFieldAndParents, blockValues); |
| 114 | + } |
| 115 | + |
| 116 | + if (blockValues.isEmpty() == false) { |
| 117 | + if (blockValues.size() > 1) { |
| 118 | + builder.beginPositionEntry(); |
| 119 | + } |
| 120 | + |
| 121 | + reader.writeToBlock(blockValues, builder); |
| 122 | + |
| 123 | + if (blockValues.size() > 1) { |
| 124 | + builder.endPositionEntry(); |
| 125 | + } |
| 126 | + } else { |
| 127 | + builder.appendNull(); |
| 128 | + } |
| 129 | + } |
| 130 | + |
| 131 | + private void readFromFieldValue(List<IgnoredSourceFieldMapper.NameValue> nameValues, List<T> blockValues) throws IOException { |
| 132 | + if (nameValues.isEmpty()) { |
| 133 | + return; |
| 134 | + } |
| 135 | + |
| 136 | + for (var nameValue : nameValues) { |
| 137 | + // Leaf field is stored directly (not as a part of a parent object), let's try to decode it. |
| 138 | + Optional<Object> singleValue = XContentDataHelper.decode(nameValue.value()); |
| 139 | + if (singleValue.isPresent()) { |
| 140 | + reader.convertValue(singleValue.get(), blockValues); |
| 141 | + continue; |
| 142 | + } |
| 143 | + |
| 144 | + // We have a value for this field but it's an array or an object |
| 145 | + var type = XContentDataHelper.decodeType(nameValue.value()); |
| 146 | + assert type.isPresent(); |
| 147 | + |
| 148 | + try ( |
| 149 | + XContentParser parser = type.get() |
| 150 | + .xContent() |
| 151 | + .createParser( |
| 152 | + XContentParserConfiguration.EMPTY, |
| 153 | + nameValue.value().bytes, |
| 154 | + nameValue.value().offset + 1, |
| 155 | + nameValue.value().length - 1 |
| 156 | + ) |
| 157 | + ) { |
| 158 | + parser.nextToken(); |
| 159 | + parseWithReader(parser, blockValues); |
| 160 | + } |
| 161 | + } |
| 162 | + } |
| 163 | + |
| 164 | + private void readFromParentValue( |
| 165 | + Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents, |
| 166 | + List<T> blockValues |
| 167 | + ) throws IOException { |
| 168 | + if (valuesForFieldAndParents.isEmpty()) { |
| 169 | + return; |
| 170 | + } |
| 171 | + |
| 172 | + // If a parent object is stored at a particular level its children won't be stored. |
| 173 | + // So we should only ever have one parent here. |
| 174 | + assert valuesForFieldAndParents.size() == 1 : "_ignored_source field contains multiple levels of the same object"; |
| 175 | + var parentValues = valuesForFieldAndParents.values().iterator().next(); |
| 176 | + |
| 177 | + for (var nameValue : parentValues) { |
| 178 | + parseFieldFromParent(nameValue, blockValues); |
| 179 | + } |
| 180 | + } |
| 181 | + |
| 182 | + private void parseFieldFromParent(IgnoredSourceFieldMapper.NameValue nameValue, List<T> blockValues) throws IOException { |
| 183 | + var type = XContentDataHelper.decodeType(nameValue.value()); |
| 184 | + assert type.isPresent(); |
| 185 | + |
| 186 | + String nameAtThisLevel = fieldName.substring(nameValue.name().length() + 1); |
| 187 | + var filterParserConfig = XContentParserConfiguration.EMPTY.withFiltering(null, Set.of(nameAtThisLevel), Set.of(), true); |
| 188 | + try ( |
| 189 | + XContentParser parser = type.get() |
| 190 | + .xContent() |
| 191 | + .createParser(filterParserConfig, nameValue.value().bytes, nameValue.value().offset + 1, nameValue.value().length - 1) |
| 192 | + ) { |
| 193 | + parser.nextToken(); |
| 194 | + var fieldNameInParser = new StringBuilder(nameValue.name()); |
| 195 | + while (true) { |
| 196 | + if (parser.currentToken() == XContentParser.Token.FIELD_NAME) { |
| 197 | + fieldNameInParser.append('.').append(parser.currentName()); |
| 198 | + if (fieldNameInParser.toString().equals(fieldName)) { |
| 199 | + parser.nextToken(); |
| 200 | + break; |
| 201 | + } |
| 202 | + } |
| 203 | + parser.nextToken(); |
| 204 | + } |
| 205 | + parseWithReader(parser, blockValues); |
| 206 | + } |
| 207 | + } |
| 208 | + |
| 209 | + private void parseWithReader(XContentParser parser, List<T> blockValues) throws IOException { |
| 210 | + if (parser.currentToken() == XContentParser.Token.START_ARRAY) { |
| 211 | + while (parser.nextToken() != XContentParser.Token.END_ARRAY) { |
| 212 | + reader.parse(parser, blockValues); |
| 213 | + } |
| 214 | + return; |
| 215 | + } |
| 216 | + |
| 217 | + reader.parse(parser, blockValues); |
| 218 | + } |
| 219 | + |
| 220 | + @Override |
| 221 | + public boolean canReuse(int startingDocID) { |
| 222 | + return true; |
| 223 | + } |
| 224 | + } |
| 225 | + |
| 226 | + /** |
| 227 | + * Field-specific implementation that converts data stored in _ignored_source field to block loader values. |
| 228 | + * @param <T> |
| 229 | + */ |
| 230 | + public interface Reader<T> { |
| 231 | + /** |
| 232 | + * Converts a raw stored value for this field to a value in a format suitable for block loader and adds it to the provided |
| 233 | + * accumulator. |
| 234 | + * @param value raw decoded value from _ignored_source field (synthetic _source value) |
| 235 | + * @param accumulator list containing the result of conversion |
| 236 | + */ |
| 237 | + void convertValue(Object value, List<T> accumulator); |
| 238 | + |
| 239 | + /** |
| 240 | + * Parses one or more complex values using a provided parser and adds them to the provided accumulator. |
| 241 | + * @param parser parser of a value from _ignored_source field (synthetic _source value) |
| 242 | + * @param accumulator list containing the results of parsing |
| 243 | + */ |
| 244 | + void parse(XContentParser parser, List<T> accumulator) throws IOException; |
| 245 | + |
| 246 | + void writeToBlock(List<T> values, Builder blockBuilder); |
| 247 | + } |
| 248 | + |
| 249 | + public abstract static class ReaderWithNullValueSupport<T> implements Reader<T> { |
| 250 | + private final T nullValue; |
| 251 | + |
| 252 | + public ReaderWithNullValueSupport(T nullValue) { |
| 253 | + this.nullValue = nullValue; |
| 254 | + } |
| 255 | + |
| 256 | + @Override |
| 257 | + public void parse(XContentParser parser, List<T> accumulator) throws IOException { |
| 258 | + if (parser.currentToken() == XContentParser.Token.VALUE_NULL) { |
| 259 | + if (nullValue != null) { |
| 260 | + convertValue(nullValue, accumulator); |
| 261 | + } |
| 262 | + return; |
| 263 | + } |
| 264 | + |
| 265 | + parseNonNullValue(parser, accumulator); |
| 266 | + } |
| 267 | + |
| 268 | + abstract void parseNonNullValue(XContentParser parser, List<T> accumulator) throws IOException; |
| 269 | + } |
| 270 | +} |
0 commit comments