Skip to content

Commit 0024eea

Browse files
committed
experiment: store ignored source as binary doc values.
1 parent 7383e94 commit 0024eea

File tree

8 files changed

+394
-100
lines changed

8 files changed

+394
-100
lines changed

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ private static Version parseUnchecked(String version) {
224224
public static final IndexVersion TIME_SERIES_USE_SYNTHETIC_ID_94 = def(9_071_0_00, Version.LUCENE_10_3_2);
225225
public static final IndexVersion TIME_SERIES_DOC_VALUES_FORMAT_VERSION_3 = def(9_072_0_00, Version.LUCENE_10_3_2);
226226
public static final IndexVersion STORE_IGNORED_MALFORMED_IN_BINARY_DOC_VALUES = def(9_073_0_00, Version.LUCENE_10_3_2);
227+
public static final IndexVersion IGNORED_SOURCE_AS_DOC_VALUES = def(9_074_0_00, Version.LUCENE_10_3_2);
227228

228229
/*
229230
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,6 @@ public static StoredFieldLoader create(boolean loadSource, Set<String> fields) {
8686
* otherwise, uses the heuristic defined in {@link StoredFieldLoader#reader(LeafReaderContext, int[])}.
8787
*/
8888
public static StoredFieldLoader create(boolean loadSource, Set<String> fields, boolean forceSequentialReader) {
89-
if (loadSource == false && fields.isEmpty()) {
90-
return StoredFieldLoader.empty();
91-
}
9289
List<String> fieldsToLoad = fieldsToLoad(loadSource, fields);
9390
return new StoredFieldLoader() {
9491
@Override

server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,27 @@ protected FallbackSyntheticSourceBlockLoader(
5757
this.fieldPaths = splitIntoFieldPaths(fieldName);
5858
}
5959

60+
/**
61+
* Returns the ignored source format used by this loader.
62+
*/
63+
public IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat() {
64+
return ignoredSourceFormat;
65+
}
66+
6067
@Override
6168
public IOFunction<CircuitBreaker, ColumnAtATimeReader> columnAtATimeReader(LeafReaderContext context) {
6269
return null;
6370
}
6471

6572
@Override
6673
public RowStrideReader rowStrideReader(CircuitBreaker breaker, LeafReaderContext context) throws IOException {
67-
return new IgnoredSourceRowStrideReader<>(breaker, fieldName, fieldPaths, reader, ignoredSourceFormat);
74+
return new IgnoredSourceRowStrideReader<>(
75+
breaker,
76+
fieldName,
77+
fieldPaths,
78+
reader,
79+
ignoredSourceFormat.createLeafLoader(context.reader())
80+
);
6881
}
6982

7083
@Override
@@ -107,28 +120,29 @@ private static class IgnoredSourceRowStrideReader<T> implements RowStrideReader
107120
// Contains name of the field and all its parents
108121
private final Set<String> fieldPaths;
109122
private final Reader<T> reader;
110-
private final IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat;
123+
private final IgnoredSourceFieldMapper.IgnoredSourceLeafLoader ignoredSourceLeafLoader;
111124

112125
IgnoredSourceRowStrideReader(
113126
CircuitBreaker breaker,
114127
String fieldName,
115128
Set<String> fieldPaths,
116129
Reader<T> reader,
117-
IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat
130+
IgnoredSourceFieldMapper.IgnoredSourceLeafLoader ignoredSourceLeafLoader
118131
) {
119132
breaker.addEstimateBytesAndMaybeBreak(ESTIMATED_SIZE, "load blocks");
120133
this.breaker = breaker;
121134
this.fieldName = fieldName;
122135
this.fieldPaths = fieldPaths;
123136
this.reader = reader;
124-
this.ignoredSourceFormat = ignoredSourceFormat;
137+
this.ignoredSourceLeafLoader = ignoredSourceLeafLoader;
125138
}
126139

127140
@Override
128141
public void read(int docId, StoredFields storedFields, Builder builder) throws IOException {
129-
Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = ignoredSourceFormat.loadSingleIgnoredField(
142+
Map<String, List<IgnoredSourceFieldMapper.NameValue>> valuesForFieldAndParents = ignoredSourceLeafLoader.loadSingleIgnoredField(
130143
fieldPaths,
131-
storedFields.storedFields()
144+
storedFields.storedFields(),
145+
docId
132146
);
133147

134148
if (valuesForFieldAndParents.isEmpty()) {

server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java

Lines changed: 269 additions & 80 deletions
Large diffs are not rendered by default.

server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,12 @@ public Set<String> requiredStoredFields() {
157157
public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException {
158158
SyntheticFieldLoader loader = syntheticFieldLoaderLeafSupplier.get();
159159
return new LeafWithMetrics(
160-
new SyntheticLeaf(filter, loader, loader.docValuesLoader(reader, docIdsInLeaf), ignoredSourceFormat),
160+
new SyntheticLeaf(
161+
filter,
162+
loader,
163+
loader.docValuesLoader(reader, docIdsInLeaf),
164+
ignoredSourceFormat.createLeafLoader(reader)
165+
),
161166
metrics
162167
);
163168
}
@@ -192,21 +197,21 @@ private static class SyntheticLeaf implements Leaf {
192197
private final SyntheticFieldLoader loader;
193198
private final SyntheticFieldLoader.DocValuesLoader docValuesLoader;
194199
private final Map<String, SyntheticFieldLoader.StoredFieldLoader> storedFieldLoaders;
195-
private final IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat;
200+
private final IgnoredSourceFieldMapper.IgnoredSourceLeafLoader ignoredSourceLeafLoader;
196201

197202
private SyntheticLeaf(
198203
SourceFilter filter,
199204
SyntheticFieldLoader loader,
200205
SyntheticFieldLoader.DocValuesLoader docValuesLoader,
201-
IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat
206+
IgnoredSourceFieldMapper.IgnoredSourceLeafLoader ignoredSourceLeafLoader
202207
) {
203208
this.filter = filter;
204209
this.loader = loader;
205210
this.docValuesLoader = docValuesLoader;
206211
this.storedFieldLoaders = Map.copyOf(
207212
loader.storedFieldLoaders().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))
208213
);
209-
this.ignoredSourceFormat = ignoredSourceFormat;
214+
this.ignoredSourceLeafLoader = ignoredSourceLeafLoader;
210215
}
211216

212217
@Override
@@ -227,10 +232,8 @@ public void write(LeafStoredFieldLoader storedFieldLoader, int docId, XContentBu
227232
}
228233

229234
// Maps the names of existing objects to lists of ignored fields they contain.
230-
Map<String, List<IgnoredSourceFieldMapper.NameValue>> objectsWithIgnoredFields = ignoredSourceFormat.loadAllIgnoredFields(
231-
filter,
232-
storedFieldLoader.storedFields()
233-
);
235+
Map<String, List<IgnoredSourceFieldMapper.NameValue>> objectsWithIgnoredFields = ignoredSourceLeafLoader
236+
.loadAllIgnoredFields(filter, storedFieldLoader.storedFields(), docId);
234237

235238
if (objectsWithIgnoredFields != null) {
236239
loader.setIgnoredValues(objectsWithIgnoredFields);

server/src/main/java/org/elasticsearch/search/fetch/StoredFieldsSpec.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,9 @@ private Set<String> mergeSourcePaths(StoredFieldsSpec other) {
132132
}
133133

134134
public Set<String> requiredStoredFields() {
135-
if (sourcePaths.isEmpty() || ignoredSourceFormat == IgnoredSourceFormat.NO_IGNORED_SOURCE) {
135+
if (sourcePaths.isEmpty()
136+
|| ignoredSourceFormat == IgnoredSourceFormat.NO_IGNORED_SOURCE
137+
|| ignoredSourceFormat == IgnoredSourceFormat.DOC_VALUES_IGNORED_SOURCE) {
136138
return requiredStoredFields;
137139
}
138140
if (requiredStoredFields.isEmpty()) {

server/src/test/java/org/elasticsearch/index/mapper/BlockSourceReaderTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ private void loadBlock(MapperService mapperService, LeafReaderContext ctx, Consu
8383
loader.rowStrideStoredFieldSpec(),
8484
equalTo(
8585
StoredFieldsSpec.withSourcePaths(
86-
syntheticSource ? IgnoredSourceFormat.COALESCED_SINGLE_IGNORED_SOURCE : IgnoredSourceFormat.NO_IGNORED_SOURCE,
86+
syntheticSource ? IgnoredSourceFormat.DOC_VALUES_IGNORED_SOURCE : IgnoredSourceFormat.NO_IGNORED_SOURCE,
8787
Set.of("field")
8888
)
8989
)

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,95 @@ public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOE
321321

322322
@Override
323323
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
324-
return hasField(field) ? super.getSortedSetDocValues(field) : null;
324+
if (hasField(field) == false) {
325+
return null;
326+
}
327+
SortedSetDocValues dv = super.getSortedSetDocValues(field);
328+
if (dv != null
329+
&& IgnoredSourceFieldMapper.NAME.equals(field)
330+
&& ignoredSourceFormat == IgnoredSourceFieldMapper.IgnoredSourceFormat.DOC_VALUES_IGNORED_SOURCE) {
331+
return new FilteredIgnoredSourceDocValues(dv);
332+
}
333+
return dv;
334+
}
335+
336+
/**
337+
* Wraps {@link SortedSetDocValues} for the {@code _ignored_source} field to apply field-level security filtering.
338+
* Per-document values are decoded, filtered through the FLS field automaton, and re-encoded so that callers
339+
* only observe field values the current user is authorised to see.
340+
*/
341+
private class FilteredIgnoredSourceDocValues extends SortedSetDocValues {
342+
private final SortedSetDocValues in;
343+
private List<BytesRef> filteredValues;
344+
private int pos;
345+
346+
FilteredIgnoredSourceDocValues(SortedSetDocValues in) {
347+
this.in = in;
348+
}
349+
350+
@Override
351+
public boolean advanceExact(int target) throws IOException {
352+
if (in.advanceExact(target) == false) {
353+
return false;
354+
}
355+
filteredValues = new ArrayList<>();
356+
int count = in.docValueCount();
357+
for (int i = 0; i < count; i++) {
358+
long ord = in.nextOrd();
359+
BytesRef encoded = in.lookupOrd(ord);
360+
BytesRef filtered = ignoredSourceFormat.filterValue(encoded, v -> filter(v, filter, 0));
361+
if (filtered != null) {
362+
filteredValues.add(BytesRef.deepCopyOf(filtered));
363+
}
364+
}
365+
pos = -1;
366+
return filteredValues.isEmpty() == false;
367+
}
368+
369+
@Override
370+
public int docValueCount() {
371+
return filteredValues == null ? 0 : filteredValues.size();
372+
}
373+
374+
/**
375+
* Returns a sequential local index (0-based) into {@link #filteredValues}, not a global ordinal.
376+
* Callers must pass the returned value directly to {@link #lookupOrd(long)}.
377+
*/
378+
@Override
379+
public long nextOrd() throws IOException {
380+
return ++pos;
381+
}
382+
383+
/** Returns the pre-filtered value at the given local index returned by {@link #nextOrd()}. */
384+
@Override
385+
public BytesRef lookupOrd(long ord) throws IOException {
386+
return filteredValues.get((int) ord);
387+
}
388+
389+
@Override
390+
public long getValueCount() {
391+
return filteredValues == null ? 0 : filteredValues.size();
392+
}
393+
394+
@Override
395+
public int docID() {
396+
return in.docID();
397+
}
398+
399+
@Override
400+
public int nextDoc() throws IOException {
401+
return in.nextDoc();
402+
}
403+
404+
@Override
405+
public int advance(int target) throws IOException {
406+
return in.advance(target);
407+
}
408+
409+
@Override
410+
public long cost() {
411+
return in.cost();
412+
}
325413
}
326414

327415
@Override

0 commit comments

Comments
 (0)