Skip to content

Commit 2258610

Browse files
Get from translog fails with large dense_vector (#104700) (#104915)
This change fixes the engine to apply the current codec when retrieving documents from the translog. We need to use the same codec as the main index in order to ensure that all the source data is indexable. The internal codec treats some fields differently from the default one; for instance, the default codec limits dense_vectors to 1024 dimensions. This PR ensures that these customizations are applied when indexing documents for translog retrieval. Closes #104639 Co-authored-by: Elastic Machine <[email protected]>
1 parent c95df79 commit 2258610

File tree

3 files changed

+63
-21
lines changed

3 files changed

+63
-21
lines changed

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -808,7 +808,7 @@ private GetResult getFromTranslog(
808808
index,
809809
mappingLookup,
810810
documentParser,
811-
config().getAnalyzer(),
811+
config(),
812812
translogInMemorySegmentsCount::incrementAndGet
813813
);
814814
final Engine.Searcher searcher = new Engine.Searcher(

server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
package org.elasticsearch.index.engine;
1010

11-
import org.apache.lucene.analysis.Analyzer;
1211
import org.apache.lucene.index.BaseTermsEnum;
1312
import org.apache.lucene.index.BinaryDocValues;
1413
import org.apache.lucene.index.ByteVectorValues;
@@ -83,10 +82,10 @@ final class TranslogDirectoryReader extends DirectoryReader {
8382
Translog.Index operation,
8483
MappingLookup mappingLookup,
8584
DocumentParser documentParser,
86-
Analyzer analyzer,
85+
EngineConfig engineConfig,
8786
Runnable onSegmentCreated
8887
) throws IOException {
89-
this(new TranslogLeafReader(shardId, operation, mappingLookup, documentParser, analyzer, onSegmentCreated));
88+
this(new TranslogLeafReader(shardId, operation, mappingLookup, documentParser, engineConfig, onSegmentCreated));
9089
}
9190

9291
private TranslogDirectoryReader(TranslogLeafReader leafReader) throws IOException {
@@ -205,7 +204,7 @@ private static class TranslogLeafReader extends LeafReader {
205204
private final Translog.Index operation;
206205
private final MappingLookup mappingLookup;
207206
private final DocumentParser documentParser;
208-
private final Analyzer analyzer;
207+
private final EngineConfig engineConfig;
209208
private final Directory directory;
210209
private final Runnable onSegmentCreated;
211210

@@ -217,14 +216,14 @@ private static class TranslogLeafReader extends LeafReader {
217216
Translog.Index operation,
218217
MappingLookup mappingLookup,
219218
DocumentParser documentParser,
220-
Analyzer analyzer,
219+
EngineConfig engineConfig,
221220
Runnable onSegmentCreated
222221
) {
223222
this.shardId = shardId;
224223
this.operation = operation;
225224
this.mappingLookup = mappingLookup;
226225
this.documentParser = documentParser;
227-
this.analyzer = analyzer;
226+
this.engineConfig = engineConfig;
228227
this.onSegmentCreated = onSegmentCreated;
229228
this.directory = new ByteBuffersDirectory();
230229
this.uid = Uid.encodeId(operation.id());
@@ -264,7 +263,10 @@ private LeafReader createInMemoryLeafReader() {
264263

265264
parsedDocs.updateSeqID(operation.seqNo(), operation.primaryTerm());
266265
parsedDocs.version().setLongValue(operation.version());
267-
final IndexWriterConfig writeConfig = new IndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE);
266+
// To guarantee indexability, we configure the analyzer and codec using the main engine configuration
267+
final IndexWriterConfig writeConfig = new IndexWriterConfig(engineConfig.getAnalyzer()).setOpenMode(
268+
IndexWriterConfig.OpenMode.CREATE
269+
).setCodec(engineConfig.getCodec());
268270
try (IndexWriter writer = new IndexWriter(directory, writeConfig)) {
269271
writer.addDocument(parsedDocs.rootDoc());
270272
final DirectoryReader reader = open(writer);

server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.elasticsearch.xcontent.XContentType;
2525

2626
import java.io.IOException;
27+
import java.util.Arrays;
2728
import java.util.function.LongSupplier;
2829

2930
import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM;
@@ -114,6 +115,20 @@ public void testGetFromTranslogWithSyntheticSource() throws IOException {
114115
runGetFromTranslogWithOptions(docToIndex, sourceOptions, expectedFetchedSource, "\"long\"", 7L, true);
115116
}
116117

118+
public void testGetFromTranslogWithDenseVector() throws IOException {
119+
float[] vector = new float[2048];
120+
for (int i = 0; i < vector.length; i++) {
121+
vector[i] = randomFloat();
122+
}
123+
String docToIndex = Strings.format("""
124+
{
125+
"bar": %s,
126+
"foo": "foo"
127+
}
128+
""", Arrays.toString(vector));
129+
runGetFromTranslogWithOptions(docToIndex, "\"enabled\": true", docToIndex, "\"text\"", "foo", "\"dense_vector\"", false);
130+
}
131+
117132
private void runGetFromTranslogWithOptions(
118133
String docToIndex,
119134
String sourceOptions,
@@ -122,23 +137,48 @@ private void runGetFromTranslogWithOptions(
122137
Object expectedFooVal,
123138
boolean sourceOnlyFetchCreatesInMemoryReader
124139
) throws IOException {
125-
IndexMetadata metadata = IndexMetadata.builder("test").putMapping(Strings.format("""
126-
{
127-
"properties": {
128-
"foo": {
129-
"type": %s,
130-
"store": true
131-
},
132-
"bar": { "type": %s }
133-
},
134-
"_source": { %s }
135-
}
136-
}""", fieldType, fieldType, sourceOptions)).settings(indexSettings(IndexVersion.current(), 1, 1)).primaryTerm(0, 1).build();
140+
runGetFromTranslogWithOptions(
141+
docToIndex,
142+
sourceOptions,
143+
expectedResult,
144+
fieldType,
145+
expectedFooVal,
146+
fieldType,
147+
sourceOnlyFetchCreatesInMemoryReader
148+
);
149+
}
150+
151+
private void runGetFromTranslogWithOptions(
152+
String docToIndex,
153+
String sourceOptions,
154+
String expectedResult,
155+
String fieldTypeFoo,
156+
Object expectedFooVal,
157+
String fieldTypeBar,
158+
boolean sourceOnlyFetchCreatesInMemoryReader
159+
) throws IOException {
160+
IndexMetadata metadata = IndexMetadata.builder("test")
161+
.putMapping(Strings.format("""
162+
{
163+
"properties": {
164+
"foo": {
165+
"type": %s,
166+
"store": true
167+
},
168+
"bar": { "type": %s }
169+
},
170+
"_source": { %s }
171+
}
172+
}""", fieldTypeFoo, fieldTypeBar, sourceOptions))
173+
.settings(indexSettings(IndexVersion.current(), 1, 1))
174+
.primaryTerm(0, 1)
175+
.build();
137176
IndexShard primary = newShard(new ShardId(metadata.getIndex(), 0), true, "n1", metadata, EngineTestCase.randomReaderWrapper());
138177
recoverShardFromStore(primary);
139178
LongSupplier translogInMemorySegmentCount = ((InternalEngine) primary.getEngine()).translogInMemorySegmentsCount::get;
140179
long translogInMemorySegmentCountExpected = 0;
141-
indexDoc(primary, "test", "0", docToIndex);
180+
Engine.IndexResult res = indexDoc(primary, "test", "0", docToIndex);
181+
assertTrue(res.isCreated());
142182
assertTrue(primary.getEngine().refreshNeeded());
143183
GetResult testGet = primary.getService().getForUpdate("0", UNASSIGNED_SEQ_NO, UNASSIGNED_PRIMARY_TERM);
144184
assertFalse(testGet.getFields().containsKey(RoutingFieldMapper.NAME));

0 commit comments

Comments
 (0)