Skip to content

Commit 3cb2144

Browse files
authored
Isolate Real-Time Get from Translog Assertion Logic (#121161) (#121222)
This commit refactors the logic introduced in #121092 so that the translog assertion logic related to inference fields resides exclusively within `TranslogOperationAsserter`. For mappings that contain inference fields, we regenerate the metadata field's content during peer recovery. This can create discrepancies between the original and regenerated sources, which is expected because the indexed values may differ from the original. To account for this, the change ensures that we compare the "synthetic" version of index operations, in the same way we already do for mappings with synthetic source enabled. In contrast, real-time get (`realtime-get`) simply filters inference fields out of the original source, so it has no need to synthesize the source as we do for synthetic-source mappings.
1 parent 9833ca5 commit 3cb2144

File tree

4 files changed

+42
-31
lines changed

4 files changed

+42
-31
lines changed

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -825,7 +825,8 @@ private GetResult getFromTranslog(
825825
mappingLookup,
826826
documentParser,
827827
config(),
828-
translogInMemorySegmentsCount::incrementAndGet
828+
translogInMemorySegmentsCount::incrementAndGet,
829+
false
829830
);
830831
final Searcher searcher = new Searcher(
831832
"realtime_get",

server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ static DirectoryReader create(
8787
MappingLookup mappingLookup,
8888
DocumentParser documentParser,
8989
EngineConfig engineConfig,
90-
Runnable onSegmentCreated
90+
Runnable onSegmentCreated,
91+
boolean forceSynthetic
9192
) throws IOException {
9293
final Directory directory = new ByteBuffersDirectory();
9394
boolean success = false;
@@ -96,7 +97,7 @@ static DirectoryReader create(
9697
// When using synthetic source, the translog operation must always be reindexed into an in-memory Lucene to ensure consistent
9798
// output for realtime-get operations. However, this can degrade the performance of realtime-get and update operations.
9899
// If slight inconsistencies in realtime-get operations are acceptable, the translog operation can be reindexed lazily.
99-
if (mappingLookup.isSourceSynthetic() || mappingLookup.inferenceFields().isEmpty() == false) {
100+
if (mappingLookup.isSourceSynthetic() || forceSynthetic) {
100101
onSegmentCreated.run();
101102
leafReader = createInMemoryReader(shardId, engineConfig, directory, documentParser, mappingLookup, false, operation);
102103
} else {

server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ static Translog.Index synthesizeSource(EngineConfig engineConfig, Translog.Index
5252
final ShardId shardId = engineConfig.getShardId();
5353
final MappingLookup mappingLookup = engineConfig.getMapperService().mappingLookup();
5454
final DocumentParser documentParser = engineConfig.getMapperService().documentParser();
55-
try (var reader = TranslogDirectoryReader.create(shardId, op, mappingLookup, documentParser, engineConfig, () -> {})) {
55+
try (var reader = TranslogDirectoryReader.create(shardId, op, mappingLookup, documentParser, engineConfig, () -> {}, true)) {
5656
final Engine.Searcher searcher = new Engine.Searcher(
5757
"assert_translog",
5858
reader,

x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java

Lines changed: 36 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@
2020
import org.elasticsearch.action.update.UpdateRequestBuilder;
2121
import org.elasticsearch.cluster.metadata.IndexMetadata;
2222
import org.elasticsearch.common.settings.Settings;
23-
import org.elasticsearch.index.IndexVersion;
23+
import org.elasticsearch.index.IndexSettings;
2424
import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper;
25+
import org.elasticsearch.index.mapper.SourceFieldMapper;
2526
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
2627
import org.elasticsearch.inference.SimilarityMeasure;
2728
import org.elasticsearch.plugins.Plugin;
@@ -49,14 +50,21 @@ public class ShardBulkInferenceActionFilterIT extends ESIntegTestCase {
4950
public static final String INDEX_NAME = "test-index";
5051

5152
private final boolean useLegacyFormat;
53+
private final boolean useSyntheticSource;
5254

53-
public ShardBulkInferenceActionFilterIT(boolean useLegacyFormat) {
55+
public ShardBulkInferenceActionFilterIT(boolean useLegacyFormat, boolean useSyntheticSource) {
5456
this.useLegacyFormat = useLegacyFormat;
57+
this.useSyntheticSource = useSyntheticSource;
5558
}
5659

5760
@ParametersFactory
5861
public static Iterable<Object[]> parameters() throws Exception {
59-
return List.of(new Object[] { true }, new Object[] { false });
62+
return List.of(
63+
new Object[] { true, false },
64+
new Object[] { true, true },
65+
new Object[] { false, false },
66+
new Object[] { false, true }
67+
);
6068
}
6169

6270
@Before
@@ -79,37 +87,38 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
7987

8088
@Override
8189
public Settings indexSettings() {
82-
return Settings.builder()
83-
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
90+
var builder = Settings.builder()
8491
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 10))
85-
.put(InferenceMetadataFieldsMapper.USE_LEGACY_SEMANTIC_TEXT_FORMAT.getKey(), useLegacyFormat)
86-
.build();
92+
.put(InferenceMetadataFieldsMapper.USE_LEGACY_SEMANTIC_TEXT_FORMAT.getKey(), useLegacyFormat);
93+
if (useSyntheticSource) {
94+
builder.put(IndexSettings.RECOVERY_USE_SYNTHETIC_SOURCE_SETTING.getKey(), true);
95+
builder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), SourceFieldMapper.Mode.SYNTHETIC.name());
96+
}
97+
return builder.build();
8798
}
8899

89100
public void testBulkOperations() throws Exception {
90-
indicesAdmin().prepareCreate(INDEX_NAME)
91-
.setMapping(
92-
String.format(
93-
Locale.ROOT,
94-
"""
95-
{
96-
"properties": {
97-
"sparse_field": {
98-
"type": "semantic_text",
99-
"inference_id": "%s"
100-
},
101-
"dense_field": {
102-
"type": "semantic_text",
103-
"inference_id": "%s"
104-
}
101+
prepareCreate(INDEX_NAME).setMapping(
102+
String.format(
103+
Locale.ROOT,
104+
"""
105+
{
106+
"properties": {
107+
"sparse_field": {
108+
"type": "semantic_text",
109+
"inference_id": "%s"
110+
},
111+
"dense_field": {
112+
"type": "semantic_text",
113+
"inference_id": "%s"
105114
}
106115
}
107-
""",
108-
TestSparseInferenceServiceExtension.TestInferenceService.NAME,
109-
TestDenseInferenceServiceExtension.TestInferenceService.NAME
110-
)
116+
}
117+
""",
118+
TestSparseInferenceServiceExtension.TestInferenceService.NAME,
119+
TestDenseInferenceServiceExtension.TestInferenceService.NAME
111120
)
112-
.get();
121+
).get();
113122

114123
int totalBulkReqs = randomIntBetween(2, 100);
115124
long totalDocs = 0;

0 commit comments

Comments
 (0)