Skip to content

Commit 7df8dcf

Browse files
committed
Handle delete document level failures (#46100)
Today we assume that document-level failures cannot occur for deletes. This assumption is bogus: deletes can fail for a variety of reasons, such as the Lucene index having reached its document limit. Because of this assumption, we were asserting that such a document-level failure would never happen. When this bogus assertion is violated, we fail the node, which is catastrophic. Instead, we need to treat this as a fatal engine exception.
1 parent 43a22da commit 7df8dcf

File tree

2 files changed

+62
-25
lines changed

2 files changed

+62
-25
lines changed

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
import java.util.Collection;
9393
import java.util.HashMap;
9494
import java.util.List;
95+
import java.util.Locale;
9596
import java.util.Map;
9697
import java.util.Objects;
9798
import java.util.Optional;
@@ -1464,14 +1465,21 @@ private DeleteResult deleteInLucene(Delete delete, DeletionStrategy plan) throws
14641465
}
14651466
return new DeleteResult(
14661467
plan.versionOfDeletion, getPrimaryTerm(), plan.seqNoOfDeletion, plan.currentlyDeleted == false);
1467-
} catch (Exception ex) {
1468-
if (indexWriter.getTragicException() == null) {
1469-
// there is no tragic event and such it must be a document level failure
1470-
return new DeleteResult(
1471-
ex, plan.versionOfDeletion, delete.primaryTerm(), plan.seqNoOfDeletion, plan.currentlyDeleted == false);
1472-
} else {
1473-
throw ex;
1468+
} catch (final Exception ex) {
1469+
/*
1470+
* Document level failures when deleting are unexpected, we likely hit something fatal such as the Lucene index being corrupt,
1471+
* or the Lucene document limit. We have already issued a sequence number here so this is fatal, fail the engine.
1472+
*/
1473+
if (ex instanceof AlreadyClosedException == false && indexWriter.getTragicException() == null) {
1474+
final String reason = String.format(
1475+
Locale.ROOT,
1476+
"delete id[%s] origin [%s] seq#[%d] failed at the document level",
1477+
delete.id(),
1478+
delete.origin(),
1479+
delete.seqNo());
1480+
failEngine(reason, ex);
14741481
}
1482+
throw ex;
14751483
}
14761484
}
14771485

server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
import org.elasticsearch.cluster.routing.ShardRouting;
7878
import org.elasticsearch.cluster.routing.ShardRoutingState;
7979
import org.elasticsearch.cluster.routing.TestShardRouting;
80+
import org.elasticsearch.common.CheckedBiConsumer;
8081
import org.elasticsearch.common.CheckedRunnable;
8182
import org.elasticsearch.common.Randomness;
8283
import org.elasticsearch.common.Strings;
@@ -3318,8 +3319,8 @@ public void testHandleDocumentFailure() throws Exception {
33183319
AtomicReference<ThrowingIndexWriter> throwingIndexWriter = new AtomicReference<>();
33193320
try (InternalEngine engine = createEngine(defaultSettings, store, createTempDir(), NoMergePolicy.INSTANCE,
33203321
(directory, iwc) -> {
3321-
throwingIndexWriter.set(new ThrowingIndexWriter(directory, iwc));
3322-
return throwingIndexWriter.get();
3322+
throwingIndexWriter.set(new ThrowingIndexWriter(directory, iwc));
3323+
return throwingIndexWriter.get();
33233324
})
33243325
) {
33253326
// test document failure while indexing
@@ -3343,22 +3344,6 @@ public void testHandleDocumentFailure() throws Exception {
33433344
assertNotNull(indexResult.getTranslogLocation());
33443345
engine.index(indexForDoc(doc2));
33453346

3346-
// test failure while deleting
3347-
// all these simulated exceptions are not fatal to the IW so we treat them as document failures
3348-
final Engine.DeleteResult deleteResult;
3349-
if (randomBoolean()) {
3350-
throwingIndexWriter.get().setThrowFailure(() -> new IOException("simulated"));
3351-
deleteResult = engine.delete(new Engine.Delete("test", "1", newUid(doc1), primaryTerm.get()));
3352-
assertThat(deleteResult.getFailure(), instanceOf(IOException.class));
3353-
} else {
3354-
throwingIndexWriter.get().setThrowFailure(() -> new IllegalArgumentException("simulated max token length"));
3355-
deleteResult = engine.delete(new Engine.Delete("test", "1", newUid(doc1), primaryTerm.get()));
3356-
assertThat(deleteResult.getFailure(),
3357-
instanceOf(IllegalArgumentException.class));
3358-
}
3359-
assertThat(deleteResult.getVersion(), equalTo(2L));
3360-
assertThat(deleteResult.getSeqNo(), equalTo(3L));
3361-
33623347
// test non document level failure is thrown
33633348
if (randomBoolean()) {
33643349
// simulate close by corruption
@@ -5815,4 +5800,48 @@ public long addDocument(Iterable<? extends IndexableField> doc) throws IOExcepti
58155800
}
58165801
}
58175802

5803+
public void testDeleteFailureSoftDeletesEnabledDocAlreadyDeleted() throws IOException {
5804+
runTestDeleteFailure(true, InternalEngine::delete);
5805+
}
5806+
5807+
public void testDeleteFailureSoftDeletesEnabled() throws IOException {
5808+
runTestDeleteFailure(true, (engine, op) -> {});
5809+
}
5810+
5811+
public void testDeleteFailureSoftDeletesDisabled() throws IOException {
5812+
runTestDeleteFailure(false, (engine, op) -> {});
5813+
}
5814+
5815+
private void runTestDeleteFailure(
5816+
final boolean softDeletesEnabled,
5817+
final CheckedBiConsumer<InternalEngine, Engine.Delete, IOException> consumer) throws IOException {
5818+
engine.close();
5819+
final Settings settings = Settings.builder()
5820+
.put(defaultSettings.getSettings())
5821+
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), softDeletesEnabled).build();
5822+
final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(
5823+
IndexMetaData.builder(defaultSettings.getIndexMetaData()).settings(settings).build());
5824+
final AtomicReference<ThrowingIndexWriter> iw = new AtomicReference<>();
5825+
try (Store store = createStore();
5826+
InternalEngine engine = createEngine(
5827+
(dir, iwc) -> {
5828+
iw.set(new ThrowingIndexWriter(dir, iwc));
5829+
return iw.get();
5830+
},
5831+
null,
5832+
null,
5833+
config(indexSettings, store, createTempDir(), NoMergePolicy.INSTANCE, null))) {
5834+
engine.index(new Engine.Index(newUid("0"), primaryTerm.get(), InternalEngineTests.createParsedDoc("0", null)));
5835+
final Engine.Delete op = new Engine.Delete("_doc", "0", newUid("0"), primaryTerm.get());
5836+
consumer.accept(engine, op);
5837+
iw.get().setThrowFailure(() -> new IllegalArgumentException("fatal"));
5838+
final IllegalArgumentException e = expectThrows(IllegalArgumentException. class, () -> engine.delete(op));
5839+
assertThat(e.getMessage(), equalTo("fatal"));
5840+
assertTrue(engine.isClosed.get());
5841+
assertThat(engine.failedEngine.get(), not(nullValue()));
5842+
assertThat(engine.failedEngine.get(), instanceOf(IllegalArgumentException.class));
5843+
assertThat(engine.failedEngine.get().getMessage(), equalTo("fatal"));
5844+
}
5845+
}
5846+
58185847
}

0 commit comments

Comments
 (0)