Skip to content

Commit c701c5b

Browse files
Fix race condition in multi-shard rollup start/stop tests (#1529)
The rollup start/stop tests with multiple shards were experiencing race conditions leading to version conflicts.

The issue occurred when:
1. Test reads rollup document (seqNo = N)
2. Active rollup runner updates the document (seqNo = N+1)
3. Test tries to update with seqNo = N → 409 Version Conflict

The stop/start actions perform two sequential updates:
- First: Update rollup metadata status
- Second: Enable/disable the rollup job

The fix moves the _stop and _start API calls inside the waitFor block, ensuring automatic retries on version conflicts. This is consistent with the pattern already used in other rollup and transform tests.

Fixed tests:
- RestStopRollupActionIT: test stop rollup when multiple shards configured for IM config index
- RestStartRollupActionIT: test start rollup when multiple shards configured for IM config index

Signed-off-by: bowenlan-amzn <[email protected]>
1 parent 39b856d commit c701c5b

File tree

2 files changed

+16
-9
lines changed

2 files changed

+16
-9
lines changed

src/test/kotlin/org/opensearch/indexmanagement/rollup/resthandler/RestStartRollupActionIT.kt

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -245,12 +245,14 @@ class RestStartRollupActionIT : RollupRestAPITestCase() {
245245
val updatedRollup = getRollup(rollupId = rollup.id)
246246
val metadata = getRollupMetadataWithRoutingId(rollup.id, updatedRollup.metadataID!!)
247247
assertEquals("Status should be failed", RollupMetadata.Status.FAILED, metadata.status)
248-
}
249248

250-
val response = client().makeRequest("POST", "${IndexManagementPlugin.ROLLUP_JOBS_BASE_URI}/${rollup.id}/_start")
251-
assertEquals("Start rollup failed", RestStatus.OK, response.restStatus())
252-
val expectedResponse = mapOf("acknowledged" to true)
253-
assertEquals(expectedResponse, response.asMap())
249+
// There are two calls to _start happening serially which is prone to version conflicts with multiple shards
250+
// so including it in a waitFor to ensure it can retry a few times
251+
val response = client().makeRequest("POST", "${IndexManagementPlugin.ROLLUP_JOBS_BASE_URI}/${rollup.id}/_start")
252+
assertEquals("Start rollup failed", RestStatus.OK, response.restStatus())
253+
val expectedResponse = mapOf("acknowledged" to true)
254+
assertEquals(expectedResponse, response.asMap())
255+
}
254256

255257
val updatedRollup = getRollup(rollup.id)
256258
assertTrue("Rollup was not enabled", updatedRollup.enabled)

src/test/kotlin/org/opensearch/indexmanagement/rollup/resthandler/RestStopRollupActionIT.kt

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -305,11 +305,16 @@ class RestStopRollupActionIT : RollupRestAPITestCase() {
305305
waitFor {
306306
val rollupJob = getRollup(rollupId = rollup.id)
307307
assertNotNull("Rollup job doesn't have metadata set", rollupJob.metadataID)
308+
val rollupMetadata = getRollupMetadataWithRoutingId(rollup.id, rollupJob.metadataID!!)
309+
assertEquals("Rollup is not STARTED", RollupMetadata.Status.STARTED, rollupMetadata.status)
310+
311+
// There are two calls to _stop happening serially which is prone to version conflicts during an ongoing job
312+
// so including it in a waitFor to ensure it can retry a few times
313+
val response = client().makeRequest("POST", "$ROLLUP_JOBS_BASE_URI/${rollup.id}/_stop")
314+
assertEquals("Stop rollup failed", RestStatus.OK, response.restStatus())
315+
val expectedResponse = mapOf("acknowledged" to true)
316+
assertEquals(expectedResponse, response.asMap())
308317
}
309-
val response = client().makeRequest("POST", "$ROLLUP_JOBS_BASE_URI/${rollup.id}/_stop")
310-
assertEquals("Stop rollup failed", RestStatus.OK, response.restStatus())
311-
val expectedResponse = mapOf("acknowledged" to true)
312-
assertEquals(expectedResponse, response.asMap())
313318

314319
val updatedRollup = getRollup(rollup.id)
315320
assertFalse("Rollup was not disabled", updatedRollup.enabled)

0 commit comments

Comments
 (0)