1111
1212import org .apache .lucene .index .IndexFileNames ;
1313import org .apache .lucene .tests .util .English ;
14+ import org .elasticsearch .ElasticsearchException ;
1415import org .elasticsearch .action .ActionFuture ;
1516import org .elasticsearch .action .DocWriteResponse ;
1617import org .elasticsearch .action .admin .cluster .health .ClusterHealthResponse ;
3637import org .elasticsearch .env .NodeEnvironment ;
3738import org .elasticsearch .index .IndexService ;
3839import org .elasticsearch .index .IndexSettings ;
40+ import org .elasticsearch .index .engine .Engine ;
3941import org .elasticsearch .index .seqno .ReplicationTracker ;
4042import org .elasticsearch .index .seqno .RetentionLease ;
4143import org .elasticsearch .index .shard .IndexEventListener ;
4244import org .elasticsearch .index .shard .IndexShard ;
4345import org .elasticsearch .index .shard .IndexShardState ;
4446import org .elasticsearch .index .shard .ShardId ;
47+ import org .elasticsearch .indices .IndexingMemoryController ;
48+ import org .elasticsearch .indices .IndicesService ;
4549import org .elasticsearch .indices .recovery .PeerRecoveryTargetService ;
4650import org .elasticsearch .indices .recovery .RecoveryFileChunkRequest ;
4751import org .elasticsearch .plugins .Plugin ;
@@ -161,10 +165,88 @@ public void testSimpleRelocationNoIndexing() {
161165 assertHitCount (prepareSearch ("test" ).setSize (0 ), 20 );
162166 }
163167
168+ // This tests that relocation can successfully suspend index throttling to grab
169+ // indexing permits required for relocation to succeed.
170+ public void testSimpleRelocationWithIndexingPaused () throws Exception {
171+ logger .info ("--> starting [node1] ..." );
172+ // Start node with PAUSE_INDEXING_ON_THROTTLE setting set to true. This means that if we activate
173+ // index throttling for a shard on this node, it will pause indexing for that shard until throttling
174+ // is deactivated.
175+ final String node_1 = internalCluster ().startNode (
176+ Settings .builder ().put (IndexingMemoryController .PAUSE_INDEXING_ON_THROTTLE .getKey (), true )
177+ );
178+
179+ logger .info ("--> creating test index ..." );
180+ prepareCreate ("test" , indexSettings (1 , 0 )).get ();
181+
182+ logger .info ("--> index docs" );
183+ int numDocs = between (1 , 10 );
184+ for (int i = 0 ; i < numDocs ; i ++) {
185+ prepareIndex ("test" ).setId (Integer .toString (i )).setSource ("field" , "value" + i ).get ();
186+ }
187+ logger .info ("--> flush so we have an actual index" );
188+ indicesAdmin ().prepareFlush ().get ();
189+
190+ logger .info ("--> verifying count" );
191+ indicesAdmin ().prepareRefresh ().get ();
192+ assertHitCount (prepareSearch ("test" ).setSize (0 ), numDocs );
193+
194+ logger .info ("--> start another node" );
195+ final String node_2 = internalCluster ().startNode ();
196+ ClusterHealthResponse clusterHealthResponse = clusterAdmin ().prepareHealth (TEST_REQUEST_TIMEOUT )
197+ .setWaitForEvents (Priority .LANGUID )
198+ .setWaitForNodes ("2" )
199+ .get ();
200+ assertThat (clusterHealthResponse .isTimedOut (), equalTo (false ));
201+
202+ // Activate index throttling on "test" index primary shard
203+ IndicesService indicesService = internalCluster ().getInstance (IndicesService .class , node_1 );
204+ IndexShard shard = indicesService .indexServiceSafe (resolveIndex ("test" )).getShard (0 );
205+ shard .activateThrottling ();
206+ // Verify that indexing is paused for the throttled shard
207+ Engine engine = shard .getEngineOrNull ();
208+ assertThat (engine != null && engine .isThrottled (), equalTo (true ));
209+
210+ // Try to index a document into the "test" index which is currently throttled
211+ logger .info ("--> Try to index a doc while indexing is paused" );
212+ IndexRequestBuilder indexRequestBuilder = prepareIndex ("test" ).setId (Integer .toString (20 )).setSource ("field" , "value" + 20 );
213+ var future = indexRequestBuilder .execute ();
214+ expectThrows (ElasticsearchException .class , () -> future .actionGet (500 , TimeUnit .MILLISECONDS ));
215+ // Verify that the new document has not been indexed indicating that the indexing thread is paused.
216+ logger .info ("--> verifying count is unchanged..." );
217+ indicesAdmin ().prepareRefresh ().get ();
218+ assertHitCount (prepareSearch ("test" ).setSize (0 ), numDocs );
219+
220+ logger .info ("--> relocate the shard from node1 to node2" );
221+ updateIndexSettings (Settings .builder ().put ("index.routing.allocation.include._name" , node_2 ), "test" );
222+ ensureGreen (ACCEPTABLE_RELOCATION_TIME , "test" );
223+
224+ // Relocation will suspend throttling for the paused shard, allow the indexing thread to proceed, thereby releasing
225+ // the indexing permit it holds, in turn allowing relocation to acquire the permits and proceed.
226+ clusterHealthResponse = clusterAdmin ().prepareHealth (TEST_REQUEST_TIMEOUT )
227+ .setWaitForEvents (Priority .LANGUID )
228+ .setWaitForNoRelocatingShards (true )
229+ .setTimeout (ACCEPTABLE_RELOCATION_TIME )
230+ .get ();
231+ assertThat (clusterHealthResponse .isTimedOut (), equalTo (false ));
232+
233+ logger .info ("--> verifying shard primary has relocated ..." );
234+ indicesService = internalCluster ().getInstance (IndicesService .class , node_2 );
235+ shard = indicesService .indexServiceSafe (resolveIndex ("test" )).getShard (0 );
236+ assertThat (shard .routingEntry ().primary (), equalTo (true ));
237+ engine = shard .getEngineOrNull ();
238+ assertThat (engine != null && engine .isThrottled (), equalTo (false ));
239+ logger .info ("--> verifying count after relocation ..." );
240+ future .actionGet ();
241+ indicesAdmin ().prepareRefresh ().get ();
242+ assertHitCount (prepareSearch ("test" ).setSize (0 ), numDocs + 1 );
243+ }
244+
164245 public void testRelocationWhileIndexingRandom () throws Exception {
165246 int numberOfRelocations = scaledRandomIntBetween (1 , rarely () ? 10 : 4 );
166247 int numberOfReplicas = randomBoolean () ? 0 : 1 ;
167248 int numberOfNodes = numberOfReplicas == 0 ? 2 : 3 ;
249+ boolean throttleIndexing = randomBoolean ();
168250
169251 logger .info (
170252 "testRelocationWhileIndexingRandom(numRelocations={}, numberOfReplicas={}, numberOfNodes={})" ,
@@ -173,16 +255,22 @@ public void testRelocationWhileIndexingRandom() throws Exception {
173255 numberOfNodes
174256 );
175257
258+ // Randomly use pause throttling vs lock throttling, to verify that relocations proceed regardless
176259 String [] nodes = new String [numberOfNodes ];
177260 logger .info ("--> starting [node1] ..." );
178- nodes [0 ] = internalCluster ().startNode ();
261+ nodes [0 ] = internalCluster ().startNode (
262+ Settings .builder ().put (IndexingMemoryController .PAUSE_INDEXING_ON_THROTTLE .getKey (), randomBoolean ())
263+ );
179264
180265 logger .info ("--> creating test index ..." );
181266 prepareCreate ("test" , indexSettings (1 , numberOfReplicas )).get ();
182267
268+ // Randomly use pause throttling vs lock throttling, to verify that relocations proceed regardless
183269 for (int i = 2 ; i <= numberOfNodes ; i ++) {
184270 logger .info ("--> starting [node{}] ..." , i );
185- nodes [i - 1 ] = internalCluster ().startNode ();
271+ nodes [i - 1 ] = internalCluster ().startNode (
272+ Settings .builder ().put (IndexingMemoryController .PAUSE_INDEXING_ON_THROTTLE .getKey (), randomBoolean ())
273+ );
186274 if (i != numberOfNodes ) {
187275 ClusterHealthResponse healthResponse = clusterAdmin ().prepareHealth (TEST_REQUEST_TIMEOUT )
188276 .setWaitForEvents (Priority .LANGUID )
@@ -200,17 +288,37 @@ public void testRelocationWhileIndexingRandom() throws Exception {
200288 logger .info ("--> {} docs indexed" , numDocs );
201289
202290 logger .info ("--> starting relocations..." );
203- int nodeShiftBased = numberOfReplicas ; // if we have replicas shift those
291+
292+ // When we have a replica, the primary is on node 0 and replica is on node 1. We cannot move primary
293+ // to a node containing the replica, so relocation of primary needs to happen between node 0 and 2.
294+ // When there is no replica, we only have 2 nodes and primary relocates back and forth between node 0 and 1.
204295 for (int i = 0 ; i < numberOfRelocations ; i ++) {
205296 int fromNode = (i % 2 );
206297 int toNode = fromNode == 0 ? 1 : 0 ;
207- fromNode += nodeShiftBased ;
208- toNode += nodeShiftBased ;
298+ if (numberOfReplicas == 1 ) {
299+ fromNode = fromNode == 1 ? 2 : 0 ;
300+ toNode = toNode == 1 ? 2 : 0 ;
301+ }
302+
209303 numDocs = scaledRandomIntBetween (200 , 1000 );
304+
305+ // Throttle indexing on primary shard
306+ if (throttleIndexing ) {
307+ IndicesService indicesService = internalCluster ().getInstance (IndicesService .class , nodes [fromNode ]);
308+ IndexShard shard = indicesService .indexServiceSafe (resolveIndex ("test" )).getShard (0 );
309+ // Activate index throttling on "test" index primary shard
310+ logger .info ("--> activate throttling for shard on node {}..." , nodes [fromNode ]);
311+ shard .activateThrottling ();
312+ // Verify that indexing is throttled for this shard
313+ Engine engine = shard .getEngineOrNull ();
314+ assertThat (engine != null && engine .isThrottled (), equalTo (true ));
315+ }
210316 logger .debug ("--> Allow indexer to index [{}] documents" , numDocs );
211317 indexer .continueIndexing (numDocs );
212318 logger .info ("--> START relocate the shard from {} to {}" , nodes [fromNode ], nodes [toNode ]);
319+
213320 ClusterRerouteUtils .reroute (client (), new MoveAllocationCommand ("test" , 0 , nodes [fromNode ], nodes [toNode ]));
321+
214322 if (rarely ()) {
215323 logger .debug ("--> flushing" );
216324 indicesAdmin ().prepareFlush ().get ();
@@ -219,11 +327,13 @@ public void testRelocationWhileIndexingRandom() throws Exception {
219327 .setWaitForEvents (Priority .LANGUID )
220328 .setWaitForNoRelocatingShards (true )
221329 .setTimeout (ACCEPTABLE_RELOCATION_TIME )
330+ .setWaitForGreenStatus ()
222331 .get ();
223332 assertThat (clusterHealthResponse .isTimedOut (), equalTo (false ));
224333 indexer .pauseIndexing ();
225334 logger .info ("--> DONE relocate the shard from {} to {}" , fromNode , toNode );
226335 }
336+
227337 logger .info ("--> done relocations" );
228338 logger .info ("--> waiting for indexing threads to stop ..." );
229339 indexer .stopAndAwaitStopped ();
0 commit comments