Skip to content

Commit a2254b4

Browse files
authored
Fix off-by-two with task action retry count. (#18755)
The property druid.peon.taskActionClient.retry.maxRetryCount was being decremented prior to being passed to StandardRetryPolicy as the value of maxAttempts. It should actually be incremented: if there are N retries that means N + 1 total attempts.
1 parent 0ee6855 commit a2254b4

File tree

2 files changed

+103
-5
lines changed

2 files changed

+103
-5
lines changed

indexing-service/src/main/java/org/apache/druid/indexing/common/actions/RemoteTaskActionClientFactory.java

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.apache.druid.rpc.StandardRetryPolicy;
3434

3535
/**
36+
*
3637
*/
3738
public class RemoteTaskActionClientFactory implements TaskActionClientFactory
3839
{
@@ -50,11 +51,7 @@ public RemoteTaskActionClientFactory(
5051
this.overlordClient = clientFactory.makeClient(
5152
NodeRole.OVERLORD.toString(),
5253
serviceLocator,
53-
StandardRetryPolicy.builder()
54-
.maxAttempts(retryPolicyConfig.getMaxRetryCount() - 1)
55-
.minWaitMillis(retryPolicyConfig.getMinWait().toStandardDuration().getMillis())
56-
.maxWaitMillis(retryPolicyConfig.getMaxWait().toStandardDuration().getMillis())
57-
.build()
54+
buildRetryPolicy(retryPolicyConfig)
5855
);
5956
this.jsonMapper = jsonMapper;
6057
}
@@ -64,4 +61,20 @@ public TaskActionClient create(Task task)
6461
{
6562
return new RemoteTaskActionClient(task, overlordClient, jsonMapper);
6663
}
64+
65+
/**
66+
* Converts a {@link RetryPolicyConfig} to a {@link StandardRetryPolicy}.
67+
*
68+
* @param retryPolicyConfig the retry policy configuration
69+
*
70+
* @return the standard retry policy
71+
*/
72+
static StandardRetryPolicy buildRetryPolicy(final RetryPolicyConfig retryPolicyConfig)
73+
{
74+
return StandardRetryPolicy.builder()
75+
.maxAttempts(retryPolicyConfig.getMaxRetryCount() + 1)
76+
.minWaitMillis(retryPolicyConfig.getMinWait().toStandardDuration().getMillis())
77+
.maxWaitMillis(retryPolicyConfig.getMaxWait().toStandardDuration().getMillis())
78+
.build();
79+
}
6780
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.druid.indexing.common.actions;
21+
22+
import org.apache.druid.indexing.common.RetryPolicyConfig;
23+
import org.apache.druid.rpc.StandardRetryPolicy;
24+
import org.joda.time.Period;
25+
import org.junit.Assert;
26+
import org.junit.Test;
27+
28+
public class RemoteTaskActionClientFactoryTest
29+
{
30+
@Test
31+
public void test_buildRetryPolicy_withDefaultConfig()
32+
{
33+
final RetryPolicyConfig config = new RetryPolicyConfig();
34+
final StandardRetryPolicy retryPolicy = RemoteTaskActionClientFactory.buildRetryPolicy(config);
35+
36+
// Default maxRetryCount is 13, so maxAttempts should be 14 (13 retries + 1 initial attempt)
37+
Assert.assertEquals(14, retryPolicy.maxAttempts());
38+
39+
// Default minWait is PT5S (5 seconds)
40+
Assert.assertEquals(5000, retryPolicy.minWaitMillis());
41+
42+
// Default maxWait is PT1M (1 minute)
43+
Assert.assertEquals(60000, retryPolicy.maxWaitMillis());
44+
}
45+
46+
@Test
47+
public void test_buildRetryPolicy_withCustomConfig()
48+
{
49+
final RetryPolicyConfig config = new RetryPolicyConfig()
50+
.setMaxRetryCount(5)
51+
.setMinWait(new Period("PT10S"))
52+
.setMaxWait(new Period("PT2M"));
53+
54+
final StandardRetryPolicy retryPolicy = RemoteTaskActionClientFactory.buildRetryPolicy(config);
55+
56+
// maxRetryCount is 5, so maxAttempts should be 6 (5 retries + 1 initial attempt)
57+
Assert.assertEquals(6, retryPolicy.maxAttempts());
58+
59+
// minWait is PT10S (10 seconds)
60+
Assert.assertEquals(10000, retryPolicy.minWaitMillis());
61+
62+
// maxWait is PT2M (2 minutes)
63+
Assert.assertEquals(120000, retryPolicy.maxWaitMillis());
64+
}
65+
66+
@Test
67+
public void test_buildRetryPolicy_withZeroRetries()
68+
{
69+
final RetryPolicyConfig config = new RetryPolicyConfig()
70+
.setMaxRetryCount(0)
71+
.setMinWait(new Period("PT1S"))
72+
.setMaxWait(new Period("PT30S"));
73+
74+
final StandardRetryPolicy retryPolicy = RemoteTaskActionClientFactory.buildRetryPolicy(config);
75+
76+
// maxRetryCount is 0, so maxAttempts should be 1 (0 retries + 1 initial attempt)
77+
Assert.assertEquals(1, retryPolicy.maxAttempts());
78+
79+
// minWait is PT1S (1 second)
80+
Assert.assertEquals(1000, retryPolicy.minWaitMillis());
81+
82+
// maxWait is PT30S (30 seconds)
83+
Assert.assertEquals(30000, retryPolicy.maxWaitMillis());
84+
}
85+
}

0 commit comments

Comments
 (0)