2626import org .apache .druid .indexing .overlord .supervisor .SupervisorStatus ;
2727import org .apache .druid .indexing .seekablestream .supervisor .autoscaler .CostBasedAutoScalerConfig ;
2828import org .apache .druid .java .util .common .StringUtils ;
29- import org .apache .druid .query .DruidMetrics ;
3029import org .apache .druid .testing .embedded .EmbeddedBroker ;
3130import org .apache .druid .testing .embedded .EmbeddedClusterApis ;
3231import org .apache .druid .testing .embedded .EmbeddedCoordinator ;
3534import org .apache .druid .testing .embedded .EmbeddedIndexer ;
3635import org .apache .druid .testing .embedded .EmbeddedOverlord ;
3736import org .apache .druid .testing .embedded .EmbeddedRouter ;
38- import org .apache .druid .testing .embedded .emitter .LatchableEmitterModule ;
3937import org .apache .druid .testing .embedded .indexing .MoreResources ;
4038import org .apache .druid .testing .embedded .junit5 .EmbeddedClusterTestBase ;
4139import org .apache .kafka .clients .producer .ProducerRecord ;
4240import org .hamcrest .Matchers ;
4341import org .joda .time .DateTime ;
4442import org .joda .time .DateTimeZone ;
4543import org .junit .jupiter .api .Assertions ;
46- import org .junit .jupiter .api .Disabled ;
4744import org .junit .jupiter .api .Test ;
4845import org .junit .jupiter .api .Timeout ;
4946
5249import java .util .stream .Collectors ;
5350import java .util .stream .IntStream ;
5451
55- import static org .apache .druid .indexing .seekablestream .supervisor .SeekableStreamSupervisor .AUTOSCALER_REQUIRED_TASKS_METRIC ;
56-
5752/**
5853 * Integration test for {@link org.apache.druid.indexing.seekablestream.supervisor.autoscaler.CostBasedAutoScaler}.
5954 * <p>
60- * Tests the autoscaler's ability to compute optimal task counts based
61- * on partition count and cost metrics (lag and idle time).
55+ * Tests the autoscaler's ability to compute optimal task counts based on partition count and cost metrics (lag and idle time).
6256 */
6357public class CostBasedAutoScalerIntegrationTest extends EmbeddedClusterTestBase
6458{
6559 private static final String TOPIC = EmbeddedClusterApis .createTestDatasourceName ();
6660 private static final String EVENT_TEMPLATE = "{\" timestamp\" :\" %s\" ,\" dimension\" :\" value%d\" ,\" metric\" :%d}" ;
67- ;
68- private static final int PARTITION_COUNT = 100 ;
69- private static final int INITIAL_TASK_COUNT = 25 ;
61+ private static final int PARTITION_COUNT = 10 ;
62+ private static final int INITIAL_TASK_COUNT = 10 ;
7063
7164 private final EmbeddedBroker broker = new EmbeddedBroker ();
7265 private final EmbeddedIndexer indexer = new EmbeddedIndexer ();
@@ -87,6 +80,7 @@ public void start()
8780 {
8881 super .start ();
8982 createTopicWithPartitions (TOPIC , PARTITION_COUNT );
83+ produceRecordsToKafka (500 );
9084 }
9185
9286 @ Override
@@ -107,31 +101,27 @@ public void stop()
107101
108102 coordinator .addProperty ("druid.manager.segments.useIncrementalCache" , "ifSynced" );
109103
110- cluster .addExtension (KafkaIndexTaskModule .class )
111- .addExtension (LatchableEmitterModule .class )
112- .useDefaultTimeoutForLatchableEmitter (300 )
113- .addCommonProperty ("druid.emitter" , "latching" )
114- .addCommonProperty ("druid.monitoring.emissionPeriod" , "PT0.1s" )
115- .addResource (kafkaServer )
104+ cluster .useLatchableEmitter ()
116105 .addServer (coordinator )
117106 .addServer (overlord )
118107 .addServer (indexer )
119108 .addServer (broker )
120109 .addServer (historical )
110+ .addExtension (KafkaIndexTaskModule .class )
111+ .addCommonProperty ("druid.monitoring.emissionPeriod" , "PT0.5s" )
112+ .addResource (kafkaServer )
121113 .addServer (new EmbeddedRouter ());
122114
123115 return cluster ;
124116 }
125117
126- @ Disabled
127118 @ Test
128119 @ Timeout (45 )
129120 public void test_autoScaler_computesOptimalTaskCountAndProduceScaleDown ()
130121 {
131- final String supervisorId = dataSource + "_supe " ;
122+ final String superId = dataSource + "_super " ;
132123
133- // Produce some amount of data to kafka, to trigger a 'scale down' decision to 17 tasks.
134- produceRecordsToKafka (50 );
124+ // Data is produced to Kafka in start(); its volume should trigger a 'scale down' decision to 4 tasks.
135125
136126 final CostBasedAutoScalerConfig autoScalerConfig = CostBasedAutoScalerConfig
137127 .builder ()
@@ -140,35 +130,27 @@ public void test_autoScaler_computesOptimalTaskCountAndProduceScaleDown()
140130 .taskCountMax (100 )
141131 .taskCountStart (INITIAL_TASK_COUNT )
142132 .metricsCollectionIntervalMillis (3000 )
143- .metricsCollectionRangeMillis (2000 )
144- .scaleActionStartDelayMillis (3000 )
145- .scaleActionPeriodMillis (2000 )
146- .minTriggerScaleActionFrequencyMillis (3000 )
133+ .scaleActionStartDelayMillis (5000 )
134+ .scaleActionPeriodMillis (5000 )
135+ .minTriggerScaleActionFrequencyMillis (5000 )
147136 // Weight configuration: strongly favor lag reduction over idle time
148137 .lagWeight (0.9 )
149138 .idleWeight (0.1 )
150139 .build ();
151140
152- final KafkaSupervisorSpec kafkaSupervisorSpec = createKafkaSupervisorWithAutoScaler (
153- supervisorId ,
154- autoScalerConfig
155- );
141+ final KafkaSupervisorSpec kafkaSupervisorSpec = createKafkaSupervisorWithAutoScaler (superId , autoScalerConfig );
156142
157143 // Submit the supervisor
158- Assertions .assertEquals (
159- supervisorId ,
160- cluster .callApi ().postSupervisor (kafkaSupervisorSpec )
161- );
144+ Assertions .assertEquals (superId , cluster .callApi ().postSupervisor (kafkaSupervisorSpec ));
162145
163146 // Wait for the supervisor to be healthy and running
164- waitForSupervisorRunning (supervisorId );
147+ waitForSupervisorRunning (superId );
165148
166- // Wait for autoscaler to emit optimalTaskCount metric indicating scale-up
167- // We expect the optimal task count to be either 34 or 50.
149+ // Wait for autoscaler to emit optimalTaskCount metric indicating scale-down
150+ // We expect the optimal task count to be 4.
168151 overlord .latchableEmitter ().waitForEvent (
169- event -> event .hasMetricName (AUTOSCALER_REQUIRED_TASKS_METRIC )
170- .hasDimension (DruidMetrics .DATASOURCE , dataSource )
171- .hasValueMatching (Matchers .equalTo (17L ))
152+ event -> event .hasMetricName ("task/autoScaler/costBased/optimalTaskCount" )
153+ .hasValueMatching (Matchers .equalTo (4L ))
172154 );
173155
174156 // Suspend the supervisor
@@ -177,7 +159,7 @@ public void test_autoScaler_computesOptimalTaskCountAndProduceScaleDown()
177159
178160 private void waitForSupervisorRunning (String supervisorId )
179161 {
180- int maxAttempts = 120 ;
162+ int maxAttempts = 10 ;
181163 int attempt = 0 ;
182164 while (attempt < maxAttempts ) {
183165 SupervisorStatus status = cluster .callApi ().getSupervisorStatus (supervisorId );
@@ -186,7 +168,7 @@ private void waitForSupervisorRunning(String supervisorId)
186168 }
187169 attempt ++;
188170 try {
189- Thread .sleep (3000 );
171+ Thread .sleep (1000 );
190172 }
191173 catch (InterruptedException e ) {
192174 Thread .currentThread ().interrupt ();
@@ -222,7 +204,7 @@ private KafkaSupervisorSpec createKafkaSupervisorWithAutoScaler(
222204 return MoreResources .Supervisor .KAFKA_JSON
223205 .get ()
224206 .withDataSchema (schema -> schema .withTimestamp (new TimestampSpec ("timestamp" , "iso" , null )))
225- .withTuningConfig (tuningConfig -> tuningConfig .withMaxRowsPerSegment (1000 ))
207+ .withTuningConfig (tuningConfig -> tuningConfig .withMaxRowsPerSegment (100 ))
226208 .withIoConfig (
227209 ioConfig -> ioConfig
228210 .withConsumerProperties (kafkaServer .consumerProperties ())
0 commit comments