@@ -58,12 +58,14 @@
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Supplier;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@@ -202,7 +204,7 @@ protected JobTopology getJobTopology(
 
         Set<JobVertexID> vertexSet = Set.copyOf(t.getVerticesInTopologicalOrder());
         updateVertexList(stateStore, ctx, clock.instant(), vertexSet);
-        updateKafkaSourceMaxParallelisms(ctx, jobDetailsInfo.getJobId(), t);
+        updateKafkaPulsarSourceMaxParallelisms(ctx, jobDetailsInfo.getJobId(), t);
         excludeVerticesFromScaling(ctx.getConfiguration(), t.getFinishedVertices());
         return t;
     }
@@ -247,17 +249,36 @@ protected JobTopology getJobTopology(JobDetailsInfo jobDetailsInfo) {
                 json, slotSharingGroupIdMap, maxParallelismMap, metrics, finished);
     }
 
-    private void updateKafkaSourceMaxParallelisms(Context ctx, JobID jobId, JobTopology topology)
-            throws Exception {
+    private void updateKafkaPulsarSourceMaxParallelisms(
+            Context ctx, JobID jobId, JobTopology topology) throws Exception {
         try (var restClient = ctx.getRestClusterClient()) {
-            var partitionRegex = Pattern.compile("^.*\\.partition\\.\\d+\\.currentOffset$");
+            Pattern partitionRegex =
+                    Pattern.compile(
+                            "^.*\\.KafkaSourceReader\\.topic\\.(?<kafkaTopic>.+)\\.partition\\.(?<kafkaId>\\d+)\\.currentOffset$"
+                                    + "|^.*\\.PulsarConsumer\\.(?<pulsarTopic>.+)-partition-(?<pulsarId>\\d+)\\..*\\.numMsgsReceived$");
             for (var vertexInfo : topology.getVertexInfos().values()) {
                 if (vertexInfo.getInputs().isEmpty()) {
                     var sourceVertex = vertexInfo.getId();
                     var numPartitions =
                             queryAggregatedMetricNames(restClient, jobId, sourceVertex).stream()
-                                    .filter(partitionRegex.asMatchPredicate())
-                                    .count();
+                                    .map(
+                                            v -> {
+                                                Matcher matcher = partitionRegex.matcher(v);
+                                                if (matcher.matches()) {
+                                                    String kafkaTopic = matcher.group("kafkaTopic");
+                                                    String kafkaId = matcher.group("kafkaId");
+                                                    String pulsarTopic =
+                                                            matcher.group("pulsarTopic");
+                                                    String pulsarId = matcher.group("pulsarId");
+                                                    return kafkaTopic != null
+                                                            ? kafkaTopic + "-" + kafkaId
+                                                            : pulsarTopic + "-" + pulsarId;
+                                                }
+                                                return null;
+                                            })
+                                    .filter(Objects::nonNull)
+                                    .collect(Collectors.toSet())
+                                    .size();
                     if (numPartitions > 0) {
                         LOG.debug(
                                 "Updating source {} max parallelism based on available partitions to {}",