
Commit 59b698d

Bulk add to timeline in DataSourceCompactibleSegmentIterator. (#18774)
Calling "addAll" is more efficient than calling "add" in a loop, it is O(N) instead of O(N^2).
1 parent 7b16171 commit 59b698d
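
To make the reasoning above concrete, here is a minimal sketch of the bulk-add pattern this commit moves to. It is not the commit's code: the helper class TimelineBulkAddSketch, the bulkAdd method, and its timeline/interval/version/segments parameters are invented for illustration; only the timeline API it uses (add, addAll, PartitionChunkEntry, NumberedPartitionChunk, NumberedShardSpec, DataSegment.withShardSpec) appears in the diffs below. Per the new Javadoc, each individual add() call does O(objects in holder) work, so N add() calls in a loop cost O(N^2); building the entries first and handing them to addAll() in one call keeps the total at roughly O(N).

import java.util.ArrayList;
import java.util.List;

import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.VersionedIntervalTimeline;
import org.apache.druid.timeline.partition.NumberedPartitionChunk;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.joda.time.Interval;

class TimelineBulkAddSketch
{
  // Hypothetical helper: adds all segments of one interval with a single addAll() call
  // instead of one add() call per segment.
  static void bulkAdd(
      final VersionedIntervalTimeline<String, DataSegment> timeline,
      final Interval interval,
      final String version,
      final List<DataSegment> segments
  )
  {
    final int partitions = segments.size();
    final List<VersionedIntervalTimeline.PartitionChunkEntry<String, DataSegment>> entries =
        new ArrayList<>(partitions);

    for (int partitionNum = 0; partitionNum < partitions; partitionNum++) {
      // Give each segment a sequential NumberedShardSpec, mirroring what the commit's iterator does.
      final DataSegment segmentForCompact =
          segments.get(partitionNum).withShardSpec(new NumberedShardSpec(partitionNum, partitions));
      entries.add(
          new VersionedIntervalTimeline.PartitionChunkEntry<>(
              interval,
              version,
              NumberedPartitionChunk.make(partitionNum, partitions, segmentForCompact)
          )
      );
    }

    // One bulk insert: the timeline processes the whole batch in O(N)
    // rather than paying the per-holder cost once per segment.
    timeline.addAll(entries.iterator());
  }
}

The commit itself avoids even materializing an intermediate list by wrapping the segment iterator with Iterators.transform, as shown in the second file's diff below.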

2 files changed: 33 additions & 13 deletions

processing/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java

Lines changed: 9 additions & 3 deletions
@@ -176,14 +176,20 @@ public Set<ObjectType> findNonOvershadowedObjectsInInterval(Interval interval, P
         .toSet();
   }
 
+  /**
+   * Add a single partition chunk entry to this timeline. Avoid calling this in a loop, since it
+   * is O(objects in holder) and can therefore create O(N^2) situations. Instead use {@link #addAll(Iterator)}
+   * if you have many objects to add.
+   */
   public void add(final Interval interval, VersionType version, PartitionChunk<ObjectType> object)
   {
     addAll(Iterators.singletonIterator(new PartitionChunkEntry<>(interval, version, object)));
   }
 
-  public void addAll(
-      final Iterator<PartitionChunkEntry<VersionType, ObjectType>> objects
-  )
+  /**
+   * Adds partition chunk entries to this timeline.
+   */
+  public void addAll(final Iterator<PartitionChunkEntry<VersionType, ObjectType>> objects)
   {
     lock.writeLock().lock();
 
server/src/main/java/org/apache/druid/server/compaction/DataSourceCompactibleSegmentIterator.java

Lines changed: 24 additions & 10 deletions
@@ -20,7 +20,9 @@
 package org.apache.druid.server.compaction;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Function;
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
 import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.Intervals;
@@ -33,6 +35,7 @@
 import org.apache.druid.timeline.Partitions;
 import org.apache.druid.timeline.SegmentTimeline;
 import org.apache.druid.timeline.TimelineObjectHolder;
+import org.apache.druid.timeline.VersionedIntervalTimeline;
 import org.apache.druid.timeline.partition.NumberedPartitionChunk;
 import org.apache.druid.timeline.partition.NumberedShardSpec;
 import org.apache.druid.timeline.partition.PartitionChunk;
@@ -137,18 +140,29 @@ private void populateQueue(SegmentTimeline timeline, List<Interval> skipInterval
     final String temporaryVersion = DateTimes.nowUtc().toString();
     for (Map.Entry<Interval, Set<DataSegment>> partitionsPerInterval : intervalToPartitionMap.entrySet()) {
       Interval interval = partitionsPerInterval.getKey();
-      int partitionNum = 0;
       Set<DataSegment> segmentSet = partitionsPerInterval.getValue();
       int partitions = segmentSet.size();
-      for (DataSegment segment : segmentSet) {
-        DataSegment segmentsForCompact = segment.withShardSpec(new NumberedShardSpec(partitionNum, partitions));
-        timelineWithConfiguredSegmentGranularity.add(
-            interval,
-            temporaryVersion,
-            NumberedPartitionChunk.make(partitionNum, partitions, segmentsForCompact)
-        );
-        partitionNum += 1;
-      }
+      timelineWithConfiguredSegmentGranularity.addAll(
+          Iterators.transform(
+              segmentSet.iterator(),
+              new Function<>()
+              {
+                int partitionNum = 0;
+
+                @Override
+                public VersionedIntervalTimeline.PartitionChunkEntry<String, DataSegment> apply(DataSegment segment)
+                {
+                  final DataSegment segmentForCompact =
+                      segment.withShardSpec(new NumberedShardSpec(partitionNum, partitions));
+                  return new VersionedIntervalTimeline.PartitionChunkEntry<>(
+                      interval,
+                      temporaryVersion,
+                      NumberedPartitionChunk.make(partitionNum++, partitions, segmentForCompact)
+                  );
+                }
+              }
+          )
+      );
     }
     // PartitionHolder can only holds chunks of one partition space
     // However, partition in the new timeline (timelineWithConfiguredSegmentGranularity) can be hold multiple
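
A note on the design of the replacement above: rather than collecting PartitionChunkEntry objects into a list, the new code wraps segmentSet.iterator() in Iterators.transform with a stateful anonymous Function, so entries are produced lazily and partitionNum advances only as addAll() consumes the iterator. The short, self-contained sketch below shows that pattern in isolation; it uses plain Guava and no Druid types, and the class and variable names (LazyTransformSketch, segments, numbered) are invented for illustration.

import java.util.Iterator;
import java.util.List;

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;

class LazyTransformSketch
{
  public static void main(String[] args)
  {
    final List<String> segments = ImmutableList.of("seg-a", "seg-b", "seg-c");

    // Iterators.transform is lazy: apply() runs only when an element is pulled,
    // and the counter lives in the anonymous Function instance.
    final Iterator<String> numbered = Iterators.transform(
        segments.iterator(),
        new Function<String, String>()
        {
          int partitionNum = 0;

          @Override
          public String apply(String segment)
          {
            return segment + " -> partition " + partitionNum++;
          }
        }
    );

    // Prints: seg-a -> partition 0, seg-b -> partition 1, seg-c -> partition 2
    numbered.forEachRemaining(System.out::println);
  }
}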
