Skip to content

Commit 87dc631

Browse files
authored
[opt](hive) use binary search to prune hive partitions (#58877)
Followup #44586 Enable binary search partition pruning optimization for Hive external tables. This PR adds binary search partition pruning support for Hive tables by: - Adding `getSortedPartitionRanges()` method to `ExternalTable` base class - Maintaining sorted partition ranges directly in `HivePartitionValues` for cache lifecycle consistency - Overriding `getSortedPartitionRanges()` in `HMSExternalTable` to provide sorted ranges **Performance improvement (20000 partitions, 1000 queries):** - Binary search enabled: **4.548 seconds** - Binary search disabled: **12.849 seconds** - **~2.8x faster**
1 parent e06abed commit 87dc631

File tree

6 files changed

+129
-138
lines changed

6 files changed

+129
-138
lines changed

fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSortedPartitionsCacheManager.java

Lines changed: 3 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,7 @@
2424
import org.apache.doris.common.Config;
2525
import org.apache.doris.common.ConfigBase.DefaultConfHandler;
2626
import org.apache.doris.datasource.CatalogIf;
27-
import org.apache.doris.nereids.rules.expression.rules.MultiColumnBound;
28-
import org.apache.doris.nereids.rules.expression.rules.PartitionItemToRange;
2927
import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
30-
import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges.PartitionItemAndId;
31-
import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges.PartitionItemAndRange;
3228
import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
3329
import org.apache.doris.qe.ConnectContext;
3430
import org.apache.doris.qe.SessionVariable;
@@ -37,18 +33,14 @@
3733
import com.github.benmanes.caffeine.cache.Cache;
3834
import com.github.benmanes.caffeine.cache.Caffeine;
3935
import com.google.common.annotations.VisibleForTesting;
40-
import com.google.common.collect.Range;
4136
import lombok.AllArgsConstructor;
4237
import lombok.Data;
43-
import org.apache.hadoop.util.Lists;
4438
import org.apache.logging.log4j.LogManager;
4539
import org.apache.logging.log4j.Logger;
4640

4741
import java.lang.reflect.Field;
4842
import java.time.Duration;
49-
import java.util.List;
5043
import java.util.Map;
51-
import java.util.Map.Entry;
5244
import java.util.Objects;
5345
import java.util.Optional;
5446

@@ -134,35 +126,10 @@ private SortedPartitionRanges<?> loadCache(
134126
}
135127

136128
Map<?, PartitionItem> unsortedMap = table.getOriginPartitions(scan);
137-
List<Entry<?, PartitionItem>> unsortedList = Lists.newArrayList(unsortedMap.entrySet());
138-
List<PartitionItemAndRange<?>> sortedRanges = Lists.newArrayListWithCapacity(unsortedMap.size());
139-
List<PartitionItemAndId<?>> defaultPartitions = Lists.newArrayList();
140-
for (Entry<?, PartitionItem> entry : unsortedList) {
141-
PartitionItem partitionItem = entry.getValue();
142-
Object id = entry.getKey();
143-
if (!partitionItem.isDefaultPartition()) {
144-
List<Range<MultiColumnBound>> ranges = PartitionItemToRange.toRanges(partitionItem);
145-
for (Range<MultiColumnBound> range : ranges) {
146-
sortedRanges.add(new PartitionItemAndRange<>(id, partitionItem, range));
147-
}
148-
} else {
149-
defaultPartitions.add(new PartitionItemAndId<>(id, partitionItem));
150-
}
129+
SortedPartitionRanges<?> sortedPartitionRanges = SortedPartitionRanges.build(unsortedMap);
130+
if (sortedPartitionRanges == null) {
131+
return null;
151132
}
152-
153-
sortedRanges.sort((o1, o2) -> {
154-
Range<MultiColumnBound> span1 = o1.range;
155-
Range<MultiColumnBound> span2 = o2.range;
156-
int result = span1.lowerEndpoint().compareTo(span2.lowerEndpoint());
157-
if (result != 0) {
158-
return result;
159-
}
160-
result = span1.upperEndpoint().compareTo(span2.upperEndpoint());
161-
return result;
162-
});
163-
SortedPartitionRanges<?> sortedPartitionRanges = new SortedPartitionRanges(
164-
sortedRanges, defaultPartitions
165-
);
166133
PartitionCacheContext context = new PartitionCacheContext(
167134
table.getId(), table.getPartitionMetaVersion(scan), sortedPartitionRanges);
168135
partitionCaches.put(key, context);

fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
import org.apache.doris.common.util.Util;
3434
import org.apache.doris.datasource.ExternalSchemaCache.SchemaCacheKey;
3535
import org.apache.doris.datasource.mvcc.MvccSnapshot;
36+
import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
37+
import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
3638
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
3739
import org.apache.doris.persist.gson.GsonPostProcessable;
3840
import org.apache.doris.persist.gson.GsonUtils;
@@ -453,6 +455,18 @@ public boolean supportInternalPartitionPruned() {
453455
return false;
454456
}
455457

458+
/**
459+
* Get sorted partition ranges for binary search filtering.
460+
* Subclasses can override this method to provide sorted partition ranges
461+
* for efficient partition pruning.
462+
*
463+
* @param scan the catalog relation
464+
* @return sorted partition ranges, or empty if not supported
465+
*/
466+
public Optional<SortedPartitionRanges<String>> getSortedPartitionRanges(CatalogRelation scan) {
467+
return Optional.empty();
468+
}
469+
456470
@Override
457471
public boolean equals(Object o) {
458472
if (this == o) {

fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
import org.apache.doris.mtmv.MTMVRelatedTableIf;
5757
import org.apache.doris.mtmv.MTMVSnapshotIf;
5858
import org.apache.doris.nereids.exceptions.NotSupportedException;
59+
import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
60+
import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation;
5961
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
6062
import org.apache.doris.qe.GlobalVariable;
6163
import org.apache.doris.qe.SessionVariable;
@@ -388,6 +390,19 @@ public boolean supportInternalPartitionPruned() {
388390
return getDlaType() == DLAType.HIVE || getDlaType() == DLAType.HUDI;
389391
}
390392

393+
@Override
394+
public Optional<SortedPartitionRanges<String>> getSortedPartitionRanges(CatalogRelation scan) {
395+
if (getDlaType() != DLAType.HIVE) {
396+
return Optional.empty();
397+
}
398+
if (CollectionUtils.isEmpty(this.getPartitionColumns())) {
399+
return Optional.empty();
400+
}
401+
HiveMetaStoreCache.HivePartitionValues hivePartitionValues = getHivePartitionValues(
402+
MvccUtil.getSnapshotFromContext(this));
403+
return hivePartitionValues.getSortedPartitionRanges();
404+
}
405+
391406
public SelectedPartitions initHudiSelectedPartitions(Optional<TableSnapshot> tableSnapshot) {
392407
if (getDlaType() != DLAType.HUDI) {
393408
return SelectedPartitions.NOT_PRUNED;

fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java

Lines changed: 49 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,8 @@
5252
import org.apache.doris.metric.Metric;
5353
import org.apache.doris.metric.MetricLabel;
5454
import org.apache.doris.metric.MetricRepo;
55-
import org.apache.doris.planner.ColumnBound;
55+
import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
5656
import org.apache.doris.planner.ListPartitionPrunerV2;
57-
import org.apache.doris.planner.PartitionPrunerV2Base.UniqueId;
5857

5958
import com.github.benmanes.caffeine.cache.CacheLoader;
6059
import com.github.benmanes.caffeine.cache.LoadingCache;
@@ -66,10 +65,7 @@
6665
import com.google.common.collect.Iterables;
6766
import com.google.common.collect.Lists;
6867
import com.google.common.collect.Maps;
69-
import com.google.common.collect.Range;
70-
import com.google.common.collect.RangeMap;
7168
import com.google.common.collect.Streams;
72-
import com.google.common.collect.TreeRangeMap;
7369
import lombok.Data;
7470
import org.apache.commons.lang3.math.NumberUtils;
7571
import org.apache.hadoop.fs.BlockLocation;
@@ -251,7 +247,6 @@ private HivePartitionValues loadPartitionValues(PartitionValueCacheKey key) {
251247
}
252248
Map<Long, PartitionItem> idToPartitionItem = Maps.newHashMapWithExpectedSize(partitionNames.size());
253249
BiMap<String, Long> partitionNameToIdMap = HashBiMap.create(partitionNames.size());
254-
Map<Long, List<UniqueId>> idToUniqueIdsMap = Maps.newHashMapWithExpectedSize(partitionNames.size());
255250
for (String partitionName : partitionNames) {
256251
long partitionId = Util.genIdByName(catalog.getName(), nameMapping.getLocalDbName(),
257252
nameMapping.getLocalTblName(), partitionName);
@@ -260,23 +255,8 @@ private HivePartitionValues loadPartitionValues(PartitionValueCacheKey key) {
260255
partitionNameToIdMap.put(partitionName, partitionId);
261256
}
262257

263-
Map<UniqueId, Range<PartitionKey>> uidToPartitionRange = null;
264-
Map<Range<PartitionKey>, UniqueId> rangeToId = null;
265-
RangeMap<ColumnBound, UniqueId> singleColumnRangeMap = null;
266-
Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap = null;
267-
if (key.types.size() > 1) {
268-
// uidToPartitionRange and rangeToId are only used for multi-column partition
269-
uidToPartitionRange = ListPartitionPrunerV2.genUidToPartitionRange(idToPartitionItem, idToUniqueIdsMap);
270-
rangeToId = ListPartitionPrunerV2.genRangeToId(uidToPartitionRange);
271-
} else {
272-
Preconditions.checkState(key.types.size() == 1, key.types);
273-
// singleColumnRangeMap is only used for single-column partition
274-
singleColumnRangeMap = ListPartitionPrunerV2.genSingleColumnRangeMap(idToPartitionItem, idToUniqueIdsMap);
275-
singleUidToColumnRangeMap = ListPartitionPrunerV2.genSingleUidToColumnRange(singleColumnRangeMap);
276-
}
277258
Map<Long, List<String>> partitionValuesMap = ListPartitionPrunerV2.getPartitionValuesMap(idToPartitionItem);
278-
return new HivePartitionValues(idToPartitionItem, uidToPartitionRange, rangeToId, singleColumnRangeMap,
279-
partitionNameToIdMap, idToUniqueIdsMap, singleUidToColumnRangeMap, partitionValuesMap);
259+
return new HivePartitionValues(idToPartitionItem, partitionNameToIdMap, partitionValuesMap);
280260
}
281261

282262
private ListPartitionItem toListPartitionItem(String partitionName, List<Type> types) {
@@ -661,7 +641,6 @@ public void addPartitionsCache(NameMapping nameMapping, List<String> partitionNa
661641
HivePartitionValues copy = partitionValues.copy();
662642
Map<Long, PartitionItem> idToPartitionItemBefore = copy.getIdToPartitionItem();
663643
Map<String, Long> partitionNameToIdMapBefore = copy.getPartitionNameToIdMap();
664-
Map<Long, List<UniqueId>> idToUniqueIdsMap = copy.getIdToUniqueIdsMap();
665644
Map<Long, PartitionItem> idToPartitionItem = new HashMap<>();
666645
String localDbName = nameMapping.getLocalDbName();
667646
String localTblName = nameMapping.getLocalTblName();
@@ -679,28 +658,8 @@ public void addPartitionsCache(NameMapping nameMapping, List<String> partitionNa
679658
Map<Long, List<String>> partitionValuesMapBefore = copy.getPartitionValuesMap();
680659
Map<Long, List<String>> partitionValuesMap = ListPartitionPrunerV2.getPartitionValuesMap(idToPartitionItem);
681660
partitionValuesMapBefore.putAll(partitionValuesMap);
682-
if (key.types.size() > 1) {
683-
Map<UniqueId, Range<PartitionKey>> uidToPartitionRangeBefore = copy.getUidToPartitionRange();
684-
// uidToPartitionRange and rangeToId are only used for multi-column partition
685-
Map<UniqueId, Range<PartitionKey>> uidToPartitionRange = ListPartitionPrunerV2
686-
.genUidToPartitionRange(idToPartitionItem, idToUniqueIdsMap);
687-
uidToPartitionRangeBefore.putAll(uidToPartitionRange);
688-
Map<Range<PartitionKey>, UniqueId> rangeToIdBefore = copy.getRangeToId();
689-
Map<Range<PartitionKey>, UniqueId> rangeToId = ListPartitionPrunerV2.genRangeToId(uidToPartitionRange);
690-
rangeToIdBefore.putAll(rangeToId);
691-
} else {
692-
Preconditions.checkState(key.types.size() == 1, key.types);
693-
// singleColumnRangeMap is only used for single-column partition
694-
RangeMap<ColumnBound, UniqueId> singleColumnRangeMapBefore = copy.getSingleColumnRangeMap();
695-
RangeMap<ColumnBound, UniqueId> singleColumnRangeMap = ListPartitionPrunerV2
696-
.genSingleColumnRangeMap(idToPartitionItem, idToUniqueIdsMap);
697-
singleColumnRangeMapBefore.putAll(singleColumnRangeMap);
698-
Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMapBefore = copy
699-
.getSingleUidToColumnRangeMap();
700-
Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap = ListPartitionPrunerV2
701-
.genSingleUidToColumnRange(singleColumnRangeMap);
702-
singleUidToColumnRangeMapBefore.putAll(singleUidToColumnRangeMap);
703-
}
661+
// Rebuild sorted partition ranges after adding partitions
662+
copy.rebuildSortedPartitionRanges();
704663
HivePartitionValues partitionValuesCur = partitionValuesCache.getIfPresent(key);
705664
if (partitionValuesCur == partitionValues) {
706665
partitionValuesCache.put(key, copy);
@@ -718,11 +677,6 @@ public void dropPartitionsCache(ExternalTable dorisTable, List<String> partition
718677
HivePartitionValues copy = partitionValues.copy();
719678
Map<String, Long> partitionNameToIdMapBefore = copy.getPartitionNameToIdMap();
720679
Map<Long, PartitionItem> idToPartitionItemBefore = copy.getIdToPartitionItem();
721-
Map<Long, List<UniqueId>> idToUniqueIdsMapBefore = copy.getIdToUniqueIdsMap();
722-
Map<UniqueId, Range<PartitionKey>> uidToPartitionRangeBefore = copy.getUidToPartitionRange();
723-
Map<Range<PartitionKey>, UniqueId> rangeToIdBefore = copy.getRangeToId();
724-
RangeMap<ColumnBound, UniqueId> singleColumnRangeMapBefore = copy.getSingleColumnRangeMap();
725-
Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMapBefore = copy.getSingleUidToColumnRangeMap();
726680
Map<Long, List<String>> partitionValuesMap = copy.getPartitionValuesMap();
727681
for (String partitionName : partitionNames) {
728682
if (!partitionNameToIdMapBefore.containsKey(partitionName)) {
@@ -733,27 +687,13 @@ public void dropPartitionsCache(ExternalTable dorisTable, List<String> partition
733687
Long partitionId = partitionNameToIdMapBefore.remove(partitionName);
734688
idToPartitionItemBefore.remove(partitionId);
735689
partitionValuesMap.remove(partitionId);
736-
List<UniqueId> uniqueIds = idToUniqueIdsMapBefore.remove(partitionId);
737-
for (UniqueId uniqueId : uniqueIds) {
738-
if (uidToPartitionRangeBefore != null) {
739-
Range<PartitionKey> range = uidToPartitionRangeBefore.remove(uniqueId);
740-
if (range != null) {
741-
rangeToIdBefore.remove(range);
742-
}
743-
}
744-
745-
if (singleUidToColumnRangeMapBefore != null) {
746-
Range<ColumnBound> range = singleUidToColumnRangeMapBefore.remove(uniqueId);
747-
if (range != null) {
748-
singleColumnRangeMapBefore.remove(range);
749-
}
750-
}
751-
}
752690

753691
if (invalidPartitionCache) {
754692
invalidatePartitionCache(dorisTable, partitionName);
755693
}
756694
}
695+
// Rebuild sorted partition ranges after dropping partitions
696+
copy.rebuildSortedPartitionRanges();
757697
HivePartitionValues partitionValuesCur = partitionValuesCache.getIfPresent(key);
758698
if (partitionValuesCur == partitionValues) {
759699
partitionValuesCache.put(key, copy);
@@ -933,7 +873,7 @@ public boolean equals(Object obj) {
933873
return dummyKey == ((FileCacheKey) obj).dummyKey;
934874
}
935875
return location.equals(((FileCacheKey) obj).location)
936-
&& Objects.equals(partitionValues, ((FileCacheKey) obj).partitionValues);
876+
&& Objects.equals(partitionValues, ((FileCacheKey) obj).partitionValues);
937877
}
938878

939879
boolean isSameTable(long id) {
@@ -1031,54 +971,69 @@ public static class HiveFileStatus {
1031971
@Data
1032972
public static class HivePartitionValues {
1033973
private BiMap<String, Long> partitionNameToIdMap;
1034-
private Map<Long, List<UniqueId>> idToUniqueIdsMap;
1035974
private Map<Long, PartitionItem> idToPartitionItem;
1036975
private Map<Long, List<String>> partitionValuesMap;
1037-
//multi pair
1038-
private Map<UniqueId, Range<PartitionKey>> uidToPartitionRange;
1039-
private Map<Range<PartitionKey>, UniqueId> rangeToId;
1040-
//single pair
1041-
private RangeMap<ColumnBound, UniqueId> singleColumnRangeMap;
1042-
private Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap;
976+
977+
// Sorted partition ranges for binary search filtering.
978+
// Built at construction time, shares the same lifecycle with HivePartitionValues.
979+
private SortedPartitionRanges<String> sortedPartitionRanges;
1043980

1044981
public HivePartitionValues() {
1045982
}
1046983

1047984
public HivePartitionValues(Map<Long, PartitionItem> idToPartitionItem,
1048-
Map<UniqueId, Range<PartitionKey>> uidToPartitionRange,
1049-
Map<Range<PartitionKey>, UniqueId> rangeToId,
1050-
RangeMap<ColumnBound, UniqueId> singleColumnRangeMap,
1051985
BiMap<String, Long> partitionNameToIdMap,
1052-
Map<Long, List<UniqueId>> idToUniqueIdsMap,
1053-
Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap,
1054986
Map<Long, List<String>> partitionValuesMap) {
1055987
this.idToPartitionItem = idToPartitionItem;
1056-
this.uidToPartitionRange = uidToPartitionRange;
1057-
this.rangeToId = rangeToId;
1058-
this.singleColumnRangeMap = singleColumnRangeMap;
1059988
this.partitionNameToIdMap = partitionNameToIdMap;
1060-
this.idToUniqueIdsMap = idToUniqueIdsMap;
1061-
this.singleUidToColumnRangeMap = singleUidToColumnRangeMap;
1062989
this.partitionValuesMap = partitionValuesMap;
990+
this.sortedPartitionRanges = buildSortedPartitionRanges();
1063991
}
1064992

993+
/**
994+
* Create a copy for incremental updates (add/drop partitions).
995+
* The sortedPartitionRanges will be rebuilt after the caller modifies the partition data.
996+
*/
1065997
public HivePartitionValues copy() {
1066998
HivePartitionValues copy = new HivePartitionValues();
1067999
copy.setPartitionNameToIdMap(partitionNameToIdMap == null ? null : HashBiMap.create(partitionNameToIdMap));
1068-
copy.setIdToUniqueIdsMap(idToUniqueIdsMap == null ? null : Maps.newHashMap(idToUniqueIdsMap));
10691000
copy.setIdToPartitionItem(idToPartitionItem == null ? null : Maps.newHashMap(idToPartitionItem));
10701001
copy.setPartitionValuesMap(partitionValuesMap == null ? null : Maps.newHashMap(partitionValuesMap));
1071-
copy.setUidToPartitionRange(uidToPartitionRange == null ? null : Maps.newHashMap(uidToPartitionRange));
1072-
copy.setRangeToId(rangeToId == null ? null : Maps.newHashMap(rangeToId));
1073-
copy.setSingleUidToColumnRangeMap(
1074-
singleUidToColumnRangeMap == null ? null : Maps.newHashMap(singleUidToColumnRangeMap));
1075-
if (singleColumnRangeMap != null) {
1076-
RangeMap<ColumnBound, UniqueId> copySingleColumnRangeMap = TreeRangeMap.create();
1077-
copySingleColumnRangeMap.putAll(singleColumnRangeMap);
1078-
copy.setSingleColumnRangeMap(copySingleColumnRangeMap);
1079-
}
1002+
// sortedPartitionRanges is not copied here, caller should call rebuildSortedPartitionRanges()
1003+
// after modifying partition data
10801004
return copy;
10811005
}
1006+
1007+
/**
1008+
* Rebuild sorted partition ranges after incremental updates.
1009+
* Should be called after add/drop partitions.
1010+
*/
1011+
public void rebuildSortedPartitionRanges() {
1012+
this.sortedPartitionRanges = buildSortedPartitionRanges();
1013+
}
1014+
1015+
/**
1016+
* Get sorted partition ranges for binary search filtering.
1017+
*/
1018+
public Optional<SortedPartitionRanges<String>> getSortedPartitionRanges() {
1019+
return Optional.ofNullable(sortedPartitionRanges);
1020+
}
1021+
1022+
private SortedPartitionRanges<String> buildSortedPartitionRanges() {
1023+
if (partitionNameToIdMap == null || partitionNameToIdMap.isEmpty()) {
1024+
return null;
1025+
}
1026+
1027+
// Build name to partition item map for SortedPartitionRanges.buildFrom
1028+
BiMap<Long, String> idToName = partitionNameToIdMap.inverse();
1029+
Map<String, PartitionItem> nameToPartitionItem = Maps.newHashMapWithExpectedSize(idToPartitionItem.size());
1030+
for (Map.Entry<Long, PartitionItem> entry : idToPartitionItem.entrySet()) {
1031+
String partitionName = idToName.get(entry.getKey());
1032+
nameToPartitionItem.put(partitionName, entry.getValue());
1033+
}
1034+
1035+
return SortedPartitionRanges.build(nameToPartitionItem);
1036+
}
10821037
}
10831038

10841039
/**

0 commit comments

Comments
 (0)