Skip to content

Commit 574f0f6

Browse files
authored
[Bug](catalog) fix runtime filter partition pruning error with binary type (#59564)
### What problem does this PR solve? Related PR: #53399 Problem Summary: the serializePartitionValue function will return String value. But the binary type use String with utf8 will be cause data corrupted, and it is not same with origin data.
1 parent 2855863 commit 574f0f6

File tree

8 files changed

+53
-33
lines changed

8 files changed

+53
-33
lines changed

docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,4 +88,15 @@ USING iceberg
8888
TBLPROPERTIES(
8989
'write.format.default' = 'parquet',
9090
'format-version' = '1'
91-
);
91+
);
92+
93+
CREATE TABLE binary_partitioned_table (
94+
id BIGINT,
95+
name STRING,
96+
partition_bin BINARY
97+
)
98+
USING iceberg
99+
PARTITIONED BY (partition_bin);
100+
101+
insert into binary_partitioned_table values
102+
(1, 'a', X"0FF102FDFEFF");

fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@
117117
import java.io.IOException;
118118
import java.math.BigDecimal;
119119
import java.nio.ByteBuffer;
120-
import java.nio.charset.StandardCharsets;
121120
import java.time.DateTimeException;
122121
import java.time.Instant;
123122
import java.time.LocalDate;
@@ -686,16 +685,8 @@ private static String serializePartitionValue(org.apache.iceberg.types.Type type
686685
return null;
687686
}
688687
return value.toString();
689-
case FIXED:
690-
case BINARY:
691-
if (value == null) {
692-
return null;
693-
}
694-
// Fixed and binary types are stored as ByteBuffer
695-
ByteBuffer buffer = (ByteBuffer) value;
696-
byte[] res = new byte[buffer.limit()];
697-
buffer.get(res);
698-
return new String(res, StandardCharsets.UTF_8);
688+
// case binary, fixed should not supported, because if return string with utf8,
689+
// the data maybe be corrupted
699690
case DATE:
700691
if (value == null) {
701692
return null;

fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@
7979

8080
import java.io.FileNotFoundException;
8181
import java.io.IOException;
82-
import java.nio.charset.StandardCharsets;
8382
import java.time.DateTimeException;
8483
import java.time.LocalDate;
8584
import java.time.LocalTime;
@@ -456,12 +455,8 @@ private static String serializePartitionValue(org.apache.paimon.types.DataType t
456455
return null;
457456
}
458457
return value.toString();
459-
case BINARY:
460-
case VARBINARY:
461-
if (value == null) {
462-
return null;
463-
}
464-
return new String((byte[]) value, StandardCharsets.UTF_8);
458+
// case binary, varbinary should not supported, because if return string with utf8,
459+
// the data maybe be corrupted
465460
case DATE:
466461
if (value == null) {
467462
return null;

fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
import org.apache.doris.common.DdlException;
2626
import org.apache.doris.common.Pair;
2727
import org.apache.doris.common.util.MasterDaemon;
28-
import org.apache.doris.datasource.ExternalTable;
29-
import org.apache.doris.datasource.hive.HMSExternalTable;
3028
import org.apache.doris.datasource.iceberg.IcebergExternalTable;
3129
import org.apache.doris.info.TableNameInfo;
3230
import org.apache.doris.persist.TableStatsDeletionLog;
@@ -150,7 +148,7 @@ protected void processOneJob(TableIf table, Set<Pair<String, String>> columns, J
150148
if (StatisticsUtil.enablePartitionAnalyze() && table.isPartitionedTable()) {
151149
analysisMethod = AnalysisMethod.FULL;
152150
}
153-
if (table instanceof ExternalTable) { // External table only support full analyze now
151+
if (table instanceof IcebergExternalTable) { // IcebergExternalTable table only support full analyze now
154152
analysisMethod = AnalysisMethod.FULL;
155153
}
156154
boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE);
@@ -234,9 +232,7 @@ protected boolean supportAutoAnalyze(TableIf tableIf) {
234232
if (tableIf == null) {
235233
return false;
236234
}
237-
return tableIf instanceof OlapTable || tableIf instanceof IcebergExternalTable
238-
|| tableIf instanceof HMSExternalTable
239-
&& ((HMSExternalTable) tableIf).getDlaType().equals(HMSExternalTable.DLAType.HIVE);
235+
return StatisticsUtil.supportAutoAnalyze(tableIf);
240236
}
241237

242238
protected AnalysisInfo createAnalyzeJobForTbl(TableIf table, Set<Pair<String, String>> jobColumns,

fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,22 +1131,45 @@ public static boolean needAnalyzeColumn(TableIf table, Pair<String, String> colu
11311131
// 3. Check partition
11321132
return needAnalyzePartition(olapTable, tableStatsStatus, columnStatsMeta);
11331133
} else {
1134-
if (!(table instanceof HMSExternalTable || (table instanceof IcebergExternalTable))) {
1134+
if (!StatisticsUtil.supportAutoAnalyze(table)) {
11351135
return false;
11361136
}
1137-
if (table instanceof HMSExternalTable) {
1138-
HMSExternalTable hmsTable = (HMSExternalTable) table;
1139-
if (!hmsTable.getDlaType().equals(DLAType.HIVE) && !hmsTable.getDlaType().equals(DLAType.ICEBERG)) {
1140-
return false;
1141-
}
1142-
}
11431137
// External is hard to calculate change rate, use time interval to control
11441138
// analyze frequency.
11451139
return System.currentTimeMillis()
11461140
- tableStatsStatus.lastAnalyzeTime > StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis();
11471141
}
11481142
}
11491143

1144+
/**
1145+
* Check if the table supports auto analyze feature.
1146+
* @param table The table to check
1147+
* @return true if the table supports auto analyze, false otherwise
1148+
*/
1149+
public static boolean supportAutoAnalyze(TableIf table) {
1150+
if (table == null) {
1151+
return false;
1152+
}
1153+
1154+
// Support OLAP table
1155+
if (table instanceof OlapTable) {
1156+
return true;
1157+
}
1158+
1159+
// Support Iceberg table
1160+
if (table instanceof IcebergExternalTable) {
1161+
return true;
1162+
}
1163+
1164+
// Support HMS table (only HIVE and ICEBERG types)
1165+
if (table instanceof HMSExternalTable) {
1166+
HMSExternalTable hmsTable = (HMSExternalTable) table;
1167+
DLAType dlaType = hmsTable.getDlaType();
1168+
return dlaType.equals(DLAType.HIVE) || dlaType.equals(DLAType.ICEBERG);
1169+
}
1170+
return false;
1171+
}
1172+
11501173
public static boolean needAnalyzePartition(OlapTable table, TableStatsMeta tableStatsStatus,
11511174
ColStatsMeta columnStatsMeta) {
11521175
if (!StatisticsUtil.enablePartitionAnalyze() || !table.isPartitionedTable()) {

fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ public DLAType getDlaType() {
145145
HMSExternalCatalog hmsCatalog = new HMSExternalCatalog(0, "jdbc_ctl", null, Maps.newHashMap(), "");
146146
ExternalTable icebergExternalTable = new HMSExternalTable(1, "hmsTable", "hmsDb", hmsCatalog,
147147
hmsExternalDatabase);
148-
Assertions.assertFalse(collector.supportAutoAnalyze(icebergExternalTable));
148+
Assertions.assertTrue(collector.supportAutoAnalyze(icebergExternalTable));
149149

150150
new MockUp<HMSExternalTable>() {
151151
@Mock
Binary file not shown.

regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,4 +157,8 @@ suite("test_iceberg_varbinary", "p0,external,doris,external_docker,external_dock
157157
qt_select22 """
158158
select multi_distinct_count(col2),multi_distinct_count(col1) from test_ice_uuid_parquet;
159159
"""
160+
161+
qt_select23 """
162+
select * from binary_partitioned_table where from_hex(partition_bin)="0FF102FDFEFF";
163+
"""
160164
}

0 commit comments

Comments
 (0)