diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql index 4c0d5883089240..b92bfcf7fc6889 100644 --- a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql @@ -88,4 +88,15 @@ USING iceberg TBLPROPERTIES( 'write.format.default' = 'parquet', 'format-version' = '1' -); \ No newline at end of file +); + +CREATE TABLE binary_partitioned_table ( + id BIGINT, + name STRING, + partition_bin BINARY +) +USING iceberg +PARTITIONED BY (partition_bin); + +insert into binary_partitioned_table values +(1, 'a', X"0FF102FDFEFF"); \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java index a7dd141357ce90..c381239bc0cc8c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java @@ -117,7 +117,6 @@ import java.io.IOException; import java.math.BigDecimal; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.time.DateTimeException; import java.time.Instant; import java.time.LocalDate; @@ -686,16 +685,8 @@ private static String serializePartitionValue(org.apache.iceberg.types.Type type return null; } return value.toString(); - case FIXED: - case BINARY: - if (value == null) { - return null; - } - // Fixed and binary types are stored as ByteBuffer - ByteBuffer buffer = (ByteBuffer) value; - byte[] res = new byte[buffer.limit()]; - buffer.get(res); - return new String(res, StandardCharsets.UTF_8); + // BINARY and FIXED are intentionally not supported: if returned as a UTF-8 string, + // the data may be 
corrupted case DATE: if (value == null) { return null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java index 38ba98af038afe..730ba2ff810b45 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java @@ -79,7 +79,6 @@ import java.io.FileNotFoundException; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.time.DateTimeException; import java.time.LocalDate; import java.time.LocalTime; @@ -456,12 +455,8 @@ private static String serializePartitionValue(org.apache.paimon.types.DataType t return null; } return value.toString(); - case BINARY: - case VARBINARY: - if (value == null) { - return null; - } - return new String((byte[]) value, StandardCharsets.UTF_8); + // BINARY and VARBINARY are intentionally not supported: if returned as a UTF-8 string, + // the data may be corrupted case DATE: if (value == null) { return null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 08c25db6212ec6..26bc81b985cfc7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -595,8 +595,7 @@ protected List getPendingJobs(Map>> entry : jobMap.entrySet()) { TableName table = entry.getKey(); if (tblName == null - || tblName.getCtl() == null && tblName.getDb() == null && tblName.getTbl() == null - || tblName.equals(table)) { + || matchesFilter(tblName, table)) { result.add(new AutoAnalysisPendingJob(table.getCtl(), table.getDb(), table.getTbl(), entry.getValue(), priority)); } @@ -605,6 +604,29 @@ protected List getPendingJobs(Map findAnalysisJobs(String state, String ctl, String db, String table, long 
jobId, boolean isAuto) { TableIf tbl = null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 8cd96bc4957966..2ce0f05229b22f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -26,7 +26,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.Pair; import org.apache.doris.common.util.MasterDaemon; -import org.apache.doris.datasource.hive.HMSExternalTable; +import org.apache.doris.datasource.iceberg.IcebergExternalTable; import org.apache.doris.persist.TableStatsDeletionLog; import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisInfo.JobType; @@ -146,6 +146,9 @@ protected void processOneJob(TableIf table, Set> columns, J if (StatisticsUtil.enablePartitionAnalyze() && table.isPartitionedTable()) { analysisMethod = AnalysisMethod.FULL; } + if (table instanceof IcebergExternalTable) { // IcebergExternalTable only supports full analyze for now + analysisMethod = AnalysisMethod.FULL; + } boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE); OlapTable olapTable = table instanceof OlapTable ? 
(OlapTable) table : null; AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); @@ -227,9 +230,7 @@ protected boolean supportAutoAnalyze(TableIf tableIf) { if (tableIf == null) { return false; } - return tableIf instanceof OlapTable - || tableIf instanceof HMSExternalTable - && ((HMSExternalTable) tableIf).getDlaType().equals(HMSExternalTable.DLAType.HIVE); + return StatisticsUtil.supportAutoAnalyze(tableIf); } protected AnalysisInfo createAnalyzeJobForTbl(TableIf table, Set> jobColumns, diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 9e5fbfa4d4d59b..e28191ed075425 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -56,6 +56,7 @@ import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.datasource.hive.HMSExternalTable; import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; +import org.apache.doris.datasource.iceberg.IcebergExternalTable; import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral; import org.apache.doris.nereids.trees.expressions.literal.IPv4Literal; import org.apache.doris.nereids.trees.expressions.literal.IPv6Literal; @@ -1148,20 +1149,45 @@ public static boolean needAnalyzeColumn(TableIf table, Pair colu // 3. Check partition return needAnalyzePartition(olapTable, tableStatsStatus, columnStatsMeta); } else { - // Now, we only support Hive external table auto analyze. - if (!(table instanceof HMSExternalTable)) { + if (!StatisticsUtil.supportAutoAnalyze(table)) { return false; } - HMSExternalTable hmsTable = (HMSExternalTable) table; - if (!hmsTable.getDlaType().equals(DLAType.HIVE)) { - return false; - } - // External is hard to calculate change rate, use time interval to control analyze frequency. 
+ // External is hard to calculate change rate, use time interval to control + // analyze frequency. return System.currentTimeMillis() - tableStatsStatus.lastAnalyzeTime > StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis(); } } + /** + * Check if the table supports auto analyze feature. + * @param table The table to check + * @return true if the table supports auto analyze, false otherwise + */ + public static boolean supportAutoAnalyze(TableIf table) { + if (table == null) { + return false; + } + + // Support OLAP table + if (table instanceof OlapTable) { + return true; + } + + // Support Iceberg table + if (table instanceof IcebergExternalTable) { + return true; + } + + // Support HMS table (only HIVE and ICEBERG types) + if (table instanceof HMSExternalTable) { + HMSExternalTable hmsTable = (HMSExternalTable) table; + DLAType dlaType = hmsTable.getDlaType(); + return dlaType.equals(DLAType.HIVE) || dlaType.equals(DLAType.ICEBERG); + } + return false; + } + public static boolean needAnalyzePartition(OlapTable table, TableStatsMeta tableStatsStatus, ColStatsMeta columnStatsMeta) { if (!StatisticsUtil.enablePartitionAnalyze() || !table.isPartitionedTable()) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index 625fd904809339..9e7e3a627ab79f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -145,7 +145,7 @@ public DLAType getDlaType() { HMSExternalCatalog hmsCatalog = new HMSExternalCatalog(0, "jdbc_ctl", null, Maps.newHashMap(), ""); ExternalTable icebergExternalTable = new HMSExternalTable(1, "hmsTable", "hmsDb", hmsCatalog, hmsExternalDatabase); - Assertions.assertFalse(collector.supportAutoAnalyze(icebergExternalTable)); + 
Assertions.assertTrue(collector.supportAutoAnalyze(icebergExternalTable)); new MockUp() { @Mock diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out index 5845c71a460b70..3d224775383640 100644 Binary files a/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out and b/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out differ diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy index 9db7b5fb9348e6..62d3d367f80582 100644 --- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy +++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy @@ -157,4 +157,8 @@ suite("test_iceberg_varbinary", "p0,external,doris,external_docker,external_dock qt_select22 """ select multi_distinct_count(col2),multi_distinct_count(col1) from test_ice_uuid_parquet; """ + + qt_select23 """ + select * from binary_partitioned_table where from_hex(partition_bin)="0FF102FDFEFF"; + """ }