diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql index 4c0d5883089240..b92bfcf7fc6889 100644 --- a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run22.sql @@ -88,4 +88,15 @@ USING iceberg TBLPROPERTIES( 'write.format.default' = 'parquet', 'format-version' = '1' -); \ No newline at end of file +); + +CREATE TABLE binary_partitioned_table ( + id BIGINT, + name STRING, + partition_bin BINARY +) +USING iceberg +PARTITIONED BY (partition_bin); + +insert into binary_partitioned_table values +(1, 'a', X"0FF102FDFEFF"); \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java index a7dd141357ce90..c381239bc0cc8c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java @@ -117,7 +117,6 @@ import java.io.IOException; import java.math.BigDecimal; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.time.DateTimeException; import java.time.Instant; import java.time.LocalDate; @@ -686,16 +685,8 @@ private static String serializePartitionValue(org.apache.iceberg.types.Type type return null; } return value.toString(); - case FIXED: - case BINARY: - if (value == null) { - return null; - } - // Fixed and binary types are stored as ByteBuffer - ByteBuffer buffer = (ByteBuffer) value; - byte[] res = new byte[buffer.limit()]; - buffer.get(res); - return new String(res, StandardCharsets.UTF_8); + // BINARY and FIXED are deliberately not supported: if the bytes are returned as a UTF-8 string, + // the data may be 
corrupted case DATE: if (value == null) { return null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java index 38ba98af038afe..730ba2ff810b45 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java @@ -79,7 +79,6 @@ import java.io.FileNotFoundException; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.time.DateTimeException; import java.time.LocalDate; import java.time.LocalTime; @@ -456,12 +455,8 @@ private static String serializePartitionValue(org.apache.paimon.types.DataType t return null; } return value.toString(); - case BINARY: - case VARBINARY: - if (value == null) { - return null; - } - return new String((byte[]) value, StandardCharsets.UTF_8); + // BINARY and VARBINARY are deliberately not supported: if the bytes are returned as a UTF-8 string, + // the data may be corrupted case DATE: if (value == null) { return null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 40b1a7022c82c3..690f9392f3aeac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -25,8 +25,6 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.Pair; import org.apache.doris.common.util.MasterDaemon; -import org.apache.doris.datasource.ExternalTable; -import org.apache.doris.datasource.hive.HMSExternalTable; import org.apache.doris.datasource.iceberg.IcebergExternalTable; import org.apache.doris.info.TableNameInfo; import org.apache.doris.persist.TableStatsDeletionLog; @@ -150,7 +148,7 @@ protected void processOneJob(TableIf table, Set> columns, J if 
(StatisticsUtil.enablePartitionAnalyze() && table.isPartitionedTable()) { analysisMethod = AnalysisMethod.FULL; } - if (table instanceof ExternalTable) { // External table only support full analyze now + if (table instanceof IcebergExternalTable) { // Iceberg external tables only support full analyze for now analysisMethod = AnalysisMethod.FULL; } boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE); @@ -234,9 +232,7 @@ protected boolean supportAutoAnalyze(TableIf tableIf) { if (tableIf == null) { return false; } - return tableIf instanceof OlapTable || tableIf instanceof IcebergExternalTable - || tableIf instanceof HMSExternalTable - && ((HMSExternalTable) tableIf).getDlaType().equals(HMSExternalTable.DLAType.HIVE); + return StatisticsUtil.supportAutoAnalyze(tableIf); } protected AnalysisInfo createAnalyzeJobForTbl(TableIf table, Set> jobColumns, diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 9cf4d34f7669a5..08920176534496 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -1131,15 +1131,9 @@ public static boolean needAnalyzeColumn(TableIf table, Pair colu // 3. Check partition return needAnalyzePartition(olapTable, tableStatsStatus, columnStatsMeta); } else { - if (!(table instanceof HMSExternalTable || (table instanceof IcebergExternalTable))) { + if (!StatisticsUtil.supportAutoAnalyze(table)) { return false; } - if (table instanceof HMSExternalTable) { - HMSExternalTable hmsTable = (HMSExternalTable) table; - if (!hmsTable.getDlaType().equals(DLAType.HIVE) && !hmsTable.getDlaType().equals(DLAType.ICEBERG)) { - return false; - } - } // External is hard to calculate change rate, use time interval to control // analyze frequency. 
return System.currentTimeMillis() @@ -1147,6 +1141,35 @@ public static boolean needAnalyzeColumn(TableIf table, Pair colu } } + /** + * Check if the table supports auto analyze feature. + * @param table The table to check + * @return true if the table supports auto analyze, false otherwise + */ + public static boolean supportAutoAnalyze(TableIf table) { + if (table == null) { + return false; + } + + // Support OLAP table + if (table instanceof OlapTable) { + return true; + } + + // Support Iceberg table + if (table instanceof IcebergExternalTable) { + return true; + } + + // Support HMS table (only HIVE and ICEBERG types) + if (table instanceof HMSExternalTable) { + HMSExternalTable hmsTable = (HMSExternalTable) table; + DLAType dlaType = hmsTable.getDlaType(); + return dlaType.equals(DLAType.HIVE) || dlaType.equals(DLAType.ICEBERG); + } + return false; + } + public static boolean needAnalyzePartition(OlapTable table, TableStatsMeta tableStatsStatus, ColStatsMeta columnStatsMeta) { if (!StatisticsUtil.enablePartitionAnalyze() || !table.isPartitionedTable()) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index 316ad8e364f0c9..c80982577cf132 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -145,7 +145,7 @@ public DLAType getDlaType() { HMSExternalCatalog hmsCatalog = new HMSExternalCatalog(0, "jdbc_ctl", null, Maps.newHashMap(), ""); ExternalTable icebergExternalTable = new HMSExternalTable(1, "hmsTable", "hmsDb", hmsCatalog, hmsExternalDatabase); - Assertions.assertFalse(collector.supportAutoAnalyze(icebergExternalTable)); + Assertions.assertTrue(collector.supportAutoAnalyze(icebergExternalTable)); new MockUp() { @Mock diff --git 
a/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out index 5845c71a460b70..3d224775383640 100644 Binary files a/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out and b/regression-test/data/external_table_p0/iceberg/test_iceberg_varbinary.out differ diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy index 9db7b5fb9348e6..62d3d367f80582 100644 --- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy +++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_varbinary.groovy @@ -157,4 +157,8 @@ suite("test_iceberg_varbinary", "p0,external,doris,external_docker,external_dock qt_select22 """ select multi_distinct_count(col2),multi_distinct_count(col1) from test_ice_uuid_parquet; """ + + qt_select23 """ + select * from binary_partitioned_table where from_hex(partition_bin)="0FF102FDFEFF"; + """ }