Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,15 @@ USING iceberg
TBLPROPERTIES(
'write.format.default' = 'parquet',
'format-version' = '1'
);
);

CREATE TABLE binary_partitioned_table (
id BIGINT,
name STRING,
partition_bin BINARY
)
USING iceberg
PARTITIONED BY (partition_bin);

insert into binary_partitioned_table values
(1, 'a', X"0FF102FDFEFF");
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.time.DateTimeException;
import java.time.Instant;
import java.time.LocalDate;
Expand Down Expand Up @@ -686,16 +685,8 @@ private static String serializePartitionValue(org.apache.iceberg.types.Type type
return null;
}
return value.toString();
case FIXED:
case BINARY:
if (value == null) {
return null;
}
// Fixed and binary types are stored as ByteBuffer
ByteBuffer buffer = (ByteBuffer) value;
byte[] res = new byte[buffer.limit()];
buffer.get(res);
return new String(res, StandardCharsets.UTF_8);
// case binary, fixed should not supported, because if return string with utf8,
// the data maybe be corrupted
case DATE:
if (value == null) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalTime;
Expand Down Expand Up @@ -456,12 +455,8 @@ private static String serializePartitionValue(org.apache.paimon.types.DataType t
return null;
}
return value.toString();
case BINARY:
case VARBINARY:
if (value == null) {
return null;
}
return new String((byte[]) value, StandardCharsets.UTF_8);
// case binary, varbinary should not supported, because if return string with utf8,
// the data maybe be corrupted
case DATE:
if (value == null) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -595,8 +595,7 @@ protected List<AutoAnalysisPendingJob> getPendingJobs(Map<TableName, Set<Pair<St
for (Entry<TableName, Set<Pair<String, String>>> entry : jobMap.entrySet()) {
TableName table = entry.getKey();
if (tblName == null
|| tblName.getCtl() == null && tblName.getDb() == null && tblName.getTbl() == null
|| tblName.equals(table)) {
|| matchesFilter(tblName, table)) {
result.add(new AutoAnalysisPendingJob(table.getCtl(),
table.getDb(), table.getTbl(), entry.getValue(), priority));
}
Expand All @@ -605,6 +604,29 @@ protected List<AutoAnalysisPendingJob> getPendingJobs(Map<TableName, Set<Pair<St
return result;
}

private boolean matchesFilter(TableName filter, TableName target) {
if (StringUtils.isEmpty(filter.getCtl())
&& StringUtils.isEmpty(filter.getDb())
&& StringUtils.isEmpty(filter.getTbl())) {
return true;
}

if (!StringUtils.isEmpty(filter.getCtl())
&& !filter.getCtl().equals(target.getCtl())) {
return false;
}
if (!StringUtils.isEmpty(filter.getDb())
&& !filter.getDb().equals(target.getDb())) {
return false;
}
if (!StringUtils.isEmpty(filter.getTbl())
&& !filter.getTbl().equals(target.getTbl())) {
return false;
}

return true;
}

public List<AnalysisInfo> findAnalysisJobs(String state, String ctl, String db,
String table, long jobId, boolean isAuto) {
TableIf tbl = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.apache.doris.common.DdlException;
import org.apache.doris.common.Pair;
import org.apache.doris.common.util.MasterDaemon;
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.datasource.iceberg.IcebergExternalTable;
import org.apache.doris.persist.TableStatsDeletionLog;
import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
import org.apache.doris.statistics.AnalysisInfo.JobType;
Expand Down Expand Up @@ -146,6 +146,9 @@ protected void processOneJob(TableIf table, Set<Pair<String, String>> columns, J
if (StatisticsUtil.enablePartitionAnalyze() && table.isPartitionedTable()) {
analysisMethod = AnalysisMethod.FULL;
}
if (table instanceof IcebergExternalTable) { // IcebergExternalTable table only support full analyze now
analysisMethod = AnalysisMethod.FULL;
}
boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE);
OlapTable olapTable = table instanceof OlapTable ? (OlapTable) table : null;
AnalysisManager manager = Env.getServingEnv().getAnalysisManager();
Expand Down Expand Up @@ -227,9 +230,7 @@ protected boolean supportAutoAnalyze(TableIf tableIf) {
if (tableIf == null) {
return false;
}
return tableIf instanceof OlapTable
|| tableIf instanceof HMSExternalTable
&& ((HMSExternalTable) tableIf).getDlaType().equals(HMSExternalTable.DLAType.HIVE);
return StatisticsUtil.supportAutoAnalyze(tableIf);
}

protected AnalysisInfo createAnalyzeJobForTbl(TableIf table, Set<Pair<String, String>> jobColumns,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.datasource.hive.HMSExternalTable.DLAType;
import org.apache.doris.datasource.iceberg.IcebergExternalTable;
import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral;
import org.apache.doris.nereids.trees.expressions.literal.IPv4Literal;
import org.apache.doris.nereids.trees.expressions.literal.IPv6Literal;
Expand Down Expand Up @@ -1148,20 +1149,45 @@ public static boolean needAnalyzeColumn(TableIf table, Pair<String, String> colu
// 3. Check partition
return needAnalyzePartition(olapTable, tableStatsStatus, columnStatsMeta);
} else {
// Now, we only support Hive external table auto analyze.
if (!(table instanceof HMSExternalTable)) {
if (!StatisticsUtil.supportAutoAnalyze(table)) {
return false;
}
HMSExternalTable hmsTable = (HMSExternalTable) table;
if (!hmsTable.getDlaType().equals(DLAType.HIVE)) {
return false;
}
// External is hard to calculate change rate, use time interval to control analyze frequency.
// External is hard to calculate change rate, use time interval to control
// analyze frequency.
return System.currentTimeMillis()
- tableStatsStatus.lastAnalyzeTime > StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis();
}
}

/**
* Check if the table supports auto analyze feature.
* @param table The table to check
* @return true if the table supports auto analyze, false otherwise
*/
public static boolean supportAutoAnalyze(TableIf table) {
if (table == null) {
return false;
}

// Support OLAP table
if (table instanceof OlapTable) {
return true;
}

// Support Iceberg table
if (table instanceof IcebergExternalTable) {
return true;
}

// Support HMS table (only HIVE and ICEBERG types)
if (table instanceof HMSExternalTable) {
HMSExternalTable hmsTable = (HMSExternalTable) table;
DLAType dlaType = hmsTable.getDlaType();
return dlaType.equals(DLAType.HIVE) || dlaType.equals(DLAType.ICEBERG);
}
return false;
}

public static boolean needAnalyzePartition(OlapTable table, TableStatsMeta tableStatsStatus,
ColStatsMeta columnStatsMeta) {
if (!StatisticsUtil.enablePartitionAnalyze() || !table.isPartitionedTable()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ public DLAType getDlaType() {
HMSExternalCatalog hmsCatalog = new HMSExternalCatalog(0, "jdbc_ctl", null, Maps.newHashMap(), "");
ExternalTable icebergExternalTable = new HMSExternalTable(1, "hmsTable", "hmsDb", hmsCatalog,
hmsExternalDatabase);
Assertions.assertFalse(collector.supportAutoAnalyze(icebergExternalTable));
Assertions.assertTrue(collector.supportAutoAnalyze(icebergExternalTable));

new MockUp<HMSExternalTable>() {
@Mock
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,8 @@ suite("test_iceberg_varbinary", "p0,external,doris,external_docker,external_dock
qt_select22 """
select multi_distinct_count(col2),multi_distinct_count(col1) from test_ice_uuid_parquet;
"""

qt_select23 """
select * from binary_partitioned_table where from_hex(partition_bin)="0FF102FDFEFF";
"""
}
Loading