diff --git a/docs/static/rest-catalog-open-api.yaml b/docs/static/rest-catalog-open-api.yaml index fcf5c53d11a0..0aa17e2b09f6 100644 --- a/docs/static/rest-catalog-open-api.yaml +++ b/docs/static/rest-catalog-open-api.yaml @@ -3252,6 +3252,9 @@ components: lastFileCreationTime: type: integer format: int64 + totalBuckets: + type: integer + format: int32 done: type: boolean createdAt: @@ -3285,6 +3288,9 @@ components: lastFileCreationTime: type: integer format: int64 + totalBuckets: + type: integer + format: int32 ####################################### # Examples of different values # ####################################### diff --git a/paimon-api/src/main/java/org/apache/paimon/partition/Partition.java b/paimon-api/src/main/java/org/apache/paimon/partition/Partition.java index 427b61464f4d..c9fa4e099752 100644 --- a/paimon-api/src/main/java/org/apache/paimon/partition/Partition.java +++ b/paimon-api/src/main/java/org/apache/paimon/partition/Partition.java @@ -81,13 +81,14 @@ public Partition( @JsonProperty(FIELD_FILE_SIZE_IN_BYTES) long fileSizeInBytes, @JsonProperty(FIELD_FILE_COUNT) long fileCount, @JsonProperty(FIELD_LAST_FILE_CREATION_TIME) long lastFileCreationTime, + @JsonProperty(FIELD_TOTAL_BUCKETS) int totalBuckets, @JsonProperty(FIELD_DONE) boolean done, @JsonProperty(FIELD_CREATED_AT) @Nullable Long createdAt, @JsonProperty(FIELD_CREATED_BY) @Nullable String createdBy, @JsonProperty(FIELD_UPDATED_AT) @Nullable Long updatedAt, @JsonProperty(FIELD_UPDATED_BY) @Nullable String updatedBy, @JsonProperty(FIELD_OPTIONS) @Nullable Map options) { - super(spec, recordCount, fileSizeInBytes, fileCount, lastFileCreationTime); + super(spec, recordCount, fileSizeInBytes, fileCount, lastFileCreationTime, totalBuckets); this.done = done; this.createdAt = createdAt; this.createdBy = createdBy; @@ -102,6 +103,7 @@ public Partition( long fileSizeInBytes, long fileCount, long lastFileCreationTime, + int totalBuckets, boolean done) { this( spec, @@ -109,6 +111,7 @@ public Partition( fileSizeInBytes, fileCount, lastFileCreationTime, + totalBuckets, done, null, null, @@ -188,6 +191,8 @@ public String toString() { + fileCount + ", lastFileCreationTime=" + lastFileCreationTime + + ", totalBuckets=" + + totalBuckets + ", done=" + done + ", createdAt=" diff --git a/paimon-api/src/main/java/org/apache/paimon/partition/PartitionStatistics.java b/paimon-api/src/main/java/org/apache/paimon/partition/PartitionStatistics.java index 44b61c19d5f5..3c322b5a39c6 100644 --- a/paimon-api/src/main/java/org/apache/paimon/partition/PartitionStatistics.java +++ b/paimon-api/src/main/java/org/apache/paimon/partition/PartitionStatistics.java @@ -44,6 +44,7 @@ public class PartitionStatistics implements Serializable { public static final String FIELD_FILE_SIZE_IN_BYTES = "fileSizeInBytes"; public static final String FIELD_FILE_COUNT = "fileCount"; public static final String FIELD_LAST_FILE_CREATION_TIME = "lastFileCreationTime"; + public static final String FIELD_TOTAL_BUCKETS = "totalBuckets"; @JsonProperty(FIELD_SPEC) protected final Map spec; @@ -60,18 +61,23 @@ public class PartitionStatistics implements Serializable { @JsonProperty(FIELD_LAST_FILE_CREATION_TIME) protected final long lastFileCreationTime; + @JsonProperty(FIELD_TOTAL_BUCKETS) + protected final int totalBuckets; + @JsonCreator public PartitionStatistics( @JsonProperty(FIELD_SPEC) Map spec, @JsonProperty(FIELD_RECORD_COUNT) long recordCount, @JsonProperty(FIELD_FILE_SIZE_IN_BYTES) long fileSizeInBytes, @JsonProperty(FIELD_FILE_COUNT) long fileCount, - @JsonProperty(FIELD_LAST_FILE_CREATION_TIME) long lastFileCreationTime) { + @JsonProperty(FIELD_LAST_FILE_CREATION_TIME) long lastFileCreationTime, + @JsonProperty(FIELD_TOTAL_BUCKETS) int totalBuckets) { this.spec = spec; this.recordCount = recordCount; this.fileSizeInBytes = fileSizeInBytes; this.fileCount = fileCount; this.lastFileCreationTime = lastFileCreationTime; + this.totalBuckets = totalBuckets; } @JsonGetter(FIELD_SPEC) @@ -99,6 +105,11 @@ public long lastFileCreationTime() { return lastFileCreationTime; } + @JsonGetter(FIELD_TOTAL_BUCKETS) + public int totalBuckets() { + return totalBuckets; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -112,12 +123,14 @@ public boolean equals(Object o) { && fileSizeInBytes == that.fileSizeInBytes && fileCount == that.fileCount && lastFileCreationTime == that.lastFileCreationTime + && totalBuckets == that.totalBuckets && Objects.equals(spec, that.spec); } @Override public int hashCode() { - return Objects.hash(spec, recordCount, fileSizeInBytes, fileCount, lastFileCreationTime); + return Objects.hash( + spec, recordCount, fileSizeInBytes, fileCount, lastFileCreationTime, totalBuckets); } @Override @@ -133,6 +146,8 @@ public String toString() { + fileCount + ", lastFileCreationTime=" + lastFileCreationTime + + ", totalBuckets=" + + totalBuckets + '}'; } } diff --git a/paimon-api/src/test/java/org/apache/paimon/partition/PartitionTest.java b/paimon-api/src/test/java/org/apache/paimon/partition/PartitionTest.java index 7ec342d38deb..b589a75cf1b6 100644 --- a/paimon-api/src/test/java/org/apache/paimon/partition/PartitionTest.java +++ b/paimon-api/src/test/java/org/apache/paimon/partition/PartitionTest.java @@ -40,6 +40,7 @@ void testJsonSerializationWithNullValues() { 1024L, // fileSizeInBytes 2L, // fileCount System.currentTimeMillis(), // lastFileCreationTime + 10, // totalBuckets false, // done null, // createdAt null, // createdBy @@ -57,6 +58,7 @@ void testJsonSerializationWithNullValues() { assertThat(json).contains("done"); assertThat(json).contains("recordCount"); + assertThat(json).contains("totalBuckets"); } @Test @@ -69,6 +71,7 @@ void testJsonSerializationWithNonNullValues() { 1024L, 2L, System.currentTimeMillis(), + 10, // totalBuckets true, 1234567890L, // createdAt "user1", // createdBy @@ -78,6 +81,7 @@ void testJsonSerializationWithNonNullValues() { String json = JsonSerdeUtil.toFlatJson(partition); + assertThat(json).contains("totalBuckets"); assertThat(json).contains("createdAt"); assertThat(json).contains("createdBy"); assertThat(json).contains("updatedAt"); diff --git a/paimon-core/src/main/java/org/apache/paimon/catalog/Catalog.java b/paimon-core/src/main/java/org/apache/paimon/catalog/Catalog.java index 74e35dde3dac..f5118fb32c5a 100644 --- a/paimon-core/src/main/java/org/apache/paimon/catalog/Catalog.java +++ b/paimon-core/src/main/java/org/apache/paimon/catalog/Catalog.java @@ -1073,6 +1073,7 @@ List authTableQuery(Identifier identifier, @Nullable List select String NUM_FILES_PROP = "numFiles"; String TOTAL_SIZE_PROP = "totalSize"; String LAST_UPDATE_TIME_PROP = "lastUpdateTime"; + String TOTAL_BUCKETS = "totalBuckets"; // ======================= Exceptions =============================== diff --git a/paimon-core/src/main/java/org/apache/paimon/manifest/PartitionEntry.java b/paimon-core/src/main/java/org/apache/paimon/manifest/PartitionEntry.java index d3417bb71c33..aaee93602a9e 100644 --- a/paimon-core/src/main/java/org/apache/paimon/manifest/PartitionEntry.java +++ b/paimon-core/src/main/java/org/apache/paimon/manifest/PartitionEntry.java @@ -30,6 +30,7 @@ import java.util.HashMap; import java.util.Map; import java.util.Objects; +import java.util.Optional; import static org.apache.paimon.manifest.FileKind.ADD; import static org.apache.paimon.manifest.FileKind.DELETE; @@ -43,18 +44,21 @@ public class PartitionEntry { private final long fileSizeInBytes; private final long fileCount; private final long lastFileCreationTime; + private final int totalBuckets; public PartitionEntry( BinaryRow partition, long recordCount, long fileSizeInBytes, long fileCount, - long lastFileCreationTime) { + long lastFileCreationTime, + int totalBuckets) { this.partition = partition; this.recordCount = recordCount; this.fileSizeInBytes = fileSizeInBytes; this.fileCount = fileCount; this.lastFileCreationTime = lastFileCreationTime; + this.totalBuckets = totalBuckets; } public BinaryRow partition() { @@ -77,13 +81,18 @@ public long lastFileCreationTime() { return lastFileCreationTime; } + public int totalBuckets() { + return totalBuckets; + } + public PartitionEntry merge(PartitionEntry entry) { return new PartitionEntry( partition, recordCount + entry.recordCount, fileSizeInBytes + entry.fileSizeInBytes, fileCount + entry.fileCount, - Math.max(lastFileCreationTime, entry.lastFileCreationTime)); + Math.max(lastFileCreationTime, entry.lastFileCreationTime), + entry.totalBuckets); } public Partition toPartition(InternalRowPartitionComputer computer) { @@ -93,6 +102,7 @@ public Partition toPartition(InternalRowPartitionComputer computer) { fileSizeInBytes, fileCount, lastFileCreationTime, + totalBuckets, false); } @@ -102,15 +112,16 @@ public PartitionStatistics toPartitionStatistics(InternalRowPartitionComputer co recordCount, fileSizeInBytes, fileCount, - lastFileCreationTime); + lastFileCreationTime, + totalBuckets); } public static PartitionEntry fromManifestEntry(ManifestEntry entry) { - return fromDataFile(entry.partition(), entry.kind(), entry.file()); + return fromDataFile(entry.partition(), entry.kind(), entry.file(), entry.totalBuckets()); } public static PartitionEntry fromDataFile( - BinaryRow partition, FileKind kind, DataFileMeta file) { + BinaryRow partition, FileKind kind, DataFileMeta file, int totalBuckets) { long recordCount = file.rowCount(); long fileSizeInBytes = file.fileSize(); long fileCount = 1; @@ -120,7 +131,12 @@ public static PartitionEntry fromDataFile( fileCount = -fileCount; } return new PartitionEntry( - partition, recordCount, fileSizeInBytes, fileCount, file.creationTimeEpochMillis()); + partition, + recordCount, + fileSizeInBytes, + fileCount, + file.creationTimeEpochMillis(), + totalBuckets); } public static Collection merge(Collection fileEntries) { @@ -139,7 +155,12 @@ public static Collection mergeSplits(Collection split for (DataSplit split : splits) { BinaryRow partition = split.partition(); for (DataFileMeta file : split.dataFiles()) { - PartitionEntry partitionEntry = fromDataFile(partition, ADD, file); + PartitionEntry partitionEntry = + fromDataFile( + partition, + ADD, + file, + Optional.ofNullable(split.totalBuckets()).orElse(0)); partitions.compute( partition, (part, old) -> old == null ? partitionEntry : old.merge(partitionEntry)); @@ -170,12 +191,18 @@ public boolean equals(Object o) { && fileSizeInBytes == that.fileSizeInBytes && fileCount == that.fileCount && lastFileCreationTime == that.lastFileCreationTime + && totalBuckets == that.totalBuckets && Objects.equals(partition, that.partition); } @Override public int hashCode() { return Objects.hash( - partition, recordCount, fileSizeInBytes, fileCount, lastFileCreationTime); + partition, + recordCount, + fileSizeInBytes, + fileCount, + lastFileCreationTime, + totalBuckets); } } diff --git a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java index 20cbb9b31d50..91040a19cd2b 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/format/FormatTableScan.java @@ -107,7 +107,7 @@ public List listPartitionEntries() { List partitionEntries = new ArrayList<>(); for (Pair, Path> partition2Path : partition2Paths) { BinaryRow row = toPartitionRow(partition2Path.getKey()); - partitionEntries.add(new PartitionEntry(row, -1L, -1L, -1L, -1L)); + partitionEntries.add(new PartitionEntry(row, -1L, -1L, -1L, -1L, -1)); } return partitionEntries; } diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java index d3314edfa3a7..22d2c852830a 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/PartitionsTable.java @@ -301,7 +301,8 @@ private PartitionEntry toPartitionEntry(Partition partition) { partition.recordCount(), partition.fileSizeInBytes(), partition.fileCount(), - partition.lastFileCreationTime()); + partition.lastFileCreationTime(), + partition.totalBuckets()); } private Timestamp toTimestamp(Long epochMillis) { diff --git a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionStatisticsReporter.java b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionStatisticsReporter.java index c27e1768fd1f..da8d47ba8c3d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/utils/PartitionStatisticsReporter.java +++ b/paimon-core/src/main/java/org/apache/paimon/utils/PartitionStatisticsReporter.java @@ -37,6 +37,7 @@ import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; +import java.util.Optional; import static org.apache.paimon.utils.PartitionPathUtils.extractPartitionSpecFromPath; @@ -73,6 +74,7 @@ public void report(String partition, long modifyTimeMillis) throws Exception { long rowCount = 0; long totalSize = 0; long fileCount = 0; + int totalBuckets = 0; for (DataSplit split : splits) { List fileMetas = split.dataFiles(); fileCount += fileMetas.size(); @@ -80,11 +82,17 @@ public void report(String partition, long modifyTimeMillis) throws Exception { rowCount += fileMeta.rowCount(); totalSize += fileMeta.fileSize(); } + totalBuckets = Optional.ofNullable(split.totalBuckets()).orElse(0); } PartitionStatistics partitionStats = new PartitionStatistics( - partitionSpec, rowCount, totalSize, fileCount, modifyTimeMillis); + partitionSpec, + rowCount, + totalSize, + fileCount, + modifyTimeMillis, + totalBuckets); LOG.info("alter partition {} with statistic {}.", partitionSpec, partitionStats); partitionHandler.alterPartitions(Collections.singletonList(partitionStats)); } diff --git a/paimon-core/src/test/java/org/apache/paimon/partition/PartitionExpireTableTest.java b/paimon-core/src/test/java/org/apache/paimon/partition/PartitionExpireTableTest.java index b78506a825fa..47ba63d9fd92 100644 --- a/paimon-core/src/test/java/org/apache/paimon/partition/PartitionExpireTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/partition/PartitionExpireTableTest.java @@ -51,7 +51,7 @@ public void testCustomExpire() throws Exception { Table table = catalog.getTable(identifier()); String path = table.options().get("path"); - PartitionEntry expire = new PartitionEntry(BinaryRow.singleColumn(1), 1, 1, 1, 1); + PartitionEntry expire = new PartitionEntry(BinaryRow.singleColumn(1), 1, 1, 1, 1, 1); TABLE_EXPIRE_PARTITIONS.put(path, Collections.singletonList(expire)); write(table, GenericRow.of(1, 1)); write(table, GenericRow.of(2, 2)); diff --git a/paimon-core/src/test/java/org/apache/paimon/rest/MockRESTMessage.java b/paimon-core/src/test/java/org/apache/paimon/rest/MockRESTMessage.java index f487815e4d61..597202714325 100644 --- a/paimon-core/src/test/java/org/apache/paimon/rest/MockRESTMessage.java +++ b/paimon-core/src/test/java/org/apache/paimon/rest/MockRESTMessage.java @@ -150,7 +150,7 @@ public static AlterTableRequest alterTableRequest() { public static ListPartitionsResponse listPartitionsResponse() { Map spec = new HashMap<>(); spec.put("f0", "1"); - Partition partition = new Partition(spec, 1, 1, 1, 1, false); + Partition partition = new Partition(spec, 1, 1, 1, 1, 1, false); return new ListPartitionsResponse(ImmutableList.of(partition)); } diff --git a/paimon-core/src/test/java/org/apache/paimon/rest/RESTCatalogServer.java b/paimon-core/src/test/java/org/apache/paimon/rest/RESTCatalogServer.java index 544b4903850c..e84fd00f4559 100644 --- a/paimon-core/src/test/java/org/apache/paimon/rest/RESTCatalogServer.java +++ b/paimon-core/src/test/java/org/apache/paimon/rest/RESTCatalogServer.java @@ -2405,6 +2405,7 @@ private MockResponse commitSnapshot( .lastFileCreationTime(), stats .lastFileCreationTime()), + stats.totalBuckets(), oldPartition.done(), oldPartition.createdAt(), oldPartition.createdBy(), @@ -2630,6 +2631,7 @@ private Partition toPartition(PartitionStatistics stats) { stats.fileSizeInBytes(), stats.fileCount(), stats.lastFileCreationTime(), + stats.totalBuckets(), false, System.currentTimeMillis(), "created", diff --git a/paimon-core/src/test/java/org/apache/paimon/utils/PartitionStatisticsReporterTest.java b/paimon-core/src/test/java/org/apache/paimon/utils/PartitionStatisticsReporterTest.java index 5ffb50e95f5b..7517fac6f1ea 100644 --- a/paimon-core/src/test/java/org/apache/paimon/utils/PartitionStatisticsReporterTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/utils/PartitionStatisticsReporterTest.java @@ -135,7 +135,7 @@ public void close() { assertThat(partitionParams).containsKey("c1=a/"); assertThat(partitionParams.get("c1=a/").toString()) .isEqualTo( - "{spec={c1=a}, recordCount=2, fileSizeInBytes=705, fileCount=1, lastFileCreationTime=1729598544974}"); + "{spec={c1=a}, recordCount=2, fileSizeInBytes=705, fileCount=1, lastFileCreationTime=1729598544974, totalBuckets=-1}"); action.close(); assertThat(closed).isTrue(); } diff --git a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/RESTCatalogITCase.java b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/RESTCatalogITCase.java index d79aa713c9dd..034816d6a0fc 100644 --- a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/RESTCatalogITCase.java +++ b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/RESTCatalogITCase.java @@ -19,6 +19,7 @@ package org.apache.paimon.flink; import org.apache.paimon.catalog.Identifier; +import org.apache.paimon.partition.Partition; import org.apache.paimon.rest.RESTToken; import org.apache.paimon.shade.guava30.com.google.common.collect.ImmutableList; @@ -151,4 +152,51 @@ public void testFunction() throws Exception { sql(String.format("DROP FUNCTION %s.%s", DATABASE_NAME, functionName)); assertThat(catalog.functionExists(functionObjectPath)).isFalse(); } + + @Test + public void testTotalBucketsStatistics() throws Exception { + String fixedBucketTableName = "fixed_bucket_tbl"; + batchSql( + String.format( + "CREATE TABLE %s.%s (a INT, b INT, p INT) PARTITIONED BY (p) WITH ('bucket'='2', 'bucket-key'='a')", + DATABASE_NAME, fixedBucketTableName)); + batchSql( + String.format( + "INSERT INTO %s.%s VALUES (1, 10, 1), (2, 20, 1)", + DATABASE_NAME, fixedBucketTableName)); + validateTotalBuckets(DATABASE_NAME, fixedBucketTableName, 2); + + String dynamicBucketTableName = "dynamic_bucket_tbl"; + sql( + String.format( + "CREATE TABLE %s.%s (a INT, b INT, p INT) PARTITIONED BY (p) WITH ('bucket'='-1')", + DATABASE_NAME, dynamicBucketTableName)); + sql( + String.format( + "INSERT INTO %s.dynamic_bucket_tbl VALUES (1, 10, 1), (2, 20, 1)", + DATABASE_NAME)); + validateTotalBuckets(DATABASE_NAME, "dynamic_bucket_tbl", -1); + + String postponeBucketTableName = "postpone_bucket_tbl"; + batchSql( + String.format( + "CREATE TABLE %s.%s (a INT, b INT, p INT, PRIMARY KEY (p, a) NOT ENFORCED) PARTITIONED BY (p) WITH ('bucket'='-2')", + DATABASE_NAME, postponeBucketTableName)); + batchSql( + String.format( + "INSERT INTO %s.%s VALUES (1, 10, 1), (2, 20, 1)", + DATABASE_NAME, postponeBucketTableName)); + validateTotalBuckets(DATABASE_NAME, postponeBucketTableName, 1); + } + + private void validateTotalBuckets( + String databaseName, String tableName, Integer expectedTotalBuckets) throws Exception { + Catalog flinkCatalog = tEnv.getCatalog(tEnv.getCurrentCatalog()).get(); + try (org.apache.paimon.catalog.Catalog catalog = ((FlinkCatalog) flinkCatalog).catalog()) { + List partitions = + catalog.listPartitions(Identifier.create(databaseName, tableName)); + assertThat(partitions).isNotEmpty(); + assertThat(partitions.get(0).totalBuckets()).isEqualTo(expectedTotalBuckets); + } + } } diff --git a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java index f63cd4846447..98c14def5820 100644 --- a/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java +++ b/paimon-hive/paimon-hive-catalog/src/main/java/org/apache/paimon/hive/HiveCatalog.java @@ -479,6 +479,8 @@ && new CoreOptions(tableSchema.options()).partitionedTableInMetastore()) { String modifyTimeSeconds = String.valueOf(partition.lastFileCreationTime() / 1000); statistic.put(LAST_UPDATE_TIME_PROP, modifyTimeSeconds); + statistic.put(TOTAL_BUCKETS, String.valueOf(partition.totalBuckets())); + // just for being compatible with hive metastore statistic.put(HIVE_LAST_UPDATE_TIME_PROP, modifyTimeSeconds); @@ -558,6 +560,9 @@ public List listPartitions(Identifier ide parameters.getOrDefault( LAST_UPDATE_TIME_PROP, System.currentTimeMillis() + "")); + int totalBuckets = + Integer.parseInt( + parameters.getOrDefault(TOTAL_BUCKETS, "0")); return new org.apache.paimon.partition.Partition( Collections.singletonMap( tagToPartitionField, part.getValues().get(0)), @@ -565,6 +570,7 @@ public List listPartitions(Identifier ide fileSizeInBytes, fileCount, lastFileCreationTime, + totalBuckets, false); }) .collect(Collectors.toList()); diff --git a/paimon-hive/paimon-hive-catalog/src/test/java/org/apache/paimon/hive/HiveCatalogTest.java b/paimon-hive/paimon-hive-catalog/src/test/java/org/apache/paimon/hive/HiveCatalogTest.java index 5e196067168d..de04b0c8381b 100644 --- a/paimon-hive/paimon-hive-catalog/src/test/java/org/apache/paimon/hive/HiveCatalogTest.java +++ b/paimon-hive/paimon-hive-catalog/src/test/java/org/apache/paimon/hive/HiveCatalogTest.java @@ -530,7 +530,7 @@ public void testAlterPartitions() throws Exception { long fileCreationTime = System.currentTimeMillis(); PartitionStatistics partition = new PartitionStatistics( - Collections.singletonMap("dt", "20250101"), 1, 2, 3, fileCreationTime); + Collections.singletonMap("dt", "20250101"), 1, 2, 3, fileCreationTime, 4); catalog.alterPartitions(alterIdentifier, Collections.singletonList(partition)); Partition partitionFromServer = catalog.listPartitions(alterIdentifier).get(0); checkPartition( @@ -540,6 +540,7 @@ public void testAlterPartitions() throws Exception { 2, 3, fileCreationTime, + 4, false), partitionFromServer); diff --git a/paimon-python/pypaimon/snapshot/snapshot_commit.py b/paimon-python/pypaimon/snapshot/snapshot_commit.py index 50727b6ce37c..156036165a4e 100644 --- a/paimon-python/pypaimon/snapshot/snapshot_commit.py +++ b/paimon-python/pypaimon/snapshot/snapshot_commit.py @@ -39,11 +39,12 @@ class PartitionStatistics: file_size_in_bytes: int = json_field("fileSizeInBytes", default=0) file_count: int = json_field("fileCount", default=0) last_file_creation_time: int = json_field("lastFileCreationTime", default_factory=lambda: int(time.time() * 1000)) + total_buckets: int = json_field("totalBuckets", default=0) @classmethod def create(cls, partition_spec: Dict[str, str] = None, record_count: int = 0, file_count: int = 0, file_size_in_bytes: int = 0, - last_file_creation_time: int = None) -> 'PartitionStatistics': + last_file_creation_time: int = None, total_buckets: int = 0) -> 'PartitionStatistics': """ Factory method to create PartitionStatistics with backward compatibility. @@ -53,6 +54,7 @@ def create(cls, partition_spec: Dict[str, str] = None, record_count: int = 0, file_count: Number of files file_size_in_bytes: Total file size in bytes last_file_creation_time: Last file creation time in milliseconds + total_buckets: Total number of buckets in the partition Returns: PartitionStatistics instance @@ -62,7 +64,8 @@ def create(cls, partition_spec: Dict[str, str] = None, record_count: int = 0, record_count=record_count, file_count=file_count, file_size_in_bytes=file_size_in_bytes, - last_file_creation_time=last_file_creation_time or int(time.time() * 1000) + last_file_creation_time=last_file_creation_time or int(time.time() * 1000), + total_buckets=total_buckets ) diff --git a/paimon-python/pypaimon/write/file_store_commit.py b/paimon-python/pypaimon/write/file_store_commit.py index a5b9fd969349..6a1f4c50b15f 100644 --- a/paimon-python/pypaimon/write/file_store_commit.py +++ b/paimon-python/pypaimon/write/file_store_commit.py @@ -288,7 +288,8 @@ def _generate_partition_statistics(self, commit_entries: List[ManifestEntry]) -> 'record_count': 0, 'file_count': 0, 'file_size_in_bytes': 0, - 'last_file_creation_time': 0 + 'last_file_creation_time': 0, + 'total_buckets': entry.total_buckets } # Following Java implementation: PartitionEntry.fromDataFile() @@ -323,7 +324,8 @@ def _generate_partition_statistics(self, commit_entries: List[ManifestEntry]) -> record_count=stats['record_count'], file_count=stats['file_count'], file_size_in_bytes=stats['file_size_in_bytes'], - last_file_creation_time=stats['last_file_creation_time'] + last_file_creation_time=stats['last_file_creation_time'], + total_buckets=stats['total_buckets'] ) for stats in partition_stats.values() ]