Changes from all commits (36 commits)
78e3498
perf: [Flink] Add FlinkSkipSingleFileClusteringPlanStrategy to skip c…
XianghuiBai Oct 29, 2025
ec3b35d
feat: introduce pk filter push-down to base file (#14183)
TheR1sing3un Nov 4, 2025
1239daa
docs: update javadoc of BucketIndexUtil (#14195)
voonhous Nov 5, 2025
8b04375
perf: reduce unnecessary row group metadata loading (#14208)
TheR1sing3un Nov 7, 2025
8dacb40
feat: Only when the target table to be inserted/merged is a hudi tabl…
TheR1sing3un Nov 8, 2025
50806df
feat: introduce pk filter to log file (#14205)
TheR1sing3un Nov 18, 2025
16858a0
chore: Update release candidate validation in Github action (#14295)
yihua Nov 18, 2025
05ae176
test: Clean up all the behaviors of directly setting spark conf in sp…
TheR1sing3un Nov 19, 2025
fcd89e9
[MINOR] Cleanup old spark3.5 version in pom.xml (#14304)
yongkyunlee Nov 20, 2025
a58190a
chore: Integration Test Flakiness: free more disk space before runnin…
the-other-tim-brown Nov 22, 2025
03d7133
refactor: Clean up Spurious log block handling in LogRecordReader (#1…
PavithranRick Nov 25, 2025
7f7acd8
fix: Fix duplicate field exception in hive query with where clause (#…
cshuo Nov 26, 2025
c4abf0c
fix: push down pk filters to log file when spark enable `parquetFilte…
TheR1sing3un Nov 26, 2025
04ba590
fix: Fix the mismatch between operation metrics and the actual operat…
TheR1sing3un Nov 27, 2025
9e00037
fix: Support handling complex data types in convertRowToJsonString fo…
cshuo Nov 27, 2025
4e309e3
fix: fix get empty completion time in corner case (#14379)
TheR1sing3un Nov 27, 2025
7831ad7
test: Fix test setup and assertions in TestTableColumnTypeMismatch (#…
nsivabalan Nov 29, 2025
57aa5b9
fix: Bump springboot version to fix CVE-2022-1471 (#14383)
CTTY Nov 29, 2025
77d6f18
fix: Only use index when index metadata is present (#14385)
CTTY Nov 29, 2025
68ff975
feat: Add storage in HoodieCatalogTable (#14386)
CTTY Nov 29, 2025
0cf8315
fix: Include parquet-format in Hive sync bundle (#13843)
gggyd123 Nov 29, 2025
48996f4
fix: Exclude guava from hive-metastore (#14388)
CTTY Nov 29, 2025
1aa11af
fix: Exclude jetty from javalin to fix CVE-2023-40167 (#14384)
CTTY Nov 29, 2025
6a7a7e1
feat: Use Storage from catalog table in drop table command (#14390)
CTTY Dec 1, 2025
6431199
feat: Use storage conf for alter rename command (#14389)
CTTY Dec 3, 2025
4c57c2b
feat: Change the config for record index max file group size to be a …
prashantwason Dec 3, 2025
657a3b5
Expand test coverage for parquet to spark conversion, fix bugs (#17450)
the-other-tim-brown Dec 3, 2025
c43a7f9
Fix flaky TestHoodieIndex#testCheckIfValidCommit test (#17484)
voonhous Dec 4, 2025
82dad09
chore: move disk space cleanup in integration-test CI module (#17496)
the-other-tim-brown Dec 5, 2025
77da434
fix: Remove explicit casting to HoodieWriteMergeHandle in Flink commi…
cshuo Dec 6, 2025
b52806c
fix: Fixing streaming writes to metadata table for perf regression (#…
nsivabalan Dec 7, 2025
b4941b6
chore: Update deploy script for release (#14296)
yihua Dec 8, 2025
8815340
Bumping release candidate number 1 for 1.1.1
nsivabalan Dec 8, 2025
57e0e7d
feat: Support TIMELINE_SERVER_BASED markers for flink writer (#14202)
cshuo Nov 8, 2025
dca14e5
Bumping release candidate number 2
nsivabalan Dec 12, 2025
3d3ebd5
Updating flink versions
nsivabalan Dec 12, 2025
10 changes: 9 additions & 1 deletion .github/workflows/bot.yml
@@ -1199,6 +1199,15 @@ jobs:
           architecture: x64
       - name: Check disk space
         run: df -h
+      - name: 'Free space'
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /usr/local/share/boost
+          docker system prune --all --force --volumes
+      - name: Check disk space after cleanup
+        run: df -h
       - name: Build Project
         env:
           SPARK_PROFILE: ${{ matrix.sparkProfile }}
@@ -1225,7 +1234,6 @@
           SPARK_ARCHIVE_BASENAME=$(basename $SPARK_ARCHIVE)
           export SPARK_HOME=$GITHUB_WORKSPACE/${SPARK_ARCHIVE_BASENAME%.*}
           rm -f $GITHUB_WORKSPACE/$SPARK_ARCHIVE
-          docker system prune --all --force
           mvn verify $SCALA_PROFILE -D"$SPARK_PROFILE" -Pintegration-tests -pl !hudi-flink-datasource/hudi-flink $MVN_ARGS
 
   build-spark-java17:
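The added step mirrors a common workaround for the limited free disk on hosted ubuntu-latest runners: the preinstalled .NET, Android, GHC, and Boost toolchains are deleted and Docker caches pruned before the build, replacing the narrower docker system prune that previously ran inside the integration-test step itself.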
78 changes: 72 additions & 6 deletions .github/workflows/release_candidate_validation.yml
@@ -17,15 +17,11 @@ jobs:
    runs-on: ubuntu-latest
    if: false
    env:
-      HUDI_VERSION: 1.0.0
-      STAGING_REPO_NUM: 1123
+      HUDI_VERSION: 1.1.0
+      STAGING_REPO_NUM: 1164
    strategy:
      matrix:
        include:
-          - scalaProfile: 'scala-2.13'
-            flinkProfile: 'flink2.0'
-            sparkProfile: 'spark3.5'
-            sparkRuntime: 'spark3.5.1'
          - scalaProfile: 'scala-2.13'
            flinkProfile: 'flink1.20'
            sparkProfile: 'spark3.5'
@@ -75,3 +71,73 @@ jobs:
           SCALA_PROFILE: ${{ matrix.scalaProfile }}
         run: |
           ./packaging/bundle-validation/ci_run.sh hudi_docker_java17 $HUDI_VERSION openjdk17 $STAGING_REPO_NUM
+
+  validate-release-candidate-bundles-spark4:
+    runs-on: ubuntu-latest
+    if: false
+    env:
+      HUDI_VERSION: 1.1.0
+      STAGING_REPO_NUM: 1164
+    strategy:
+      matrix:
+        include:
+          - scalaProfile: 'scala-2.13'
+            flinkProfile: 'flink1.20'
+            sparkProfile: 'spark4.0'
+            sparkRuntime: 'spark4.0.0'
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up JDK 17
+        uses: actions/setup-java@v3
+        with:
+          java-version: '17'
+          distribution: 'temurin'
+          architecture: x64
+          cache: maven
+      - name: IT - Bundle Validation - OpenJDK 17
+        env:
+          FLINK_PROFILE: ${{ matrix.flinkProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_RUNTIME: ${{ matrix.sparkRuntime }}
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+        run: |
+          ./packaging/bundle-validation/ci_run.sh hudi_docker_java17 $HUDI_VERSION openjdk17 $STAGING_REPO_NUM
+
+  validate-release-candidate-bundles-flink2:
+    runs-on: ubuntu-latest
+    if: false
+    env:
+      HUDI_VERSION: 1.1.0
+      STAGING_REPO_NUM: 1164
+    strategy:
+      matrix:
+        include:
+          - scalaProfile: 'scala-2.12'
+            flinkProfile: 'flink2.0'
+            sparkProfile: 'spark3.5'
+            sparkRuntime: 'spark3.5.1'
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up JDK 11
+        uses: actions/setup-java@v3
+        with:
+          java-version: '11'
+          distribution: 'temurin'
+          architecture: x64
+          cache: maven
+      - name: IT - Bundle Validation - OpenJDK 11
+        env:
+          FLINK_PROFILE: ${{ matrix.flinkProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_RUNTIME: ${{ matrix.sparkRuntime }}
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+        run: |
+          ./packaging/bundle-validation/ci_run.sh hudi_docker_java11 $HUDI_VERSION openjdk11 $STAGING_REPO_NUM
+      - name: IT - Bundle Validation - OpenJDK 17
+        env:
+          FLINK_PROFILE: ${{ matrix.flinkProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_RUNTIME: ${{ matrix.sparkRuntime }}
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+        run: |
+          ./packaging/bundle-validation/ci_run.sh hudi_docker_java17 $HUDI_VERSION openjdk17 $STAGING_REPO_NUM
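Note that all three bundle-validation jobs above carry if: false, so they are skipped by default; the condition is presumably flipped only when a release candidate is actually being validated.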
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/base/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/base_java11/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/datanode/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/historyserver/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/hive_base/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/namenode/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
   <modelVersion>4.0.0</modelVersion>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/prestobase/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/spark_base/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/sparkadhoc/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/sparkmaster/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/sparkworker/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/trinobase/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/trinocoordinator/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/trinoworker/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <artifactId>hudi-hadoop-docker</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
   <packaging>pom</packaging>
4 changes: 2 additions & 2 deletions hudi-aws/pom.xml
@@ -19,12 +19,12 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
   <artifactId>hudi-aws</artifactId>
-  <version>1.1.0</version>
+  <version>1.1.1-rc2</version>
 
   <name>hudi-aws</name>
   <packaging>jar</packaging>
2 changes: 1 addition & 1 deletion hudi-cli/pom.xml
@@ -19,7 +19,7 @@
   <parent>
     <artifactId>hudi</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
4 changes: 2 additions & 2 deletions hudi-client/hudi-client-common/pom.xml
@@ -20,12 +20,12 @@
   <parent>
     <artifactId>hudi-client</artifactId>
     <groupId>org.apache.hudi</groupId>
-    <version>1.1.0</version>
+    <version>1.1.1-rc2</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
 
   <artifactId>hudi-client-common</artifactId>
-  <version>1.1.0</version>
+  <version>1.1.1-rc2</version>
 
   <name>hudi-client-common</name>
   <packaging>jar</packaging>
@@ -54,6 +54,7 @@
 import org.apache.hudi.common.table.marker.MarkerType;
 import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
 import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
+import org.apache.hudi.common.table.view.FileSystemViewStorageType;
 import org.apache.hudi.common.util.ConfigUtils;
 import org.apache.hudi.common.util.FileIOUtils;
 import org.apache.hudi.common.util.HoodieRecordUtils;
@@ -1628,6 +1629,12 @@ public boolean isEmbeddedTimelineServerReuseEnabled() {
     return getBoolean(EMBEDDED_TIMELINE_SERVER_REUSE_ENABLED);
   }
 
+  public boolean isRemoteViewStorageType() {
+    FileSystemViewStorageType storageType = getViewStorageConfig().getStorageType();
+    return storageType == FileSystemViewStorageType.REMOTE_ONLY
+        || storageType == FileSystemViewStorageType.REMOTE_FIRST;
+  }
+
   public int getEmbeddedTimelineServerPort() {
     return Integer.parseInt(getStringOrDefault(EMBEDDED_TIMELINE_SERVER_PORT_NUM));
   }
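The new helper (added to what appears to be HoodieWriteConfig) gives callers a single predicate for "is the file-system view served remotely". A self-contained mirror of the same logic, using a hypothetical trimmed-down enum standing in for Hudi's FileSystemViewStorageType (the value names are assumptions taken from the hunk above):

// Hypothetical stand-in for org.apache.hudi.common.table.view.FileSystemViewStorageType.
enum ViewStorageType { MEMORY, SPILLABLE_DISK, EMBEDDED_KV_STORE, REMOTE_ONLY, REMOTE_FIRST }

public class RemoteViewCheck {
  // True when the view is served by a remote timeline server, either exclusively
  // (REMOTE_ONLY) or as the preferred option with a fallback (REMOTE_FIRST).
  static boolean isRemoteViewStorageType(ViewStorageType type) {
    return type == ViewStorageType.REMOTE_ONLY || type == ViewStorageType.REMOTE_FIRST;
  }

  public static void main(String[] args) {
    for (ViewStorageType type : ViewStorageType.values()) {
      System.out.printf("%s -> remote: %s%n", type, isRemoteViewStorageType(type));
    }
  }
}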
@@ -2696,7 +2703,7 @@ public float getRecordIndexGrowthFactor() {
     return metadataConfig.getRecordIndexGrowthFactor();
   }
 
-  public int getRecordIndexMaxFileGroupSizeBytes() {
+  public long getRecordIndexMaxFileGroupSizeBytes() {
     return metadataConfig.getRecordIndexMaxFileGroupSizeBytes();
   }
 
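Widening this getter from int to long matters because the value is a byte count: an int tops out at Integer.MAX_VALUE (about 2 GiB) and silently wraps for anything larger. A minimal, self-contained demonstration of the narrowing hazard (illustration only, not Hudi code):

public class SizeOverflowDemo {
  public static void main(String[] args) {
    long fourGiB = 4L * 1024 * 1024 * 1024;   // 4294967296 bytes
    int narrowed = (int) fourGiB;             // keeps only the low 32 bits: wraps to 0
    System.out.println("as long: " + fourGiB);    // 4294967296
    System.out.println("as int:  " + narrowed);   // 0
  }
}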
@@ -242,10 +242,6 @@ protected void populateIncomingRecordsMap(Iterator<HoodieRecord<T>> newRecordsIt
     }
   }
 
-  public boolean isEmptyNewRecords() {
-    return keyToNewRecords.isEmpty();
-  }
-
   protected boolean writeUpdateRecord(HoodieRecord<T> newRecord, HoodieRecord<T> oldRecord, HoodieRecord combineRecord, Schema writerSchema) throws IOException {
     boolean isDelete = false;
     if (oldRecord.getData() != combineRecord.getData()) {
@@ -53,7 +53,6 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
-import java.util.Objects;
 import java.util.Set;
 import java.util.stream.Collectors;
 
@@ -133,25 +132,23 @@ public HoodieCompactionPlan generateCompactionPlan(String compactionInstant) thr
         .getLatestFileSlicesStateless(partitionPath)
         .filter(slice -> filterFileSlice(slice, lastCompletedInstantTime, fgIdsInPendingCompactionAndClustering, instantRange))
         .map(s -> {
-          List<HoodieLogFile> logFiles = s.getLogFiles()
-              // ==============================================================
-              // IMPORTANT
-              // ==============================================================
-              // Currently, our filesystem view could return a file slice with pending log files there,
-              // these files should be excluded from the plan, let's say we have such a sequence of actions
-
-              // t10: a delta commit starts,
-              // t20: the compaction is scheduled and the t10 delta commit is still pending, and the "fg_10.log" is included in the plan
-              // t25: the delta commit 10 finishes,
-              // t30: the compaction execution starts, now the reader considers the log file "fg_10.log" as valid.
-
-              // for both OCC and NB-CC, this is in-correct.
-              .filter(logFile -> completionTimeQueryView.isCompletedBefore(compactionInstant, logFile.getDeltaCommitTime()))
-              .sorted(HoodieLogFile.getLogFileComparator()).collect(toList());
-          if (logFiles.isEmpty()) {
-            // compaction is not needed if there is no log file.
-            return null;
-          }
+          // ==============================================================
+          // IMPORTANT
+          // ==============================================================
+          // Currently, our filesystem view could return a file slice with pending log files there,
+          // these files should be excluded from the plan, let's say we have such a sequence of actions
+
+          // t10: a delta commit starts,
+          // t20: the compaction is scheduled and the t10 delta commit is still pending, and the "fg_10.log" is included in the plan
+          // t25: the delta commit 10 finishes,
+          // t30: the compaction execution starts, now the reader considers the log file "fg_10.log" as valid.
+
+          // for both OCC and NB-CC, this is in-correct.
+          return s.filterLogFiles(logFile -> completionTimeQueryView.isCompletedBefore(compactionInstant, logFile.getDeltaCommitTime()));
+        })
+        .filter(FileSlice::hasLogFiles) // compaction is not needed if there is no log file.
+        .map(s -> {
+          List<HoodieLogFile> logFiles = s.getLogFiles().sorted(HoodieLogFile.getLogFileComparator()).collect(toList());
           totalLogFiles.add(logFiles.size());
           totalFileSlices.add(1L);
           // Avro generated classes are not inheriting Serializable. Using CompactionOperation POJO
@@ -160,7 +157,7 @@ public HoodieCompactionPlan generateCompactionPlan(String compactionInstant) thr
           Option<HoodieBaseFile> dataFile = s.getBaseFile();
           return new CompactionOperation(dataFile, partitionPath, logFiles,
               writeConfig.getCompactionStrategy().captureMetrics(writeConfig, s));
-        }).filter(Objects::nonNull), partitionPaths.size()).stream()
+        }), partitionPaths.size()).stream()
         .map(CompactionUtils::buildHoodieCompactionOperation).collect(toList());
 
     LOG.info("Total of {} compaction operations are retrieved for table {}", operations.size(), hoodieTable.getConfig().getBasePath());
@@ -194,7 +191,7 @@ public HoodieCompactionPlan generateCompactionPlan(String compactionInstant) thr
 
   protected abstract List<String> getPartitions();
 
-  protected abstract HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient, List<HoodieCompactionOperation> operations, Pair<List<String>,List<String>> partitionPair);
+  protected abstract HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient, List<HoodieCompactionOperation> operations, Pair<List<String>, List<String>> partitionPair);
 
   protected abstract boolean filterLogCompactionOperations();
 
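Two things change in the plan generator above: the pending-log-file check moves into FileSlice.filterLogFiles, and slices without log files are now excluded up front with .filter(FileSlice::hasLogFiles) instead of being mapped to null and swept up by filter(Objects::nonNull), which is why the java.util.Objects import could be dropped. A self-contained sketch of the two equivalent stream shapes, using a hypothetical Slice record rather than Hudi's FileSlice:

import java.util.List;
import java.util.Objects;

public class StreamFilterDemo {
  // Hypothetical stand-in for a file slice: a file group id plus its log files.
  record Slice(String fileGroupId, List<String> logFiles) {
    boolean hasLogFiles() { return !logFiles.isEmpty(); }
  }

  public static void main(String[] args) {
    List<Slice> slices = List.of(
        new Slice("fg-1", List.of("log.1", "log.2")),
        new Slice("fg-2", List.of()));   // no log files, nothing to compact

    // Old shape: map empty slices to null, then drop the nulls afterwards.
    List<String> before = slices.stream()
        .map(s -> s.hasLogFiles() ? s.fileGroupId() : null)
        .filter(Objects::nonNull)
        .toList();

    // New shape: filter first, so the mapping stage never sees an empty slice.
    List<String> after = slices.stream()
        .filter(Slice::hasLogFiles)
        .map(Slice::fileGroupId)
        .toList();

    System.out.println(before.equals(after));   // true
  }
}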