Skip to content

Commit 2c46cc7

Browse files
author
huyuanfeng
committed
Merge branch 'main' into FLINK-36192
2 parents 3bce43c + 4f87bc2 commit 2c46cc7

File tree

80 files changed

+2228
-1185
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+2228
-1185
lines changed

.github/workflows/ci.yml

Lines changed: 78 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
name: Flink Kubernetes Operator CI
2424
on:
2525
push:
26+
branches:
27+
- main
28+
- release-*
2629
pull_request:
2730
jobs:
2831
test_ci:
@@ -32,19 +35,13 @@ jobs:
3235
matrix:
3336
java-version: [ 11, 17, 21 ]
3437
steps:
35-
- uses: actions/checkout@v2
38+
- uses: actions/checkout@v4
3639
- name: Set up JDK ${{ matrix.java-version }}
37-
uses: actions/setup-java@v2
40+
uses: actions/setup-java@v4
3841
with:
3942
java-version: ${{ matrix.java-version }}
40-
distribution: 'adopt'
41-
- name: Cache local Maven repository
42-
uses: actions/cache@v3
43-
with:
44-
path: ~/.m2/repository
45-
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
46-
restore-keys: |
47-
${{ runner.os }}-maven-
43+
distribution: 'temurin'
44+
cache: 'maven'
4845
- name: Build with Maven
4946
run: |
5047
mvn -B clean install javadoc:javadoc -Pgenerate-docs
@@ -70,7 +67,72 @@ jobs:
7067
cd flink-autoscaler-plugin-jdbc
7168
mvn -B verify -Dit.skip=false
7269
cd ..
70+
e2e_smoke_test:
71+
name: HTTP Client smoke test
72+
runs-on: ubuntu-latest
73+
strategy:
74+
matrix:
75+
http-client: [ "okhttp", "jdk", "jetty", "vertx" ]
76+
version: ["v1_20"]
77+
mode: ["native"]
78+
namespace: ["default"]
79+
java-version: ["21"]
80+
test:
81+
- test_application_operations.sh
82+
steps:
83+
- uses: actions/checkout@v4
84+
- name: Set up JDK ${{ matrix.java-version }}
85+
uses: actions/setup-java@v4
86+
with:
87+
java-version: ${{ matrix.java-version }}
88+
distribution: 'temurin'
89+
cache: 'maven'
90+
- name: Start minikube
91+
run: |
92+
source e2e-tests/utils.sh
93+
start_minikube
94+
- name: Install cert-manager
95+
run: |
96+
kubectl get pods -A
97+
kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.8.2/cert-manager.yaml
98+
kubectl -n cert-manager wait --all=true --for=condition=Available --timeout=300s deploy
99+
- name: Build image
100+
run: |
101+
export SHELL=/bin/bash
102+
export DOCKER_BUILDKIT=1
103+
eval $(minikube -p minikube docker-env)
104+
JAVA_VERSION=${{matrix.java-version}}
105+
HTTP_CLIENT=${{ matrix.http-client }}
106+
docker build --progress=plain --no-cache -f ./Dockerfile -t flink-kubernetes-operator:ci-latest --progress plain --build-arg JAVA_VERSION="${JAVA_VERSION:-11}" --build-arg HTTP_CLIENT="${HTTP_CLIENT:-okhttp}" .
107+
docker images
108+
- name: Start the operator
109+
run: |
110+
if [[ "${{ matrix.test }}" == "test_flink_operator_ha.sh" ]]; then
111+
sed -i "s/# kubernetes.operator.leader-election.enabled: false/kubernetes.operator.leader-election.enabled: true/" helm/flink-kubernetes-operator/conf/flink-conf.yaml
112+
sed -i "s/# kubernetes.operator.leader-election.lease-name: flink-operator-lease/kubernetes.operator.leader-election.lease-name: flink-operator-lease/" helm/flink-kubernetes-operator/conf/flink-conf.yaml
113+
sed -i "s/replicas: 1/replicas: 2/" helm/flink-kubernetes-operator/values.yaml
114+
fi
115+
helm --debug install flink-kubernetes-operator -n ${{ matrix.namespace }} helm/flink-kubernetes-operator --set image.repository=flink-kubernetes-operator --set image.tag=ci-latest ${{ matrix.extraArgs }}
116+
kubectl wait --for=condition=Available --timeout=120s -n ${{ matrix.namespace }} deploy/flink-kubernetes-operator
117+
kubectl get pods -n ${{ matrix.namespace }}
118+
- name: Run Flink e2e tests
119+
run: |
120+
sed -i "s/image: flink:.*/image: ${{ matrix.image }}/" e2e-tests/data/*.yaml
121+
sed -i "s/flinkVersion: .*/flinkVersion: ${{ matrix.version }}/" e2e-tests/data/*.yaml
122+
sed -i "s/mode: .*/mode: ${{ matrix.mode }}/" e2e-tests/data/*.yaml
123+
git diff HEAD
124+
echo "Running e2e-tests/$test"
125+
bash e2e-tests/${{ matrix.test }} || exit 1
126+
git reset --hard
127+
- name: Stop the operator
128+
run: |
129+
helm uninstall -n ${{ matrix.namespace }} flink-kubernetes-operator
130+
- name: Stop minikube
131+
run: |
132+
source e2e-tests/utils.sh
133+
stop_minikube
73134
e2e_ci:
135+
needs: e2e_smoke_test
74136
runs-on: ubuntu-latest
75137
strategy:
76138
matrix:
@@ -148,19 +210,13 @@ jobs:
148210
java-version: 21
149211
name: e2e_ci
150212
steps:
151-
- uses: actions/checkout@v2
213+
- uses: actions/checkout@v4
152214
- name: Set up JDK ${{ matrix.java-version }}
153-
uses: actions/setup-java@v2
215+
uses: actions/setup-java@v4
154216
with:
155217
java-version: ${{ matrix.java-version }}
156-
distribution: 'adopt'
157-
- name: Cache local Maven repository
158-
uses: actions/cache@v3
159-
with:
160-
path: ~/.m2/repository
161-
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
162-
restore-keys: |
163-
${{ runner.os }}-maven-
218+
distribution: 'temurin'
219+
cache: 'maven'
164220
- name: Start minikube
165221
run: |
166222
source e2e-tests/utils.sh
@@ -175,7 +231,8 @@ jobs:
175231
export SHELL=/bin/bash
176232
export DOCKER_BUILDKIT=1
177233
eval $(minikube -p minikube docker-env)
178-
docker build --progress=plain --no-cache -f ./Dockerfile -t flink-kubernetes-operator:ci-latest --progress plain .
234+
JAVA_VERSION=${{ matrix.java-version }}
235+
docker build --progress=plain --no-cache -f ./Dockerfile -t flink-kubernetes-operator:ci-latest --progress plain --build-arg JAVA_VERSION="${JAVA_VERSION:-11}" .
179236
docker images
180237
- name: Start the operator
181238
run: |

Dockerfile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,16 @@
1616
# limitations under the License.
1717
################################################################################
1818
# Build
19-
FROM maven:3.8.4-eclipse-temurin-11 AS build
19+
ARG JAVA_VERSION=11
20+
FROM maven:3.8.8-eclipse-temurin-${JAVA_VERSION} AS build
2021
ARG SKIP_TESTS=true
22+
ARG HTTP_CLIENT=okhttp
2123

2224
WORKDIR /app
2325

2426
COPY . .
2527

26-
RUN --mount=type=cache,target=/root/.m2 mvn -ntp clean install -pl flink-kubernetes-standalone,flink-kubernetes-operator-api,flink-kubernetes-operator,flink-autoscaler,flink-kubernetes-webhook -DskipTests=$SKIP_TESTS
28+
RUN --mount=type=cache,target=/root/.m2 mvn -ntp clean install -pl flink-kubernetes-standalone,flink-kubernetes-operator-api,flink-kubernetes-operator,flink-autoscaler,flink-kubernetes-webhook -DskipTests=$SKIP_TESTS -Dfabric8.httpclinent.impl="$HTTP_CLIENT"
2729

2830
RUN cd /app/tools/license; mkdir jars; cd jars; \
2931
cp /app/flink-kubernetes-operator/target/flink-kubernetes-operator-*-shaded.jar . && \
@@ -33,7 +35,7 @@ RUN cd /app/tools/license; mkdir jars; cd jars; \
3335
cd ../ && ./collect_license_files.sh ./jars ./licenses-output
3436

3537
# stage
36-
FROM eclipse-temurin:11-jre-jammy
38+
FROM eclipse-temurin:${JAVA_VERSION}-jre-jammy
3739
ENV FLINK_HOME=/opt/flink
3840
ENV FLINK_PLUGINS_DIR=$FLINK_HOME/plugins
3941
ENV OPERATOR_VERSION=1.10-SNAPSHOT

docs/content/docs/custom-resource/job-management.md

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,13 @@ Supported values: `stateless`, `savepoint`, `last-state`
8484

8585
The `upgradeMode` setting controls both the stop and restore mechanisms as detailed in the following table:
8686

87-
| | Stateless | Last State | Savepoint |
88-
|------------------------|-------------------------|--------------------------------------------|----------------------------------------|
89-
| Config Requirement | None | Checkpointing & HA Enabled | Checkpoint/Savepoint directory defined |
90-
| Job Status Requirement | None | HA metadata available | Job Running* |
91-
| Suspend Mechanism | Cancel / Delete | Delete Flink deployment (keep HA metadata) | Cancel with savepoint |
92-
| Restore Mechanism | Deploy from empty state | Recover last state using HA metadata | Restore From savepoint |
93-
| Production Use | Not recommended | Recommended | Recommended |
87+
| | Stateless | Last State | Savepoint |
88+
|------------------------|-----------------|------------------------------------|----------------------------------------|
89+
| Config Requirement | None | Checkpointing Enabled | Checkpoint/Savepoint directory defined |
90+
| Job Status Requirement | None | Job or HA metadata accessible | Job Running* |
91+
| Suspend Mechanism | Cancel / Delete | Cancel / Delete (keep HA metadata) | Cancel with savepoint |
92+
| Restore Mechanism | Empty state | Use HA metadata or last cp/sp | Restore From savepoint |
93+
| Production Use | Not recommended | Recommended | Recommended |
9494

9595

9696
*\* When HA is enabled the `savepoint` upgrade mode may fall back to the `last-state` behaviour in cases where the job is in an unhealthy state.*
@@ -149,10 +149,6 @@ spec:
149149
state: running
150150
```
151151
152-
{{< hint warning >}}
153-
Last state upgrade mode is currently only supported for `FlinkDeployments`.
154-
{{< /hint >}}
155-
156152
### Application restarts without spec change
157153
158154
There are cases when users would like to restart the Flink deployments to deal with some transient problem.

docs/content/docs/custom-resource/overview.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,6 @@ COPY flink-hadoop-fs-1.19-SNAPSHOT.jar $FLINK_PLUGINS_DIR/hadoop-fs/
215215

216216
Alternatively, if you use helm to install flink-kubernetes-operator, it allows you to specify a postStart hook to download the required plugins.
217217

218-
### Limitations
219-
220-
- Last-state upgradeMode is currently not supported for FlinkSessionJobs
221-
222218
## Further information
223219

224220
- [Snapshots]({{< ref "docs/custom-resource/snapshots" >}})

docs/content/docs/custom-resource/reference.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ This serves as a full reference for FlinkDeployment and FlinkSessionJob custom r
402402
| ----------| ---- | ---- |
403403
| jobName | java.lang.String | Name of the job. |
404404
| jobId | java.lang.String | Flink JobId of the Job. |
405-
| state | java.lang.String | Last observed state of the job. |
405+
| state | org.apache.flink.api.common.JobStatus | Last observed state of the job. |
406406
| startTime | java.lang.String | Start time of the job. |
407407
| updateTime | java.lang.String | Update time of the job. |
408408
| upgradeSavepointPath | java.lang.String | |
@@ -452,7 +452,7 @@ This serves as a full reference for FlinkDeployment and FlinkSessionJob custom r
452452

453453
| Parameter | Type | Docs |
454454
| ----------| ---- | ---- |
455-
| lastSavepoint | org.apache.flink.kubernetes.operator.api.status.Savepoint | Last completed savepoint by the operator for manual and periodic snapshots. Only used if FlinkStateSnapshot resources are disabled. |
455+
| lastSavepoint | org.apache.flink.kubernetes.operator.api.status.Savepoint | Last completed savepoint by the operator. |
456456
| triggerId | java.lang.String | Trigger id of a pending savepoint operation. |
457457
| triggerTimestamp | java.lang.Long | Trigger timestamp of a pending savepoint operation. |
458458
| triggerType | org.apache.flink.kubernetes.operator.api.status.SnapshotTriggerType | Savepoint trigger mechanism. |

docs/layouts/shortcodes/generated/dynamic_section.html

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@
122122
<td>Boolean</td>
123123
<td>Enables last-state fallback for savepoint upgrade mode. When the job is not running thus savepoint cannot be triggered but HA metadata is available for last state restore the operator can initiate the upgrade process when the flag is enabled.</td>
124124
</tr>
125+
<tr>
126+
<td><h5>kubernetes.operator.job.upgrade.last-state.job-cancel.enabled</h5></td>
127+
<td style="word-wrap: break-word;">false</td>
128+
<td>Boolean</td>
129+
<td>Cancel jobs during last-state upgrade. This config is ignored for session jobs where cancel is the only mechanism to perform this type of upgrade.</td>
130+
</tr>
125131
<tr>
126132
<td><h5>kubernetes.operator.job.upgrade.last-state.max.allowed.checkpoint.age</h5></td>
127133
<td style="word-wrap: break-word;">(none)</td>

docs/layouts/shortcodes/generated/kubernetes_operator_config_configuration.html

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,12 @@
212212
<td>Boolean</td>
213213
<td>Enables last-state fallback for savepoint upgrade mode. When the job is not running thus savepoint cannot be triggered but HA metadata is available for last state restore the operator can initiate the upgrade process when the flag is enabled.</td>
214214
</tr>
215+
<tr>
216+
<td><h5>kubernetes.operator.job.upgrade.last-state.job-cancel.enabled</h5></td>
217+
<td style="word-wrap: break-word;">false</td>
218+
<td>Boolean</td>
219+
<td>Cancel jobs during last-state upgrade. This config is ignored for session jobs where cancel is the only mechanism to perform this type of upgrade.</td>
220+
</tr>
215221
<tr>
216222
<td><h5>kubernetes.operator.job.upgrade.last-state.max.allowed.checkpoint.age</h5></td>
217223
<td style="word-wrap: break-word;">(none)</td>

e2e-tests/test_sessionjob_operations.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ if [ "$location" == "" ];then
5454
exit 1
5555
fi
5656

57+
echo "Starting sessionjob savepoint upgrade test"
5758
# Testing savepoint mode upgrade
5859
# Update the FlinkSessionJob and trigger the savepoint upgrade
5960
kubectl patch sessionjob ${SESSION_JOB_NAME} --type merge --patch '{"spec":{"job": {"parallelism": 1 } } }'
@@ -67,6 +68,24 @@ assert_available_slots 1 $CLUSTER_ID
6768

6869
echo "Successfully run the sessionjob savepoint upgrade test"
6970

71+
flink_version=$(kubectl get $SESSION_CLUSTER_IDENTIFIER -o yaml | yq '.spec.flinkVersion')
72+
73+
if [ "$flink_version" != "v1_16" ]; then
74+
echo "Starting sessionjob last-state upgrade test"
75+
# Testing last-state mode upgrade
76+
# Update the FlinkSessionJob and trigger the last-state upgrade
77+
kubectl patch sessionjob ${SESSION_JOB_NAME} --type merge --patch '{"spec":{"job": {"parallelism": 2, "upgradeMode": "last-state" } } }'
78+
79+
# Check the job was restarted with the new parallelism
80+
wait_for_status $SESSION_JOB_IDENTIFIER '.status.jobStatus.state' CANCELLING ${TIMEOUT} || exit 1
81+
wait_for_status $SESSION_JOB_IDENTIFIER '.status.jobStatus.state' RUNNING ${TIMEOUT} || exit 1
82+
assert_available_slots 0 $CLUSTER_ID
83+
84+
echo "Successfully run the sessionjob last-state upgrade test"
85+
else
86+
echo "Skipping last-state test for flink version 1.16"
87+
fi
88+
7089
# Test Operator restart
7190
echo "Delete session job " + $SESSION_JOB_NAME
7291
kubectl delete flinksessionjob $SESSION_JOB_NAME

flink-autoscaler-plugin-jdbc/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ under the License.
3333

3434
<properties>
3535
<testcontainers.version>1.18.2</testcontainers.version>
36-
<postgres.version>42.5.4</postgres.version>
36+
<postgres.version>42.5.6</postgres.version>
3737
<mysql.version>8.0.33</mysql.version>
3838
</properties>
3939

flink-kubernetes-operator-api/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ under the License.
226226
fork="true" failonerror="true">
227227
<classpath refid="maven.compile.classpath"/>
228228
<arg value="file://${rootDir}/helm/flink-kubernetes-operator/crds/flinkdeployments.flink.apache.org-v1.yml"/>
229-
<arg value="https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.6.0/helm/flink-kubernetes-operator/crds/flinkdeployments.flink.apache.org-v1.yml"/>
229+
<arg value="https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.9.0/helm/flink-kubernetes-operator/crds/flinkdeployments.flink.apache.org-v1.yml"/>
230230
</java>
231231
</target>
232232
</configuration>
@@ -243,7 +243,7 @@ under the License.
243243
fork="true" failonerror="true">
244244
<classpath refid="maven.compile.classpath"/>
245245
<arg value="file://${rootDir}/helm/flink-kubernetes-operator/crds/flinksessionjobs.flink.apache.org-v1.yml"/>
246-
<arg value="https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.6.0/helm/flink-kubernetes-operator/crds/flinksessionjobs.flink.apache.org-v1.yml"/>
246+
<arg value="https://raw.githubusercontent.com/apache/flink-kubernetes-operator/release-1.9.0/helm/flink-kubernetes-operator/crds/flinksessionjobs.flink.apache.org-v1.yml"/>
247247
</java>
248248
</target>
249249
</configuration>

0 commit comments

Comments
 (0)