Skip to content

Commit 9635f03

Browse files
[docs] Update Quickstart to Use S3 (via RustFS) Instead of Local File System (apache#2569)
1 parent 5a4374a commit 9635f03

File tree

4 files changed

+318
-88
lines changed

4 files changed

+318
-88
lines changed

docker/quickstart-flink/prepare_build.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ download_jar() {
7676
log_info "Downloading $description..."
7777

7878
# Download the file
79-
if ! wget -O "$dest_file" "$url"; then
79+
if ! curl -fL -o "$dest_file" "$url"; then
8080
log_error "Failed to download $description from $url"
8181
return 1
8282
fi
@@ -258,4 +258,4 @@ show_summary() {
258258
}
259259

260260
# Run main function
261-
main "$@"
261+
main "$@"

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1139,4 +1139,4 @@
11391139
</pluginManagement>
11401140
</build>
11411141

1142-
</project>
1142+
</project>

website/docs/quickstart/flink.md

Lines changed: 85 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,34 +33,66 @@ mkdir fluss-quickstart-flink
3333
cd fluss-quickstart-flink
3434
```
3535

36-
2. Create a `lib` directory and download the required jar files. You can adjust the Flink version as needed. Please make sure to download the compatible versions of [fluss-flink connector jar](/downloads) and [flink-connector-faker](https://github.com/knaufk/flink-faker/releases)
36+
2. Create a `lib` directory and download the required jar files. You can adjust the Flink version as needed. Please make sure to download the compatible versions of [fluss-flink connector jar](/downloads), [fluss-fs-s3 jar](/downloads), and [flink-connector-faker](https://github.com/knaufk/flink-faker/releases).
3737

3838
```shell
3939
export FLINK_VERSION="1.20"
4040
```
4141

4242
```shell
4343
mkdir lib
44-
wget -O lib/flink-faker-0.5.3.jar https://github.com/knaufk/flink-faker/releases/download/v0.5.3/flink-faker-0.5.3.jar
45-
wget -O "lib/fluss-flink-${FLINK_VERSION}-$FLUSS_DOCKER_VERSION$.jar" "https://repo1.maven.org/maven2/org/apache/fluss/fluss-flink-${FLINK_VERSION}/$FLUSS_DOCKER_VERSION$/fluss-flink-${FLINK_VERSION}-$FLUSS_DOCKER_VERSION$.jar"
44+
curl -fL -o lib/flink-faker-0.5.3.jar https://github.com/knaufk/flink-faker/releases/download/v0.5.3/flink-faker-0.5.3.jar
45+
curl -fL -o "lib/fluss-flink-${FLINK_VERSION}-$FLUSS_DOCKER_VERSION$.jar" "https://repo1.maven.org/maven2/org/apache/fluss/fluss-flink-${FLINK_VERSION}/$FLUSS_DOCKER_VERSION$/fluss-flink-${FLINK_VERSION}-$FLUSS_DOCKER_VERSION$.jar"
46+
curl -fL -o "lib/fluss-fs-s3-$FLUSS_DOCKER_VERSION$.jar" "https://repo1.maven.org/maven2/org/apache/fluss/fluss-fs-s3/$FLUSS_DOCKER_VERSION$/fluss-fs-s3-$FLUSS_DOCKER_VERSION$.jar"
4647
```
4748

4849
3. Create a `docker-compose.yml` file with the following content:
4950

5051
```yaml
5152
services:
53+
#begin RustFS (S3-compatible storage)
54+
rustfs:
55+
image: rustfs/rustfs:latest
56+
ports:
57+
- "9000:9000"
58+
- "9001:9001"
59+
environment:
60+
- RUSTFS_ACCESS_KEY=rustfsadmin
61+
- RUSTFS_SECRET_KEY=rustfsadmin
62+
- RUSTFS_CONSOLE_ENABLE=true
63+
volumes:
64+
- rustfs-data:/data
65+
command: /data
66+
rustfs-init:
67+
image: minio/mc
68+
depends_on:
69+
- rustfs
70+
entrypoint: >
71+
/bin/sh -c "
72+
until mc alias set rustfs http://rustfs:9000 rustfsadmin rustfsadmin; do
73+
echo 'Waiting for RustFS...';
74+
sleep 1;
75+
done;
76+
mc mb --ignore-existing rustfs/fluss;
77+
"
78+
#end
5279
#begin Fluss cluster
5380
coordinator-server:
5481
image: apache/fluss:$FLUSS_DOCKER_VERSION$
5582
command: coordinatorServer
5683
depends_on:
5784
- zookeeper
85+
- rustfs-init
5886
environment:
5987
- |
6088
FLUSS_PROPERTIES=
6189
zookeeper.address: zookeeper:2181
6290
bind.listeners: FLUSS://coordinator-server:9123
63-
remote.data.dir: /tmp/fluss/remote-data
91+
remote.data.dir: s3://fluss/remote-data
92+
s3.endpoint: http://rustfs:9000
93+
s3.access-key: rustfsadmin
94+
s3.secret-key: rustfsadmin
95+
s3.path-style-access: true
6496
tablet-server:
6597
image: apache/fluss:$FLUSS_DOCKER_VERSION$
6698
command: tabletServer
@@ -72,8 +104,12 @@ services:
72104
zookeeper.address: zookeeper:2181
73105
bind.listeners: FLUSS://tablet-server:9123
74106
data.dir: /tmp/fluss/data
75-
remote.data.dir: /tmp/fluss/remote-data
76-
kv.snapshot.interval: 0s
107+
remote.data.dir: s3://fluss/remote-data
108+
s3.endpoint: http://rustfs:9000
109+
s3.access-key: rustfsadmin
110+
s3.secret-key: rustfsadmin
111+
s3.path-style-access: true
112+
kv.snapshot.interval: 60s
77113
zookeeper:
78114
restart: always
79115
image: zookeeper:3.9.2
@@ -112,33 +148,43 @@ services:
112148
- |
113149
FLINK_PROPERTIES=
114150
jobmanager.rpc.address: jobmanager
115-
rest.address: jobmanager
151+
rest.address: jobmanager
116152
entrypoint: ["sh", "-c", "cp -v /tmp/lib/*.jar /opt/flink/lib && exec /docker-entrypoint.sh bin/sql-client.sh"]
117153
volumes:
118154
- ./lib:/tmp/lib
119155
#end
156+
157+
volumes:
158+
rustfs-data:
120159
```
121160
122161
The Docker Compose environment consists of the following containers:
162+
- **RustFS:** an S3-compatible object storage for tiered storage. You can access the RustFS console at http://localhost:9001 with credentials `rustfsadmin/rustfsadmin`. An init container (`rustfs-init`) automatically creates the `fluss` bucket on startup.
123163
- **Fluss Cluster:** a Fluss `CoordinatorServer`, a Fluss `TabletServer` and a `ZooKeeper` server.
164+
- The snapshot interval `kv.snapshot.interval` is set to 60 seconds. You may want to configure this differently for production systems.
165+
- Credentials are configured directly with `s3.access-key` and `s3.secret-key`. Production systems should use a CredentialsProvider chain specific to their cloud environment.
124166
- **Flink Cluster**: a Flink `JobManager`, a Flink `TaskManager`, and a Flink SQL client container to execute queries.
125167

126-
3. To start all containers, run:
168+
:::tip
169+
[RustFS](https://github.com/rustfs/rustfs) is used as a replacement for S3 in this quickstart example. For your production setup, you may want to configure a cloud file system instead. See [here](/maintenance/filesystems/overview.md) for information on how to set up cloud file systems.
170+
:::
171+
172+
4. To start all containers, run:
127173
```shell
128174
docker compose up -d
129175
```
130176
This command automatically starts all the containers defined in the Docker Compose configuration in detached mode.
131177

132-
Run
178+
Run
133179
```shell
134180
docker compose ps
135181
```
136182
to check whether all containers are running properly.
137183

138-
You can also visit http://localhost:8083/ to see if Flink is running normally.
184+
5. Verify the setup. You can visit http://localhost:8083/ to see if Flink is running normally. The S3 bucket for Fluss tiered storage is automatically created by the `rustfs-init` service. You can access the RustFS console at http://localhost:9001 with credentials `rustfsadmin/rustfsadmin` to view the `fluss` bucket.
139185

140186
:::note
141-
- If you want to additionally use an observability stack, follow one of the provided quickstart guides [here](maintenance/observability/quickstart.md) and then continue with this guide.
187+
- If you want to additionally use an observability stack, follow one of the provided quickstart guides [here](/docs/maintenance/observability/quickstart.md) and then continue with this guide.
142188
- All the following commands involving `docker compose` should be executed in the created working directory that contains the `docker-compose.yml` file.
143189
:::
144190

@@ -399,6 +445,34 @@ The following SQL query should return an empty result.
399445
SELECT * FROM fluss_customer WHERE `cust_key` = 1;
400446
```
401447

448+
### Quitting the SQL Client
449+
450+
The following command allows you to quit the Flink SQL Client.
451+
```sql title="Flink SQL"
452+
quit;
453+
```
454+
455+
### Remote Storage
456+
457+
Finally, you can use the following command to view the Primary Key Table snapshot files stored on RustFS:
458+
459+
```shell
460+
docker run --rm --net=host \
461+
-e MC_HOST_rustfs=http://rustfsadmin:rustfsadmin@localhost:9000 \
462+
minio/mc ls --recursive rustfs/fluss/
463+
```
464+
465+
Sample output:
466+
```shell
467+
[2026-02-03 20:28:59 UTC] 26KiB STANDARD remote-data/kv/fluss/enriched_orders-3/0/shared/4f675202-e560-4b8e-9af4-08e9769b4797
468+
[2026-02-03 20:27:59 UTC] 11KiB STANDARD remote-data/kv/fluss/enriched_orders-3/0/shared/87447c34-81d0-4be5-b4c8-abcea5ce68e9
469+
[2026-02-03 20:28:59 UTC] 0B STANDARD remote-data/kv/fluss/enriched_orders-3/0/snap-0/
470+
[2026-02-03 20:28:59 UTC] 1.1KiB STANDARD remote-data/kv/fluss/enriched_orders-3/0/snap-1/_METADATA
471+
[2026-02-03 20:28:59 UTC] 211B STANDARD remote-data/kv/fluss/enriched_orders-3/0/snap-1/aaffa8fc-ddb3-4754-938a-45e28df6d975
472+
[2026-02-03 20:28:59 UTC] 16B STANDARD remote-data/kv/fluss/enriched_orders-3/0/snap-1/d3c18e43-11ee-4e39-912d-087ca01de0e8
473+
[2026-02-03 20:28:59 UTC] 6.2KiB STANDARD remote-data/kv/fluss/enriched_orders-3/0/snap-1/ea2f2097-aa9a-4c2a-9e72-530218cd551c
474+
```
475+
402476
## Clean up
403477
After finishing the tutorial, run `exit` to exit Flink SQL CLI Container and then run
404478
```shell

0 commit comments

Comments
 (0)