
Commit 5f4acf4

feat: add docker compose stack for nessie, spark, spark jdbc and minio and notebook server for testing
1 parent cb6b33c

File tree

1 file changed: +185 −0 lines changed

docker-compose.yml

Lines changed: 185 additions & 0 deletions
@@ -0,0 +1,185 @@
version: '3.8'

services:
  minio:
    image: minio/minio:latest
    container_name: minio
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    volumes:
      - minio_data:/data
    command: server /data --console-address ":9001"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 30s
      timeout: 20s
      retries: 3
    networks:
      - app-network

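  # One-shot init container: waits briefly for MinIO, then creates the nessie and spark buckets.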
  createbuckets:
    image: minio/mc:latest
    depends_on:
      - minio
    entrypoint: >
      /bin/sh -c "
      sleep 5;
      /usr/bin/mc config host add myminio http://minio:9000 minioadmin minioadmin;
      /usr/bin/mc mb myminio/nessie;
      /usr/bin/mc mb myminio/spark;
      exit 0;
      "
    networks:
      - app-network

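  # PostgreSQL backing store for Nessie's version history.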
  postgres:
    image: postgres:14
    container_name: postgres
    ports:
      - "5432:5432"
    environment:
      POSTGRES_USER: nessie
      POSTGRES_PASSWORD: nessie
      POSTGRES_DB: nessie
    volumes:
      - postgres_data:/var/lib/postgresql/data
    networks:
      - app-network

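  # Nessie catalog server; reachable inside the network at http://nessie:19120/api/v1.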
  nessie:
    image: projectnessie/nessie:latest
    container_name: nessie
    depends_on:
      - postgres
    ports:
      - "19120:19120"
    environment:
      QUARKUS_PROFILE: postgresql
      NESSIE_VERSION_STORE_TYPE: jdbc
      QUARKUS_DATASOURCE_USERNAME: nessie
      QUARKUS_DATASOURCE_PASSWORD: nessie
      QUARKUS_DATASOURCE_JDBC_URL: jdbc:postgresql://postgres:5432/nessie
    networks:
      - app-network

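  # Standalone Spark master: web UI on :8080, cluster port :7077.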
  spark-master:
    image: bitnami/spark:latest
    container_name: spark-master
    environment:
      - SPARK_MODE=master
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
    ports:
      - "8080:8080"
      - "7077:7077"
    networks:
      - app-network

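  # Single Spark worker (1 core, 1 GB) registered with the master above.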
  spark-worker:
    image: bitnami/spark:latest
    container_name: spark-worker
    depends_on:
      - spark-master
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_CORES=1
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
    ports:
      - "8081:8081"
    networks:
      - app-network

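  # Thrift JDBC/ODBC endpoint so external SQL clients can query the Iceberg tables tracked by Nessie.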
  spark-thrift-server:
    image: bitnami/spark:latest
    container_name: spark-thrift-server
    depends_on:
      - spark-master
      - nessie
      - minio
    ports:
      - "10000:10000" # Thrift JDBC/ODBC server
      - "4040:4040"   # Spark UI
    environment:
      - SPARK_MODE=master
      - SPARK_MASTER_URL=spark://spark-master:7077
    # Folded scalar: the lines below join into a single command line,
    # so no shell line continuations are needed.
    command: >
      bash -c "
      /opt/bitnami/spark/sbin/start-thriftserver.sh
      --master spark://spark-master:7077
      --hiveconf hive.server2.thrift.port=10000
      --hiveconf hive.server2.thrift.bind.host=0.0.0.0
      --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
      --conf spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkCatalog
      --conf spark.sql.catalog.iceberg.type=nessie
      --conf spark.sql.catalog.iceberg.uri=http://nessie:19120/api/v1
      --conf spark.sql.catalog.iceberg.ref=main
      --conf spark.sql.catalog.iceberg.io-impl=org.apache.iceberg.aws.s3.S3FileIO
      --conf spark.sql.catalog.iceberg.s3.endpoint=http://minio:9000
      --conf spark.sql.catalog.iceberg.s3.path-style-access=true
      --conf spark.sql.catalog.iceberg.warehouse=s3a://nessie
      --conf spark.hadoop.fs.s3a.access.key=minioadmin
      --conf spark.hadoop.fs.s3a.secret.key=minioadmin
      --conf spark.hadoop.fs.s3a.endpoint=http://minio:9000
      --conf spark.hadoop.fs.s3a.path.style.access=true
      --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
      && tail -f /opt/bitnami/spark/logs/*thriftserver*.out
      "
    volumes:
      - spark_logs:/opt/bitnami/spark/logs
    networks:
      - app-network

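  # JupyterLab with PySpark, preconfigured via SPARK_OPTS to use the same Nessie-backed catalog.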
  jupyter:
    image: jupyter/pyspark-notebook:latest
    container_name: jupyter
    depends_on:
      - spark-master
      - nessie
      - minio
    ports:
      - "8888:8888"
    environment:
      JUPYTER_ENABLE_LAB: "yes"
      SPARK_OPTS: >
        --master=spark://spark-master:7077
        --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
        --conf spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkCatalog
        --conf spark.sql.catalog.iceberg.type=nessie
        --conf spark.sql.catalog.iceberg.uri=http://nessie:19120/api/v1
        --conf spark.sql.catalog.iceberg.ref=main
        --conf spark.sql.catalog.iceberg.io-impl=org.apache.iceberg.aws.s3.S3FileIO
        --conf spark.sql.catalog.iceberg.s3.endpoint=http://minio:9000
        --conf spark.sql.catalog.iceberg.s3.path-style-access=true
        --conf spark.sql.catalog.iceberg.warehouse=s3a://nessie
        --conf spark.hadoop.fs.s3a.access.key=minioadmin
        --conf spark.hadoop.fs.s3a.secret.key=minioadmin
        --conf spark.hadoop.fs.s3a.endpoint=http://minio:9000
        --conf spark.hadoop.fs.s3a.path.style.access=true
        --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
    volumes:
      - jupyter_notebooks:/home/jovyan/work
    command: >
      start.sh jupyter lab --LabApp.token='' --LabApp.password=''
    networks:
      - app-network

networks:
  app-network:
    driver: bridge

volumes:
  minio_data:
  postgres_data:
  spark_logs:
  jupyter_notebooks:
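
With the stack up (docker compose up -d), the JupyterLab server at http://localhost:8888 can drive an end-to-end test. A minimal smoke test might look like the sketch below, assuming the session can reach the internet to fetch the Iceberg and Nessie jars via spark.jars.packages (the compose file does not bundle them; the artifact coordinates and versions shown are illustrative and must match the Spark version inside the notebook image):

# Smoke test for the Nessie + Iceberg + MinIO stack, run from the Jupyter
# container. Package versions below are ASSUMPTIONS: adjust them to the
# Spark/Scala versions actually shipped in jupyter/pyspark-notebook.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder.appName("nessie-smoke-test")
    .master("spark://spark-master:7077")
    # Fetch the Iceberg runtime, the AWS bundle (for S3FileIO), and the
    # Nessie Spark extensions at session start.
    .config(
        "spark.jars.packages",
        ",".join([
            "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.3.1",
            "org.apache.iceberg:iceberg-aws-bundle:1.3.1",
            "org.projectnessie.nessie-integrations:nessie-spark-extensions-3.3_2.12:0.67.0",
        ]),
    )
    .config("spark.sql.extensions",
            "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config("spark.sql.catalog.iceberg", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.iceberg.type", "nessie")
    .config("spark.sql.catalog.iceberg.uri", "http://nessie:19120/api/v1")
    .config("spark.sql.catalog.iceberg.ref", "main")
    .config("spark.sql.catalog.iceberg.io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
    .config("spark.sql.catalog.iceberg.s3.endpoint", "http://minio:9000")
    .config("spark.sql.catalog.iceberg.s3.path-style-access", "true")
    .config("spark.sql.catalog.iceberg.s3.access-key-id", "minioadmin")
    .config("spark.sql.catalog.iceberg.s3.secret-access-key", "minioadmin")
    .config("spark.sql.catalog.iceberg.warehouse", "s3a://nessie")
    .getOrCreate()
)

# Create a namespace and table, write two rows, read them back.
spark.sql("CREATE NAMESPACE IF NOT EXISTS iceberg.demo")
spark.sql(
    "CREATE TABLE IF NOT EXISTS iceberg.demo.events "
    "(id BIGINT, name STRING) USING iceberg"
)
spark.sql("INSERT INTO iceberg.demo.events VALUES (1, 'hello'), (2, 'world')")
spark.sql("SELECT * FROM iceberg.demo.events ORDER BY id").show()

If the write succeeds, the table's data and metadata files appear under the nessie bucket (MinIO console at http://localhost:9001) and the change is recorded as a commit on Nessie's main branch.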

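The Thrift server publishes the kind of endpoint any Hive JDBC/ODBC client can talk to. One way to check connectivity from the host is PyHive (pip install 'pyhive[hive]'); the username is arbitrary since authentication is disabled in this setup:

# Connect to the Spark Thrift JDBC/ODBC endpoint on localhost:10000
# and list the visible databases.
from pyhive import hive

conn = hive.Connection(host="localhost", port=10000, username="spark")
cur = conn.cursor()
cur.execute("SHOW DATABASES")
print(cur.fetchall())  # expect at least the 'default' database
cur.close()
conn.close()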