---
# Local lakehouse stack:
#   - minio         : S3-compatible object store (API on 9000, console on 9001)
#   - minio-client  : one-shot provisioner that creates the `warehouse` bucket
#   - nessie        : Nessie catalog server (REST API on 19120)
#   - spark-iceberg : Spark + Iceberg node built from ./spark
version: "3.8"  # informational under Compose v2; kept for compatibility

services:
  minio:
    image: quay.io/minio/minio:latest
    container_name: minio
    command: server /data --console-address ":9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin  # dev-only credentials; do not reuse outside local dev
    ports:
      - "9000:9000"  # S3 API
      - "9001:9001"  # web console
    volumes:
      - minio_data:/data
    healthcheck:
      # `mc ready local` is MinIO's documented readiness probe and ships
      # inside the server image (curl no longer does).
      test: ["CMD", "mc", "ready", "local"]
      interval: 5s
      timeout: 5s
      retries: 5
    networks:
      - common_network

  minio-client:
    image: minio/mc
    depends_on:
      minio:
        # Gate on the healthcheck instead of the previous `sleep 5;` race.
        condition: service_healthy
    entrypoint: >
      /bin/sh -c "
      mc alias set local http://minio:9000 minioadmin minioadmin;
      mc mb --ignore-existing local/warehouse;
      exit 0;
      "
    networks:
      - common_network

  nessie:
    image: projectnessie/nessie:latest
    container_name: nessie
    ports:
      - "19120:19120"  # Nessie REST API
    environment:
      QUARKUS_HTTP_PORT: "19120"  # quoted: environment values are strings, not ints
    networks:
      - common_network

  spark-iceberg:
    container_name: spark-iceberg
    build: spark/
    depends_on:
      - nessie
      - minio
    volumes:
      - ./warehouse:/home/iceberg/warehouse
      - ./notebooks:/home/iceberg/notebooks/notebooks
    environment:
      - AWS_ACCESS_KEY_ID=minioadmin
      - AWS_SECRET_ACCESS_KEY=minioadmin
      - AWS_REGION=us-east-1
    ports:
      # Port mappings quoted per Compose best practice (YAML scalar-typing traps).
      - "8888:8888"    # Jupyter -- presumably; TODO confirm against the spark/ image
      - "8080:8080"    # Spark UI -- presumably; TODO confirm
      - "10000:10000"  # Thrift JDBC/ODBC -- per the removed thrift-server config
      - "10001:10001"
    networks:
      - common_network

volumes:
  minio_data:

networks:
  common_network:
    driver: bridge