# Local lakehouse stack:
#   MinIO (S3-compatible object store) + bucket bootstrap job,
#   Postgres-backed Nessie catalog,
#   Spark master/worker + Thrift JDBC server (Iceberg-enabled),
#   Jupyter Lab with matching Spark/Iceberg defaults.
# All services share one bridge network (`app-network`).
version: '3.8'

services:
  minio:
    image: minio/minio:latest
    container_name: minio
    ports:
      - "9000:9000"  # S3 API
      - "9001:9001"  # web console
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    volumes:
      - minio_data:/data
    command: server /data --console-address ":9001"
    healthcheck:
      # NOTE(review): recent minio/minio images no longer ship curl —
      # confirm against the pinned image, or switch to `mc ready local`.
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 30s
      timeout: 20s
      retries: 3
    networks:
      - app-network

  # One-shot job: creates the `nessie` and `spark` buckets, then exits 0.
  createbuckets:
    image: minio/mc:latest
    depends_on:
      - minio
    entrypoint: >
      /bin/sh -c "
      sleep 5;
      /usr/bin/mc config host add myminio http://minio:9000 minioadmin minioadmin;
      /usr/bin/mc mb myminio/nessie;
      /usr/bin/mc mb myminio/spark;
      exit 0;
      "
    networks:
      - app-network

  postgres:
    image: postgres:14
    container_name: postgres
    ports:
      - "5432:5432"
    environment:
      POSTGRES_USER: nessie
      POSTGRES_PASSWORD: nessie
      POSTGRES_DB: nessie
    volumes:
      - postgres_data:/var/lib/postgresql/data
    networks:
      - app-network

  # Nessie catalog server, persisting its version store in Postgres.
  nessie:
    image: projectnessie/nessie:latest
    container_name: nessie
    depends_on:
      - postgres
    ports:
      - "19120:19120"  # Nessie REST API
    environment:
      QUARKUS_PROFILE: postgresql
      NESSIE_VERSION_STORE_TYPE: jdbc
      QUARKUS_DATASOURCE_USERNAME: nessie
      QUARKUS_DATASOURCE_PASSWORD: nessie
      QUARKUS_DATASOURCE_JDBC_URL: jdbc:postgresql://postgres:5432/nessie
    networks:
      - app-network

  spark-master:
    image: bitnami/spark:latest
    container_name: spark-master
    environment:
      - SPARK_MODE=master
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
    ports:
      - "8080:8080"  # master web UI
      - "7077:7077"  # master RPC
    networks:
      - app-network

  spark-worker:
    image: bitnami/spark:latest
    container_name: spark-worker
    depends_on:
      - spark-master
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_CORES=1
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
    ports:
      - "8081:8081"  # worker web UI
    networks:
      - app-network

  # Spark Thrift JDBC/ODBC server wired to the Iceberg/Nessie catalog.
  spark-thrift-server:
    image: bitnami/spark:latest
    container_name: spark-thrift-server
    depends_on:
      - spark-master
      - nessie
      - minio
    ports:
      - "10000:10000"  # Thrift JDBC/ODBC server
      - "4040:4040"  # Spark UI
    environment:
      # NOTE(review): SPARK_MODE=master looks unintended for a thrift-server
      # container (the master runs in `spark-master`) — confirm the bitnami
      # entrypoint behavior before changing.
      - SPARK_MODE=master
      - SPARK_MASTER_URL=spark://spark-master:7077
    command: >
      bash -c "
      /opt/bitnami/spark/sbin/start-thriftserver.sh \
      --master spark://spark-master:7077 \
      --hiveconf hive.server2.thrift.port=10000 \
      --hiveconf hive.server2.thrift.bind.host=0.0.0.0 \
      --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
      --conf spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkCatalog \
      --conf spark.sql.catalog.iceberg.type=nessie \
      --conf spark.sql.catalog.iceberg.uri=http://nessie:19120/api/v1 \
      --conf spark.sql.catalog.iceberg.ref=main \
      --conf spark.sql.catalog.iceberg.catalog-impl=org.apache.iceberg.aws.s3.S3FileIO \
      --conf spark.sql.catalog.iceberg.s3.endpoint=http://minio:9000 \
      --conf spark.sql.catalog.iceberg.s3.path-style-access=true \
      --conf spark.sql.catalog.iceberg.warehouse=s3a://nessie \
      --conf spark.hadoop.fs.s3a.access.key=minioadmin \
      --conf spark.hadoop.fs.s3a.secret.key=minioadmin \
      --conf spark.hadoop.fs.s3a.endpoint=http://minio:9000 \
      --conf spark.hadoop.fs.s3a.path.style.access=true \
      --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \
      && tail -f /opt/bitnami/spark/logs/*thriftserver*.out
      "
    volumes:
      - spark_logs:/opt/bitnami/spark/logs
    networks:
      - app-network

  jupyter:
    image: jupyter/pyspark-notebook:latest
    container_name: jupyter
    depends_on:
      - spark-master
      - nessie
      - minio
    ports:
      - "8888:8888"  # Jupyter Lab
    environment:
      JUPYTER_ENABLE_LAB: "yes"
      # Same Iceberg/Nessie/S3A settings as the thrift server, so notebook
      # sessions see the identical `iceberg` catalog.
      SPARK_OPTS: >
        --master=spark://spark-master:7077
        --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
        --conf spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkCatalog
        --conf spark.sql.catalog.iceberg.type=nessie
        --conf spark.sql.catalog.iceberg.uri=http://nessie:19120/api/v1
        --conf spark.sql.catalog.iceberg.ref=main
        --conf spark.sql.catalog.iceberg.catalog-impl=org.apache.iceberg.aws.s3.S3FileIO
        --conf spark.sql.catalog.iceberg.s3.endpoint=http://minio:9000
        --conf spark.sql.catalog.iceberg.s3.path-style-access=true
        --conf spark.sql.catalog.iceberg.warehouse=s3a://nessie
        --conf spark.hadoop.fs.s3a.access.key=minioadmin
        --conf spark.hadoop.fs.s3a.secret.key=minioadmin
        --conf spark.hadoop.fs.s3a.endpoint=http://minio:9000
        --conf spark.hadoop.fs.s3a.path.style.access=true
        --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
    volumes:
      - jupyter_notebooks:/home/jovyan/work
    command: >
      start.sh jupyter lab --LabApp.token='' --LabApp.password=''
    networks:
      - app-network

networks:
  app-network:
    driver: bridge

volumes:
  minio_data:
  postgres_data:
  spark_logs:
  jupyter_notebooks: