Skip to content

Commit 0cde21b

Browse files
committed
feat(nessie): add nessie stack
1 parent 0912907 commit 0cde21b

File tree

192 files changed

+4936
-178
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

192 files changed

+4936
-178
lines changed

nessie-stack/Dockerfile

Lines changed: 0 additions & 26 deletions
This file was deleted.

nessie-stack/docker-compose.yml

Lines changed: 33 additions & 131 deletions
Original file line number | Diff line number | Diff line change
@@ -2,166 +2,68 @@ version: '3.8'
22

33
services:
44
minio:
5-
image: minio/minio:latest
5+
image: quay.io/minio/minio:latest
66
container_name: minio
7-
ports:
8-
- "9000:9000"
9-
- "9001:9001"
7+
command: server /data --console-address ":9001"
108
environment:
119
MINIO_ROOT_USER: minioadmin
1210
MINIO_ROOT_PASSWORD: minioadmin
11+
ports:
12+
- "9000:9000"
13+
- "9001:9001"
1314
volumes:
1415
- minio_data:/data
15-
command: server /data --console-address ":9001"
16-
healthcheck:
17-
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
18-
interval: 30s
19-
timeout: 20s
20-
retries: 3
2116
networks:
22-
- app-network
17+
- common_network
2318

24-
createbuckets:
25-
image: minio/mc:latest
19+
minio-client:
20+
image: minio/mc
2621
depends_on:
2722
- minio
2823
entrypoint: >
2924
/bin/sh -c "
30-
sleep 5;
31-
/usr/bin/mc config host add myminio http://minio:9000 minioadmin minioadmin;
32-
/usr/bin/mc mb myminio/nessie;
33-
/usr/bin/mc mb myminio/spark;
34-
exit 0;
25+
sleep 5;
26+
mc alias set local http://minio:9000 minioadmin minioadmin;
27+
mc mb local/warehouse;
28+
exit 0;
3529
"
3630
networks:
37-
- app-network
38-
39-
postgres:
40-
image: postgres:14
41-
container_name: postgres
42-
ports:
43-
- "5432:5432"
44-
environment:
45-
POSTGRES_USER: nessie
46-
POSTGRES_PASSWORD: nessie
47-
POSTGRES_DB: nessie
48-
volumes:
49-
- postgres_data:/var/lib/postgresql/data
50-
networks:
51-
- app-network
31+
- common_network
5232

5333
nessie:
5434
image: projectnessie/nessie:latest
5535
container_name: nessie
56-
depends_on:
57-
- postgres
5836
ports:
5937
- "19120:19120"
6038
environment:
61-
QUARKUS_PROFILE: postgresql
62-
NESSIE_VERSION_STORE_TYPE: jdbc
63-
QUARKUS_DATASOURCE_USERNAME: nessie
64-
QUARKUS_DATASOURCE_PASSWORD: nessie
65-
QUARKUS_DATASOURCE_JDBC_URL: jdbc:postgresql://postgres:5432/nessie
66-
networks:
67-
- app-network
68-
69-
spark-master:
70-
image: bitnami/spark:latest
71-
container_name: spark-master
72-
environment:
73-
- SPARK_MODE=master
74-
- SPARK_RPC_AUTHENTICATION_ENABLED=no
75-
- SPARK_RPC_ENCRYPTION_ENABLED=no
76-
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
77-
- SPARK_SSL_ENABLED=no
78-
ports:
79-
- "8080:8080"
80-
- "7077:7077"
81-
networks:
82-
- app-network
83-
84-
spark-worker:
85-
image: bitnami/spark:latest
86-
container_name: spark-worker
87-
depends_on:
88-
- spark-master
89-
environment:
90-
- SPARK_MODE=worker
91-
- SPARK_MASTER_URL=spark://spark-master:7077
92-
- SPARK_WORKER_MEMORY=2G
93-
- SPARK_WORKER_CORES=2
94-
- SPARK_RPC_AUTHENTICATION_ENABLED=no
95-
- SPARK_RPC_ENCRYPTION_ENABLED=no
96-
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
97-
- SPARK_SSL_ENABLED=no
98-
ports:
99-
- "8081:8081"
39+
QUARKUS_HTTP_PORT: 19120
10040
networks:
101-
- app-network
41+
- common_network
10242

103-
spark-thrift-server:
104-
image: bitnami/spark:latest
105-
container_name: spark-thrift-server
43+
spark-iceberg:
44+
container_name: spark-iceberg
45+
build: spark/
10646
depends_on:
107-
- spark-master
10847
- nessie
10948
- minio
110-
ports:
111-
- "10000:10000" # Thrift JDBC/ODBC server
112-
- "4040:4040" # Spark UI
113-
environment:
114-
- SPARK_MODE=master
115-
- SPARK_MASTER_URL=spark://spark-master:7077
116-
command: >
117-
bash -c "
118-
/opt/bitnami/spark/sbin/start-thriftserver.sh \
119-
--master spark://spark-master:7077 \
120-
--hiveconf hive.server2.thrift.port=10000 \
121-
--hiveconf hive.server2.thrift.bind.host=0.0.0.0 \
122-
--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
123-
--conf spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkSessionCatalog \
124-
--conf spark.sql.catalog.iceberg.type=NessieCatalog \
125-
--conf spark.sql.catalog.iceberg.uri=http://nessie:19120/api/v1 \
126-
--conf spark.sql.catalog.iceberg.ref=main \
127-
--conf spark.sql.catalog.iceberg.s3.endpoint=http://minio:9000 \
128-
--conf spark.sql.catalog.iceberg.s3.path-style-access=true \
129-
--conf spark.sql.catalog.iceberg.warehouse=s3a://nessie \
130-
--conf spark.hadoop.fs.s3a.access.key=minioadmin \
131-
--conf spark.hadoop.fs.s3a.secret.key=minioadmin \
132-
--conf spark.hadoop.fs.s3a.endpoint=http://minio:9000 \
133-
--conf spark.hadoop.fs.s3a.path.style.access=true \
134-
--conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \
135-
&& tail -f /opt/bitnami/spark/logs/*thriftserver*.out
136-
"
13749
volumes:
138-
- spark_logs:/opt/bitnami/spark/logs
139-
networks:
140-
- app-network
141-
142-
jupyter:
143-
build:
144-
context: .
145-
container_name: jupyter
146-
depends_on:
147-
- spark-master
148-
- nessie
149-
- minio
150-
ports:
151-
- "8888:8888"
50+
- ./warehouse:/home/iceberg/warehouse
51+
- ./notebooks:/home/iceberg/notebooks/notebooks
15252
environment:
153-
JUPYTER_ENABLE_LAB: "yes"
154-
volumes:
155-
- jupyter_notebooks:/home/jovyan/work
53+
- AWS_ACCESS_KEY_ID=minioadmin
54+
- AWS_SECRET_ACCESS_KEY=minioadmin
55+
- AWS_REGION=us-east-1
56+
ports:
57+
- 8888:8888
58+
- 8080:8080
59+
- 10000:10000
60+
- 10001:10001
15661
networks:
157-
- app-network
158-
159-
networks:
160-
app-network:
161-
driver: bridge
62+
- common_network
16263

16364
volumes:
16465
minio_data:
165-
postgres_data:
166-
spark_logs:
167-
jupyter_notebooks:
66+
67+
networks:
68+
common_network:
69+
driver: bridge

nessie-stack/notebooks/.ipynb_checkpoints/test-checkpoint.ipynb

Lines changed: 210 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2+
# *************************************************************************
3+
# *** DO NOT TOUCH FILES IN THIS DIRECTORY! ***
4+
# *** FILES IN THIS DIRECTORY AND SUBDIRECTORIES CONSTITUTE A DERBY ***
5+
# *** DATABASE, WHICH INCLUDES THE DATA (USER AND SYSTEM) AND THE ***
6+
# *** FILES NECESSARY FOR DATABASE RECOVERY. ***
7+
# *** EDITING, ADDING, OR DELETING ANY OF THESE FILES MAY CAUSE DATA ***
8+
# *** CORRUPTION AND LEAVE THE DATABASE IN A NON-RECOVERABLE STATE. ***
9+
# *************************************************************************
38 Bytes
Binary file not shown.
4 Bytes
Binary file not shown.
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
# *************************************************************************
3+
# *** DO NOT TOUCH FILES IN THIS DIRECTORY! ***
4+
# *** FILES IN THIS DIRECTORY ARE USED BY THE DERBY DATABASE RECOVERY ***
5+
# *** SYSTEM. EDITING, ADDING, OR DELETING FILES IN THIS DIRECTORY ***
6+
# *** WILL CAUSE THE DERBY RECOVERY SYSTEM TO FAIL, LEADING TO ***
7+
# *** NON-RECOVERABLE CORRUPT DATABASES. ***
8+
# *************************************************************************
48 Bytes
Binary file not shown.
1 MB
Binary file not shown.
48 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)