Skip to content

Commit 0d1de21

Browse files
committed
feat: update configs
1 parent 5f4acf4 commit 0d1de21

File tree

4 files changed

+49
-21
lines changed

4 files changed

+49
-21
lines changed

nessie-stack/Dockerfile

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
# syntax=docker/dockerfile:1
# Jupyter + PySpark image with Iceberg/Nessie/S3 (MinIO) support baked in.
# NOTE(review): :latest is non-reproducible — pin a specific tag or digest so
# the base image's Spark/Hadoop versions keep matching the JARs fetched below.
FROM jupyter/pyspark-notebook:latest

USER root

# AWS Hadoop integration for S3 (hadoop-aws 3.3.4 must match the Hadoop line
# shipped in the base image; the SDK bundle version pairs with hadoop-aws).
RUN wget -q https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar -P /usr/local/spark/jars/ && \
    wget -q https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar -P /usr/local/spark/jars/

# Iceberg runtime for Spark 3.5 / Scala 2.12, plus the Nessie SQL extensions.
RUN wget -q https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/1.8.1/iceberg-spark-runtime-3.5_2.12-1.8.1.jar -P /usr/local/spark/jars/ && \
    wget -q https://repo1.maven.org/maven2/org/projectnessie/nessie-integrations/nessie-spark-extensions-3.5_2.12/0.102.5/nessie-spark-extensions-3.5_2.12-0.102.5.jar -P /usr/local/spark/jars/

# Python clients, pinned for reproducibility; --no-cache-dir keeps the layer small.
# NOTE(review): pyiceberg is versioned independently of the Iceberg JARs — 0.9.0
# does not need to equal 1.8.1; confirm compatibility, not version equality.
RUN pip install --no-cache-dir pyiceberg==0.9.0 pynessie==0.67.0

# Hook directory scanned by the base image's start-notebook machinery.
RUN mkdir -p /usr/local/bin/start-notebook.d

# Init script that makes `pyspark` launch JupyterLab with auth disabled.
# Written via a heredoc instead of echo: the original echo-based quoting
# (\'\' inside a single-quoted string) emitted a stray backslash and broke
# the generated script's quoting.
COPY <<'EOF' /usr/local/bin/start-notebook.d/spark-config.sh
#!/bin/bash
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS="lab --NotebookApp.token='' --NotebookApp.password=''"
EOF
RUN chmod +x /usr/local/bin/start-notebook.d/spark-config.sh

# Drop root for everything that runs at container start.
USER $NB_UID

# NOTE(review): .sparkmagic is created but nothing below populates it — confirm
# whether a sparkmagic config is mounted or generated at runtime.
RUN mkdir -p $HOME/.sparkmagic

# Ship the Iceberg/Nessie/MinIO Spark wiring with the image.
COPY --chown=$NB_UID:$NB_GID spark-defaults.conf /usr/local/spark/conf/spark-defaults.conf

nessie-stack/config_notebook.sh

Whitespace-only changes.

docker-compose.yml renamed to nessie-stack/docker-compose.yml

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ services:
8989
environment:
9090
- SPARK_MODE=worker
9191
- SPARK_MASTER_URL=spark://spark-master:7077
92-
- SPARK_WORKER_MEMORY=1G
93-
- SPARK_WORKER_CORES=1
92+
- SPARK_WORKER_MEMORY=2G
93+
- SPARK_WORKER_CORES=2
9494
- SPARK_RPC_AUTHENTICATION_ENABLED=no
9595
- SPARK_RPC_ENCRYPTION_ENABLED=no
9696
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
@@ -141,7 +141,8 @@ services:
141141
- app-network
142142

143143
jupyter:
144-
image: jupyter/pyspark-notebook:latest
144+
build:
145+
context: .
145146
container_name: jupyter
146147
depends_on:
147148
- spark-master
@@ -151,26 +152,8 @@ services:
151152
- "8888:8888"
152153
environment:
153154
JUPYTER_ENABLE_LAB: "yes"
154-
SPARK_OPTS: >
155-
--master=spark://spark-master:7077
156-
--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
157-
--conf spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkCatalog
158-
--conf spark.sql.catalog.iceberg.type=nessie
159-
--conf spark.sql.catalog.iceberg.uri=http://nessie:19120/api/v1
160-
--conf spark.sql.catalog.iceberg.ref=main
161-
--conf spark.sql.catalog.iceberg.catalog-impl=org.apache.iceberg.aws.s3.S3FileIO
162-
--conf spark.sql.catalog.iceberg.s3.endpoint=http://minio:9000
163-
--conf spark.sql.catalog.iceberg.s3.path-style-access=true
164-
--conf spark.sql.catalog.iceberg.warehouse=s3a://nessie
165-
--conf spark.hadoop.fs.s3a.access.key=minioadmin
166-
--conf spark.hadoop.fs.s3a.secret.key=minioadmin
167-
--conf spark.hadoop.fs.s3a.endpoint=http://minio:9000
168-
--conf spark.hadoop.fs.s3a.path.style.access=true
169-
--conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
170155
volumes:
171156
- jupyter_notebooks:/home/jovyan/work
172-
command: >
173-
start.sh jupyter lab --LabApp.token='' --LabApp.password=''
174157
networks:
175158
- app-network
176159

nessie-stack/spark-defaults.conf

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
# Default Spark configuration: Iceberg tables in a Nessie catalog, stored on
# MinIO via S3A. Loaded from /usr/local/spark/conf/spark-defaults.conf.
spark.master                                    spark://spark-master:7077
spark.driver.memory                             1g
spark.executor.memory                           1g

# SQL extensions: Iceberg DDL/DML plus Nessie branch/tag commands.
spark.sql.extensions                            org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.projectnessie.spark.extensions.NessieSparkSessionExtensions

# "iceberg" catalog served by the Nessie REST service.
spark.sql.catalog.iceberg                       org.apache.iceberg.spark.SparkCatalog
spark.sql.catalog.iceberg.type                  nessie
spark.sql.catalog.iceberg.uri                   http://nessie:19120/api/v1
spark.sql.catalog.iceberg.ref                   main
# Fix: S3FileIO is a FileIO implementation, so it belongs under io-impl.
# The original set catalog-impl, which conflicts with type=nessie (both pick
# the catalog class) and names a class that is not a Catalog at all.
spark.sql.catalog.iceberg.io-impl               org.apache.iceberg.aws.s3.S3FileIO
spark.sql.catalog.iceberg.s3.endpoint           http://minio:9000
spark.sql.catalog.iceberg.s3.path-style-access  true
spark.sql.catalog.iceberg.warehouse             s3a://nessie

# Hadoop S3A connector pointed at MinIO (path-style addressing, static creds).
# NOTE(review): hardcoded default MinIO credentials — acceptable for a local
# dev stack only; do not ship to shared environments.
spark.hadoop.fs.s3a.access.key                  minioadmin
spark.hadoop.fs.s3a.secret.key                  minioadmin
spark.hadoop.fs.s3a.endpoint                    http://minio:9000
spark.hadoop.fs.s3a.path.style.access           true
spark.hadoop.fs.s3a.impl                        org.apache.hadoop.fs.s3a.S3AFileSystem

0 commit comments

Comments
 (0)