Skip to content

Commit 0912907

Browse files
committed
fix: spark configuration
1 parent 0d1de21 commit 0912907

File tree

4 files changed: 8 additions (+8) and 7 deletions (-7).

nessie-stack/Dockerfile

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
FROM jupyter/pyspark-notebook:latest
1+
FROM docker.io/jupyter/pyspark-notebook:latest
22

33
USER root
44

@@ -11,14 +11,13 @@ RUN wget https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime
1111
wget https://repo1.maven.org/maven2/org/projectnessie/nessie-integrations/nessie-spark-extensions-3.5_2.12/0.102.5/nessie-spark-extensions-3.5_2.12-0.102.5.jar -P /usr/local/spark/jars/
1212

1313
# Install Python packages (update pyiceberg to match JAR version)
14-
RUN pip install pyiceberg==0.9.0 pynessie==0.67.0
14+
RUN pip install pyiceberg==0.9.0 pynessie==0.67.0 findspark
1515

1616
# Create a directory for init scripts
1717
RUN mkdir -p /usr/local/bin/start-notebook.d
1818

1919
# Create init script to configure Spark environment
20-
RUN echo '#!/bin/bash\nexport PYSPARK_DRIVER_PYTHON=jupyter\nexport PYSPARK_DRIVER_PYTHON_OPTS="lab --NotebookApp.token=\'\' --NotebookApp.password=\'\'"' > /usr/local/bin/start-notebook.d/spark-config.sh && \
21-
chmod +x /usr/local/bin/start-notebook.d/spark-config.sh
20+
COPY spark-config.sh /usr/local/bin/start-notebook.d/spark-config.sh
2221

2322
USER $NB_UID
2423

nessie-stack/config_notebook.sh

Whitespace-only changes.

nessie-stack/docker-compose.yml

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -120,11 +120,10 @@ services:
120120
--hiveconf hive.server2.thrift.port=10000 \
121121
--hiveconf hive.server2.thrift.bind.host=0.0.0.0 \
122122
--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
123-
--conf spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkCatalog \
124-
--conf spark.sql.catalog.iceberg.type=nessie \
123+
--conf spark.sql.catalog.iceberg=org.apache.iceberg.spark.SparkSessionCatalog \
124+
--conf spark.sql.catalog.iceberg.type=NessieCatalog \
125125
--conf spark.sql.catalog.iceberg.uri=http://nessie:19120/api/v1 \
126126
--conf spark.sql.catalog.iceberg.ref=main \
127-
--conf spark.sql.catalog.iceberg.catalog-impl=org.apache.iceberg.aws.s3.S3FileIO \
128127
--conf spark.sql.catalog.iceberg.s3.endpoint=http://minio:9000 \
129128
--conf spark.sql.catalog.iceberg.s3.path-style-access=true \
130129
--conf spark.sql.catalog.iceberg.warehouse=s3a://nessie \

nessie-stack/spark-config.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
#!/bin/bash
2+
export PYSPARK_DRIVER_PYTHON=jupyter
3+
export PYSPARK_DRIVER_PYTHON_OPTS="lab --NotebookApp.token='' --NotebookApp.password=''"

0 commit comments

Comments
 (0)