Skip to content

Commit c280a70

Browse files
committed
fix spark 4
1 parent 044fd65 commit c280a70

File tree

3 files changed

+9
-2
lines changed

3 files changed

+9
-2
lines changed

dev/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ RUN apt-get -qq update && \
2121
curl \
2222
vim \
2323
unzip \
24-
openjdk-11-jdk \
24+
openjdk-17-jdk \
2525
build-essential \
2626
software-properties-common \
2727
ssh && \
@@ -62,6 +62,7 @@ RUN chmod u+x /opt/spark/sbin/* && \
6262
chmod u+x /opt/spark/bin/*
6363

6464
RUN pip3 install -q ipython
65+
RUN pip3 install py4j
6566

6667
RUN pip3 install "pyiceberg[s3fs,hive,pyarrow]==${PYICEBERG_VERSION}"
6768

dev/provision.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
.builder
3333
.config("spark.sql.shuffle.partitions", "1")
3434
.config("spark.default.parallelism", "1")
35+
.config("spark.sql.ansi.enabled", "false")
3536
.getOrCreate()
3637
)
3738

tests/conftest.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2535,7 +2535,7 @@ def spark() -> "SparkSession":
25352535
spark_version = ".".join(importlib.metadata.version("pyspark").split(".")[:2])
25362536
scala_version = "2.13"
25372537
iceberg_version = "1.10.0"
2538-
hadoop_version = "3.3.4"
2538+
hadoop_version = "3.3.6"
25392539
aws_sdk_version = "1.12.753"
25402540

25412541
os.environ["PYSPARK_SUBMIT_ARGS"] = (
@@ -2577,6 +2577,11 @@ def spark() -> "SparkSession":
25772577
.config("spark.sql.catalog.spark_catalog.warehouse", "s3://warehouse/hive/")
25782578
.config("spark.hadoop.fs.s3a.endpoint", "http://localhost:9000")
25792579
.config("spark.hadoop.fs.s3a.path.style.access", "true")
2580+
.config("spark.hadoop.fs.s3a.threads.keepalivetime", "60000")
2581+
.config("spark.hadoop.fs.s3a.connection.establish.timeout", "30000")
2582+
.config("spark.hadoop.fs.s3a.connection.timeout", "200000")
2583+
.config("spark.hadoop.fs.s3a.multipart.purge.age", str(24 * 60 * 60))
2584+
.config("spark.hadoop.fs.s3a.aws.credentials.provider", "com.amazonaws.auth.EnvironmentVariableCredentialsProvider")
25802585
.config("spark.sql.catalogImplementation", "hive")
25812586
.config("spark.sql.defaultCatalog", "integration")
25822587
.config("spark.sql.execution.arrow.pyspark.enabled", "true")

0 commit comments

Comments (0)