Commit d1f525c
feat: add spark_properties to athena spark (#2508)
* add spark_properties to athena spark
* add test_cases for delta spark properties
* Add simple delta spark code to the test case
* formatting

Co-authored-by: Anton Kukushkin <[email protected]>
1 parent 466dafe commit d1f525c
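A minimal usage sketch of the new parameter (the workgroup name and property values below are illustrative placeholders, not part of the commit; create_spark_session is assumed to be exposed as wr.athena.create_spark_session, mirroring wr.athena.run_spark_calculation used in the test further down):

import awswrangler as wr

# Start an Athena-for-Spark session with Delta Lake enabled through the new
# spark_properties argument; the dict is forwarded as-is into the session's
# engine configuration.
session_id = wr.athena.create_spark_session(
    workgroup="my-spark-workgroup",  # hypothetical Spark-enabled workgroup
    spark_properties={
        "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
        "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    },
)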

2 files changed: +36 -0 lines changed

awswrangler/athena/_spark.py

Lines changed: 11 additions & 0 deletions
@@ -91,6 +91,7 @@ def create_spark_session(
     max_concurrent_dpus: int = 5,
     default_executor_dpu_size: int = 1,
     additional_configs: Optional[Dict[str, Any]] = None,
+    spark_properties: Optional[Dict[str, Any]] = None,
     idle_timeout: int = 15,
     boto3_session: Optional[boto3.Session] = None,
 ) -> str:
@@ -110,6 +111,9 @@ def create_spark_session(
         The default number of DPUs to use for executors. The default is 1.
     additional_configs : Dict[str, Any], optional
         Contains additional engine parameter mappings in the form of key-value pairs.
+    spark_properties : Dict[str, Any], optional
+        Contains SparkProperties in the form of key-value pairs. Specifies custom jar files and Spark properties
+        for use cases like cluster encryption, table formats, and general Spark tuning.
     idle_timeout : int, optional
         The idle timeout in minutes for the session. The default is 15.
     boto3_session : boto3.Session(), optional
@@ -134,6 +138,8 @@ def create_spark_session(
     }
     if additional_configs:
         engine_configuration["AdditionalConfigs"] = additional_configs
+    if spark_properties:
+        engine_configuration["SparkProperties"] = spark_properties
     response = client_athena.start_session(
         WorkGroup=workgroup,
         EngineConfiguration=engine_configuration,
@@ -157,6 +163,7 @@ def run_spark_calculation(
     max_concurrent_dpus: int = 5,
     default_executor_dpu_size: int = 1,
     additional_configs: Optional[Dict[str, Any]] = None,
+    spark_properties: Optional[Dict[str, Any]] = None,
     idle_timeout: int = 15,
     boto3_session: Optional[boto3.Session] = None,
 ) -> Dict[str, Any]:
@@ -180,6 +187,9 @@ def run_spark_calculation(
         The default number of DPUs to use for executors. The default is 1.
     additional_configs : Dict[str, Any], optional
         Contains additional engine parameter mappings in the form of key-value pairs.
+    spark_properties : Dict[str, Any], optional
+        Contains SparkProperties in the form of key-value pairs. Specifies custom jar files and Spark properties
+        for use cases like cluster encryption, table formats, and general Spark tuning.
     idle_timeout : int, optional
         The idle timeout in minutes for the session. The default is 15.
     boto3_session : boto3.Session(), optional
@@ -208,6 +218,7 @@ def run_spark_calculation(
         max_concurrent_dpus=max_concurrent_dpus,
         default_executor_dpu_size=default_executor_dpu_size,
         additional_configs=additional_configs,
+        spark_properties=spark_properties,
         idle_timeout=idle_timeout,
         boto3_session=boto3_session,
     )
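For reference, the engine_configuration assembled above corresponds to a StartSession request like the following when made directly through boto3. This is a sketch only: key names follow the Athena StartSession API, the workgroup and property values are placeholders, and the DPU keys show the signature's defaults.

import boto3

athena = boto3.client("athena")

# Equivalent low-level call: SparkProperties sits alongside the DPU settings
# inside EngineConfiguration, exactly where the new branch above puts it.
response = athena.start_session(
    WorkGroup="my-spark-workgroup",  # hypothetical Spark-enabled workgroup
    EngineConfiguration={
        "MaxConcurrentDpus": 5,
        "DefaultExecutorDpuSize": 1,
        "SparkProperties": {
            "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
            "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
        },
    },
)
session_id = response["SessionId"]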

tests/unit/test_athena_spark.py

Lines changed: 25 additions & 0 deletions
@@ -47,3 +47,28 @@ def test_athena_spark_calculation(code, path, workgroup_spark):
     )
 
     assert result["Status"]["State"] == "COMPLETED"
+
+
+@pytest.mark.parametrize(
+    "code",
+    [
+        """
+output_path = "$PATH"
+
+data = spark.range(0, 5)
+data.write.format("delta").save(output_path)
+""",
+    ],
+)
+def test_athena_spark_calculation_with_spark_properties(code, path, workgroup_spark):
+    code = code.replace("$PATH", path)
+
+    result = wr.athena.run_spark_calculation(
+        code=code,
+        workgroup=workgroup_spark,
+        spark_properties={
+            "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
+            "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
+        },
+    )
+    assert result["Status"]["State"] == "COMPLETED"
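Since the test only relies on code, workgroup, and the new spark_properties argument, the same pattern can read the Delta table back in a follow-up calculation. This is an illustrative sketch, not part of the commit; path and workgroup_spark are assumed to be the same fixtures used above, and the $PATH substitution follows the test's convention.

read_code = """
df = spark.read.format("delta").load("$PATH")
print(df.count())
""".replace("$PATH", path)

# Re-run with the same Delta Lake properties so the new session can resolve
# the delta format and catalog.
result = wr.athena.run_spark_calculation(
    code=read_code,
    workgroup=workgroup_spark,
    spark_properties={
        "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
        "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    },
)
assert result["Status"]["State"] == "COMPLETED"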
