Skip to content

Commit d7fb501

Browse files
Vikrammalachi-constant
andauthored
Adding args to submit spark step (#1826)
* Adding args to submit spark step * Add example * Update args to default arg of None Co-authored-by: Lucas Hanson <[email protected]>
1 parent ca47748 commit d7fb501

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

awswrangler/emr.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,7 @@ def submit_ecr_credentials_refresh(
987987

988988
def build_spark_step(
989989
path: str,
990+
args: Optional[List[str]] = None,
990991
deploy_mode: str = "cluster",
991992
docker_image: Optional[str] = None,
992993
name: str = "my-step",
@@ -1000,6 +1001,8 @@ def build_spark_step(
10001001
----------
10011002
path : str
10021003
Script path. (e.g. s3://bucket/app.py)
1004+
args : List[str], optional
1005+
CLI args to use with script
10031006
deploy_mode : str
10041007
"cluster" | "client"
10051008
docker_image : str, optional
@@ -1029,8 +1032,9 @@ def build_spark_step(
10291032
>>> )
10301033
10311034
"""
1035+
script_args = " ".join(args) if args else ""
10321036
if docker_image is None:
1033-
cmd: str = f"spark-submit --deploy-mode {deploy_mode} {path}"
1037+
cmd: str = f"spark-submit --deploy-mode {deploy_mode} {path} {script_args}"
10341038
else:
10351039
config: str = "hdfs:///user/hadoop/config.json"
10361040
cmd = (
@@ -1043,7 +1047,7 @@ def build_spark_step(
10431047
f"--conf spark.yarn.appMasterEnv.YARN_CONTAINER_RUNTIME_DOCKER_IMAGE={docker_image} "
10441048
f"--conf spark.yarn.appMasterEnv.YARN_CONTAINER_RUNTIME_DOCKER_CLIENT_CONFIG={config} "
10451049
f"--conf spark.yarn.appMasterEnv.YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS=/etc/passwd:/etc/passwd:ro "
1046-
f"{path}"
1050+
f"{path} {script_args}"
10471051
)
10481052
return build_step(
10491053
command=cmd,
@@ -1058,6 +1062,7 @@ def build_spark_step(
10581062
def submit_spark_step(
10591063
cluster_id: str,
10601064
path: str,
1065+
args: Optional[List[str]] = None,
10611066
deploy_mode: str = "cluster",
10621067
docker_image: Optional[str] = None,
10631068
name: str = "my-step",
@@ -1073,6 +1078,9 @@ def submit_spark_step(
10731078
Cluster ID.
10741079
path : str
10751080
Script path. (e.g. s3://bucket/app.py)
1081+
args : List[str], optional
1082+
CLI args to use with script
1083+
eg. args = ["--name", "hello-world"]
10761084
deploy_mode : str
10771085
"cluster" | "client"
10781086
docker_image : str, optional
@@ -1103,6 +1111,7 @@ def submit_spark_step(
11031111
session: boto3.Session = _utils.ensure_session(session=boto3_session)
11041112
step = build_spark_step(
11051113
path=path,
1114+
args=args,
11061115
deploy_mode=deploy_mode,
11071116
docker_image=docker_image,
11081117
name=name,

0 commit comments

Comments
 (0)