@@ -17,8 +17,8 @@ def create_torch_dist_role(
     entrypoint: str,
     resource: Resource = NULL_RESOURCE,
     base_image: Optional[str] = None,
-    script_args: Optional[List[str]] = None,
-    script_envs: Optional[Dict[str, str]] = None,
+    args: Optional[List[str]] = None,
+    env: Optional[Dict[str, str]] = None,
     num_replicas: int = 1,
     max_retries: int = 0,
     port_map: Dict[str, int] = field(default_factory=dict),
@@ -54,7 +54,7 @@ def create_torch_dist_role(
     ...     image="<NONE>",
     ...     resource=NULL_RESOURCE,
     ...     entrypoint="my_train_script.py",
-    ...     script_args=["--script_arg", "foo", "--another_arg", "bar"],
+    ...     args=["--script_arg", "foo", "--another_arg", "bar"],
     ...     num_replicas=4, max_retries=1,
     ...     nproc_per_node=8, nnodes="2:4", max_restarts=3)
     ... # effectively runs:
@@ -72,8 +72,8 @@ def create_torch_dist_role(
         entrypoint: User binary or python script that will be launched.
         resource: Resource that is requested by scheduler
         base_image: Optional base image, if schedulers support image overlay
-        script_args: User provided arguments
-        script_envs: Env. variables that will be set on worker process that runs entrypoint
+        args: User provided arguments
+        env: Env. variables that will be set on worker process that runs entrypoint
         num_replicas: Number of role replicas to run
         max_retries: Max number of retries
         port_map: Port mapping for the role
@@ -84,11 +84,11 @@ def create_torch_dist_role(
         Role object that launches user entrypoint via the torchelastic as proxy

    """
-    script_args = script_args or []
-    script_envs = script_envs or {}
+    args = args or []
+    env = env or {}

     entrypoint_override = "python"
-    args: List[str] = ["-m", "torch.distributed.launch"]
+    torch_run_args: List[str] = ["-m", "torch.distributed.launch"]

     launch_kwargs.setdefault("rdzv_backend", "etcd")
     launch_kwargs.setdefault("rdzv_id", macros.app_id)
@@ -98,14 +98,14 @@ def create_torch_dist_role(
         if isinstance(val, bool):
             # treat boolean kwarg as a flag
             if val:
-                args += [f"--{arg}"]
+                torch_run_args += [f"--{arg}"]
         else:
-            args += [f"--{arg}", str(val)]
+            torch_run_args += [f"--{arg}", str(val)]
     if not os.path.isabs(entrypoint) and not entrypoint.startswith(macros.img_root):
         # make entrypoint relative to {img_root} ONLY if it is not an absolute path
         entrypoint = os.path.join(macros.img_root, entrypoint)

-    args += [entrypoint, *script_args]
+    args = [*torch_run_args, entrypoint, *args]
     return (
         Role(
             name,
@@ -114,7 +114,7 @@ def create_torch_dist_role(
             resource=resource,
             port_map=port_map,
         )
-        .runs(entrypoint_override, *args, **script_envs)
+        .runs(entrypoint_override, *args, **env)
         .replicas(num_replicas)
         .with_retry_policy(retry_policy, max_retries)
     )
0 commit comments