Add a --json flag to the status CLI command (torchx/cli/cmd_status.py) that
prints json.dumps(app_status.to_json(filter_roles)) instead of the formatted
text. Adds RoleStatus.to_json() and AppStatus.to_json() in torchx/specs/api.py
and a unit test covering AppStatus.to_json() in torchx/specs/test/api_test.py.

diff --git a/torchx/cli/cmd_status.py b/torchx/cli/cmd_status.py
index 4cd543bfe..1fd65bc7a 100644
--- a/torchx/cli/cmd_status.py
+++ b/torchx/cli/cmd_status.py
@@ -8,6 +8,7 @@
 # pyre-strict
 
 import argparse
+import json
 import logging
 import sys
 from typing import List, Optional
@@ -46,6 +47,11 @@ def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
         subparser.add_argument(
             "--roles", type=str, default="", help="comma separated roles to filter"
         )
+        subparser.add_argument(
+            "--json",
+            action="store_true",
+            help="output the status in JSON format",
+        )
 
     def run(self, args: argparse.Namespace) -> None:
         app_handle = args.app_handle
@@ -54,7 +60,10 @@ def run(self, args: argparse.Namespace) -> None:
         app_status = runner.status(app_handle)
         filter_roles = parse_list_arg(args.roles)
         if app_status:
-            print(app_status.format(filter_roles))
+            if args.json:
+                print(json.dumps(app_status.to_json(filter_roles)))
+            else:
+                print(app_status.format(filter_roles))
         else:
             logger.error(
                 f"AppDef: {app_id},"
diff --git a/torchx/specs/api.py b/torchx/specs/api.py
index 8c894f354..5e471d4e6 100644
--- a/torchx/specs/api.py
+++ b/torchx/specs/api.py
@@ -538,6 +538,15 @@ class RoleStatus:
     role: str
     replicas: List[ReplicaStatus]
 
+    def to_json(self) -> Dict[str, Any]:
+        """
+        Convert the RoleStatus to a json object.
+        """
+        return {
+            "role": self.role,
+            "replicas": [asdict(replica) for replica in self.replicas],
+        }
+
 
 @dataclass
 class AppStatus:
@@ -657,6 +666,21 @@ def _format_role_status(
             replica_data += self._format_replica_status(replica)
         return f"{replica_data}"
 
+    def to_json(self, filter_roles: Optional[List[str]] = None) -> Dict[str, Any]:
+        """
+        Convert the AppStatus to a json object, including RoleStatus.
+        """
+        roles = self._get_role_statuses(self.roles, filter_roles)
+
+        return {
+            "state": str(self.state),
+            "num_restarts": self.num_restarts,
+            "roles": [role_status.to_json() for role_status in roles],
+            "msg": self.msg,
+            "structured_error_msg": self.structured_error_msg,
+            "url": self.ui_url,
+        }
+
     def format(
         self,
         filter_roles: Optional[List[str]] = None,
@@ -672,6 +696,7 @@ def format(
         """
         roles_data = ""
         roles = self._get_role_statuses(self.roles, filter_roles)
+
         for role_status in roles:
             roles_data += self._format_role_status(role_status)
         return Template(_APP_STATUS_FORMAT_TEMPLATE).substitute(
diff --git a/torchx/specs/test/api_test.py b/torchx/specs/test/api_test.py
index 60dfc0dc6..072c7a7da 100644
--- a/torchx/specs/test/api_test.py
+++ b/torchx/specs/test/api_test.py
@@ -176,6 +176,42 @@ def test_format_app_status(self) -> None:
         # Split and compare to aviod AssertionError.
         self.assertEqual(expected_message.split(), actual_message.split())
 
+    def test_app_status_in_json(self) -> None:
+        app_status = self._get_test_app_status()
+        result = app_status.to_json()
+        error_msg = '{"message":{"message":"error","errorCode":-1,"extraInfo":{"timestamp":1293182}}}'
+        self.assertDictEqual(
+            result,
+            {
+                "state": "RUNNING",
+                "num_restarts": 0,
+                "roles": [
+                    {
+                        "role": "worker",
+                        "replicas": [
+                            {
+                                "id": 0,
+                                "state": 5,
+                                "role": "worker",
+                                "hostname": "localhost",
+                                "structured_error_msg": error_msg,
+                            },
+                            {
+                                "id": 1,
+                                "state": 3,
+                                "role": "worker",
+                                "hostname": "localhost",
+                                "structured_error_msg": "",
+                            },
+                        ],
+                    }
+                ],
+                "msg": "",
+                "structured_error_msg": "",
+                "url": None,
+            },
+        )
+
 
 
 class ResourceTest(unittest.TestCase):
     def test_copy_resource(self) -> None: