Skip to content
This repository was archived by the owner on Sep 22, 2023. It is now read-only.

Commit 1f5c866

Browse files
authored
Improve CLI for multi-node sessions (#153)
* feat/refactor: Show status_data in the output of `admin session` command and move out some utility functions
1 parent edaf132 commit 1f5c866

File tree

3 files changed

+73
-58
lines changed

3 files changed

+73
-58
lines changed

changes/153.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add support for the `status_data` field when querying session/kernel information to help diagnose multi-container and scheduling problems

src/ai/backend/client/cli/admin/sessions.py

Lines changed: 39 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
from collections import defaultdict
22
import functools
3-
import json
43
import sys
54
import textwrap
65
from typing import (
76
Any,
87
Dict,
98
Mapping,
10-
Optional,
119
Sequence,
1210
)
1311
import uuid
@@ -23,14 +21,15 @@
2321
echo_via_pager,
2422
tabulate_items,
2523
)
24+
from ..utils import format_multiline, format_stats
2625
from ...exceptions import NoItems
2726

2827

2928
SessionItem = Dict[str, Any]
3029

3130

3231
# Let's say formattable options are:
33-
format_options = {
32+
field_names = {
3433
'name': ('Session Name',
3534
lambda api_session: get_naming(api_session.api_version, 'name_gql_field')),
3635
'type': ('Type',
@@ -39,6 +38,7 @@
3938
'session_id': ('Session ID', 'session_id'),
4039
'status': ('Status', 'status'),
4140
'status_info': ('Status Info', 'status_info'),
41+
'status_data': ('Status Data', 'status_data'),
4242
'created_at': ('Created At', 'created_at'),
4343
'terminated_at': ('Terminated At', 'terminated_at'),
4444
'last_updated': ('Last updated', 'status_changed'),
@@ -51,14 +51,14 @@
5151
'cluster_hostname': ('Hostname', 'cluster_hostname'),
5252
}
5353

54-
format_options_legacy = {
54+
field_names_legacy = {
5555
'used_memory': ('Used Memory (MiB)', 'mem_cur_bytes'),
5656
'max_used_memory': ('Max Used Memory (MiB)', 'mem_max_bytes'),
5757
'cpu_using': ('CPU Using (%)', 'cpu_using'),
5858
}
5959

6060

61-
def transform_legacy_mem_fields(item: SessionItem) -> SessionItem:
61+
def transform_fields(item: SessionItem, *, in_row: bool = True) -> SessionItem:
6262
if 'mem_cur_bytes' in item:
6363
item['mem_cur_bytes'] = round(item['mem_cur_bytes'] / 2 ** 20, 1)
6464
if 'mem_max_bytes' in item:
@@ -97,9 +97,9 @@ def sessions(status, access_key, name_only, dead, running, detail, plain, format
9797
is_admin = session.KeyPair(session.config.access_key).info()['is_admin']
9898
try:
9999
name_key = get_naming(session.api_version, 'name_gql_field')
100-
fields.append(format_options['name'])
100+
fields.append(field_names['name'])
101101
if is_admin:
102-
fields.append(format_options['owner'])
102+
fields.append(field_names['owner'])
103103
except Exception as e:
104104
print_error(e)
105105
sys.exit(1)
@@ -108,36 +108,36 @@ def sessions(status, access_key, name_only, dead, running, detail, plain, format
108108
elif format is not None:
109109
options = format.split(',')
110110
for opt in options:
111-
if opt not in format_options:
111+
if opt not in field_names:
112112
print_fail(f'There is no such format option: {opt}')
113113
sys.exit(1)
114114
fields = [
115-
format_options[opt] for opt in options
115+
field_names[opt] for opt in options
116116
]
117117
else:
118118
if session.api_version[0] >= 6:
119-
fields.append(format_options['session_id'])
119+
fields.append(field_names['session_id'])
120120
fields.extend([
121-
format_options['group'],
122-
format_options['kernel_id'],
123-
format_options['image'],
124-
format_options['type'],
125-
format_options['status'],
126-
format_options['status_info'],
127-
format_options['last_updated'],
128-
format_options['result'],
121+
field_names['group'],
122+
field_names['kernel_id'],
123+
field_names['image'],
124+
field_names['type'],
125+
field_names['status'],
126+
field_names['status_info'],
127+
field_names['last_updated'],
128+
field_names['result'],
129129
])
130130
if detail:
131131
fields.extend([
132-
format_options['tag'],
133-
format_options['created_at'],
134-
format_options['occupied_slots'],
132+
field_names['tag'],
133+
field_names['created_at'],
134+
field_names['occupied_slots'],
135135
])
136136
if session.api_version[0] < 5:
137137
fields.extend([
138-
format_options_legacy['used_memory'],
139-
format_options_legacy['max_used_memory'],
140-
format_options_legacy['cpu_using'],
138+
field_names_legacy['used_memory'],
139+
field_names_legacy['max_used_memory'],
140+
field_names_legacy['cpu_using'],
141141
])
142142

143143
no_match_name = None
@@ -175,7 +175,7 @@ def sessions(status, access_key, name_only, dead, running, detail, plain, format
175175
else:
176176
echo_via_pager(
177177
tabulate_items(items, fields,
178-
item_formatter=transform_legacy_mem_fields)
178+
item_formatter=transform_fields)
179179
)
180180
except NoItems:
181181
print("There are no matching sessions.")
@@ -184,23 +184,16 @@ def sessions(status, access_key, name_only, dead, running, detail, plain, format
184184
sys.exit(1)
185185

186186

187-
def format_stats(raw_stats: Optional[str], indent='') -> str:
188-
if raw_stats is None:
189-
return "(unavailable)"
190-
stats = json.loads(raw_stats)
191-
text = "\n".join(f"- {k + ': ':18s}{v}" for k, v in stats.items())
192-
return "\n" + textwrap.indent(text, indent)
193-
194-
195187
def format_containers(containers: Sequence[Mapping[str, Any]], indent='') -> str:
188+
196189
if len(containers) == 0:
197190
text = "- (There are no sub-containers belonging to the session)"
198191
else:
199192
text = ""
200193
for cinfo in containers:
201194
text += "\n".join((
202195
f"+ {cinfo['id']}",
203-
*(f" - {k + ': ':18s}{v}"
196+
*(f" - {k + ': ':18s}{format_multiline(v, 22)}"
204197
for k, v in cinfo.items()
205198
if k not in ('id', 'live_stat', 'last_stat')),
206199
f" + live_stat: {format_stats(cinfo['live_stat'], indent=' ')}",
@@ -235,19 +228,20 @@ def session(id_or_name):
235228
)),
236229
]
237230
if session_.api_version[0] >= 6:
238-
fields.append(format_options['session_id'])
239-
fields.append(format_options['kernel_id'])
231+
fields.append(field_names['session_id'])
232+
fields.append(field_names['kernel_id'])
240233
fields.extend([
241-
format_options['image'],
234+
field_names['image'],
242235
])
243236
if session_.api_version >= (4, '20181215'):
244-
fields.append(format_options['tag'])
237+
fields.append(field_names['tag'])
245238
fields.extend([
246-
format_options['created_at'],
247-
format_options['terminated_at'],
248-
format_options['status'],
249-
format_options['status_info'],
250-
format_options['occupied_slots'],
239+
field_names['created_at'],
240+
field_names['terminated_at'],
241+
field_names['status'],
242+
field_names['status_info'],
243+
field_names['status_data'],
244+
field_names['occupied_slots'],
251245
])
252246
fields = apply_version_aware_fields(session_, fields)
253247
field_formatters = defaultdict(lambda: str)
@@ -271,7 +265,7 @@ def session(id_or_name):
271265
'Containers',
272266
'containers {'
273267
' id cluster_role cluster_idx cluster_hostname '
274-
' agent status status_info status_changed '
268+
' agent status status_info status_data status_changed '
275269
' occupied_slots live_stat last_stat '
276270
'}',
277271
))
@@ -310,7 +304,7 @@ def session(id_or_name):
310304
else:
311305
print_fail('There is no such compute session.')
312306
sys.exit(1)
313-
transform_legacy_mem_fields(resp['compute_session'])
307+
transform_fields(resp['compute_session'], in_row=False)
314308
for i, (key, value) in enumerate(resp['compute_session'].items()):
315309
fmt = field_formatters[key]
316310
print(f"{fields[i][0] + ': ':20s}{fmt(value)}")

src/ai/backend/client/cli/utils.py

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
import json
12
import re
23
import textwrap
3-
from typing import Any, Mapping
4+
from typing import Any, Mapping, Optional
45

56
import click
67

@@ -41,26 +42,45 @@ def convert(self, value, param, ctx):
4142
return value
4243

4344

45+
def format_stats(raw_stats: Optional[str], indent='') -> str:
    """
    Render a JSON-encoded statistics object as an indented bullet list.

    :param raw_stats: A JSON document that is expected to decode to an
        object (its ``.items()`` are iterated), or ``None``/an empty string
        when no statistics are available.
    :param indent: Prefix prepended to every rendered line.
    :return: ``"(unavailable)"`` when there is nothing to show; otherwise a
        string that starts with a newline followed by one
        ``- key: value`` line per entry, with the key column padded to
        18 characters.
    :raises json.JSONDecodeError: If *raw_stats* is non-empty but is not
        valid JSON.
    """
    # An empty string is not valid JSON; treat it like a missing value
    # instead of letting json.loads() raise.
    if not raw_stats:
        return "(unavailable)"
    stats = json.loads(raw_stats)
    # A literal JSON ``null`` decodes to None, which has no items to list.
    if stats is None:
        return "(unavailable)"
    text = "\n".join(f"- {k + ': ':18s}{v}" for k, v in stats.items())
    # The leading newline lets callers place the list right after a label.
    return "\n" + textwrap.indent(text, indent)
51+
52+
53+
def format_multiline(value: Any, indent_length: int) -> str:
    """
    Stringify *value* and produce a hanging indent for its continuation lines.

    The text is stripped of surrounding whitespace and split into lines.
    The first line is returned as-is; every following line is prefixed with
    ``indent_length`` spaces.

    :param value: Any object; it is converted with ``str()``.
    :param indent_length: Number of spaces put before each line after the first.
    :return: The re-joined text, or an empty string if nothing remains
        after stripping.
    """
    lines = str(value).strip().splitlines()
    if not lines:
        return ""
    head, *tail = lines
    padding = " " * indent_length
    return "\n".join([head, *(padding + continuation for continuation in tail)])
61+
62+
4463
def format_nested_dicts(value: Mapping[str, Mapping[str, Any]]) -> str:
    """
    Format a mapping from string keys to sub-mappings.

    Each entry becomes one row:

    * a non-empty sub-mapping is shown as ``+ key`` followed by its own
      entries, formatted recursively and indented;
    * an empty sub-mapping is shown as ``+ key: (empty)``;
    * a ``None`` value is shown as ``- key: (null)``;
    * any other value is shown as ``- key: value``.

    :param value: The mapping to render. Sub-values may be any ``Mapping``
        implementation, not only ``dict``.
    :return: The rows joined by newlines, or ``"(empty)"`` when *value*
        itself is empty.
    """
    if not value:
        return "(empty)"
    rows = []
    for outer_key, outer_value in value.items():
        # Check against Mapping (as the signature promises) so that
        # read-only or custom mapping types are expanded like plain dicts.
        if isinstance(outer_value, Mapping):
            if outer_value:
                rows.append(f"+ {outer_key}")
                inner_rows = format_nested_dicts(outer_value)
                rows.append(textwrap.indent(inner_rows, prefix=" "))
            else:
                rows.append(f"+ {outer_key}: (empty)")
        elif outer_value is None:
            rows.append(f"- {outer_key}: (null)")
        else:
            rows.append(f"- {outer_key}: {outer_value}")
    return "\n".join(rows)
6585

6686

0 commit comments

Comments
 (0)