Skip to content

Commit f0ddae8

Browse files
[UX] Minor improvements of dstack metrics (#2667)
Co-authored-by: jvstme <[email protected]>
1 parent 44ad748 commit f0ddae8

File tree

2 files changed

+59
-10
lines changed

2 files changed

+59
-10
lines changed

src/dstack/_internal/cli/commands/metrics.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ def _command(self, args: argparse.Namespace):
3939
run = self.api.runs.get(run_name=args.run_name)
4040
if run is None:
4141
raise CLIError(f"Run {args.run_name} not found")
42-
if run.status.is_finished():
43-
raise CLIError(f"Run {args.run_name} is finished")
4442
metrics = _get_run_jobs_metrics(api=self.api, run=run)
4543

4644
if not args.watch:
@@ -55,8 +53,6 @@ def _command(self, args: argparse.Namespace):
5553
run = self.api.runs.get(run_name=args.run_name)
5654
if run is None:
5755
raise CLIError(f"Run {args.run_name} not found")
58-
if run.status.is_finished():
59-
raise CLIError(f"Run {args.run_name} is finished")
6056
metrics = _get_run_jobs_metrics(api=self.api, run=run)
6157
except KeyboardInterrupt:
6258
pass
@@ -78,11 +74,12 @@ def _get_run_jobs_metrics(api: Client, run: Run) -> List[JobMetrics]:
7874
def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
7975
table = Table(box=None)
8076
table.add_column("NAME", style="bold", no_wrap=True)
77+
table.add_column("STATUS")
8178
table.add_column("CPU")
8279
table.add_column("MEMORY")
8380
table.add_column("GPU")
8481

85-
run_row: Dict[Union[str, int], Any] = {"NAME": run.name}
82+
run_row: Dict[Union[str, int], Any] = {"NAME": run.name, "STATUS": run.status.value}
8683
if len(run._run.jobs) != 1:
8784
add_row_from_dict(table, run_row)
8885

@@ -101,9 +98,9 @@ def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
10198
cpu_usage = f"{cpu_usage:.0f}%"
10299
memory_usage = _get_metric_value(job_metrics, "memory_working_set_bytes")
103100
if memory_usage is not None:
104-
memory_usage = f"{round(memory_usage / 1024 / 1024)}MB"
101+
memory_usage = _format_memory(memory_usage, 2)
105102
if resources is not None:
106-
memory_usage += f"/{resources.memory_mib}MB"
103+
memory_usage += f"/{_format_memory(resources.memory_mib * 1024 * 1024, 2)}"
107104
gpu_metrics = ""
108105
gpus_detected_num = _get_metric_value(job_metrics, "gpus_detected_num")
109106
if gpus_detected_num is not None:
@@ -113,13 +110,16 @@ def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
113110
if gpu_memory_usage is not None:
114111
if i != 0:
115112
gpu_metrics += "\n"
116-
gpu_metrics += f"#{i} {round(gpu_memory_usage / 1024 / 1024)}MB"
113+
gpu_metrics += f"gpu={i} mem={_format_memory(gpu_memory_usage, 2)}"
117114
if resources is not None:
118-
gpu_metrics += f"/{resources.gpus[i].memory_mib}MB"
119-
gpu_metrics += f" {gpu_util_percent}% Util"
115+
gpu_metrics += (
116+
f"/{_format_memory(resources.gpus[i].memory_mib * 1024 * 1024, 2)}"
117+
)
118+
gpu_metrics += f" util={gpu_util_percent}%"
120119

121120
job_row: Dict[Union[str, int], Any] = {
122121
"NAME": f" replica={job.job_spec.replica_num} job={job.job_spec.job_num}",
122+
"STATUS": job.job_submissions[-1].status.value,
123123
"CPU": cpu_usage or "-",
124124
"MEMORY": memory_usage or "-",
125125
"GPU": gpu_metrics or "-",
@@ -136,3 +136,18 @@ def _get_metric_value(job_metrics: JobMetrics, name: str) -> Optional[Any]:
136136
if metric.name == name:
137137
return metric.values[-1]
138138
return None
139+
140+
141+
def _format_memory(memory_bytes: int, decimal_places: int) -> str:
142+
"""See test_format_memory in tests/_internal/cli/commands/test_metrics.py for examples."""
143+
memory_mb = memory_bytes / 1024 / 1024
144+
if memory_mb >= 1024:
145+
value = memory_mb / 1024
146+
unit = "GB"
147+
else:
148+
value = memory_mb
149+
unit = "MB"
150+
151+
if decimal_places == 0:
152+
return f"{round(value)}{unit}"
153+
return f"{value:.{decimal_places}f}".rstrip("0").rstrip(".") + unit
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import pytest
2+
3+
from dstack._internal.cli.commands.metrics import _format_memory
4+
5+
6+
@pytest.mark.parametrize(
    "bytes_value,decimal_places,expected",
    [
        # --- Amounts below 1024 MB are reported in MB ---
        # Whole number of MB, rounded to an integer.
        (512 * 1024 * 1024, 0, "512MB"),
        # Whole number of MB, trailing zeros are stripped.
        (512 * 1024 * 1024, 2, "512MB"),
        # Fractional MB, rounded to an integer.
        (512.5 * 1024 * 1024, 0, "512MB"),
        # Fractional MB, two decimals requested.
        (512.5 * 1024 * 1024, 2, "512.5MB"),
        # Fractional MB, three decimals requested.
        (512.5 * 1024 * 1024, 3, "512.5MB"),
        # Slightly below the GB threshold, integer output.
        (999 * 1024 * 1024, 0, "999MB"),
        # Slightly below the GB threshold, decimal output.
        (999 * 1024 * 1024, 2, "999MB"),
        # --- Amounts of 1024 MB and above are reported in GB ---
        # Fractional GB, rounded to an integer.
        (1.5 * 1024 * 1024 * 1024, 0, "2GB"),
        # Fractional GB, two decimals requested.
        (1.5 * 1024 * 1024 * 1024, 2, "1.5GB"),
        # Fractional GB, three decimals requested.
        (1.5 * 1024 * 1024 * 1024, 3, "1.5GB"),
        # Whole number of GB, integer output.
        (2 * 1024 * 1024 * 1024, 0, "2GB"),
        # Whole number of GB, trailing zeros are stripped.
        (2 * 1024 * 1024 * 1024, 2, "2GB"),
        # --- Edge cases ---
        # Zero bytes, integer output.
        (0, 0, "0MB"),
        # Zero bytes, decimal output.
        (0, 2, "0MB"),
        # One KB short of 1 MB, integer output.
        (1023 * 1024, 0, "1MB"),
        # One KB short of 1 MB, decimal output.
        (1023 * 1024, 2, "1MB"),
        # One byte short of 1 GB, integer output stays in MB.
        (1024 * 1024 * 1024 - 1, 0, "1024MB"),
        # One byte short of 1 GB, decimal output stays in MB.
        (1024 * 1024 * 1024 - 1, 2, "1024MB"),
    ],
)
def test_format_memory(bytes_value: int, decimal_places: int, expected: str):
    """Check that ``_format_memory`` renders each byte count as expected."""
    assert _format_memory(bytes_value, decimal_places) == expected

0 commit comments

Comments
 (0)