Skip to content

Commit d0348a7

Browse files
authored
Set min-slices and fix worker dashboard. (#3222)
1 parent ba965b4 commit d0348a7

File tree

5 files changed

+208
-68
lines changed

5 files changed

+208
-68
lines changed

lib/iris/examples/marin.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ scale_groups:
5959
num_vms: 1
6060
priority: 10
6161
resources: { cpu: 112, ram: 192GB, disk: 100GB, tpu_count: 4, gpu_count: 0 }
62-
min_slices: 0
62+
min_slices: 1
6363
max_slices: 1024
6464
slice_template:
6565
accelerator_variant: v5litepod-4
@@ -183,7 +183,7 @@ scale_groups:
183183
num_vms: 1
184184
priority: 10
185185
resources: { cpu: 180, ram: 720GB, disk: 100GB, tpu_count: 4, gpu_count: 0 }
186-
min_slices: 0
186+
min_slices: 1
187187
max_slices: 1024
188188
slice_template:
189189
accelerator_variant: v6e-4

lib/iris/src/iris/cluster/static/controller/worker-detail.js

Lines changed: 65 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import { useState, useEffect, useRef, useCallback } from 'preact/hooks';
1111
import htm from 'htm';
1212
import { controllerRpc } from '/static/shared/rpc.js';
1313
import { formatBytes, formatRelativeTime, stateToName, formatDuration } from '/static/shared/utils.js';
14-
import { MetricCard, ResourceSection, Gauge, InlineGauge, Field, Section, Sparkline, formatMbPair, formatRate } from '/static/shared/components.js';
14+
import { MetricCard, InlineGauge, Field, Section, Sparkline, formatMbPair, formatRate } from '/static/shared/components.js';
1515

1616
const html = htm.bind(h);
1717

@@ -175,66 +175,76 @@ function WorkerDetailApp() {
175175
${workerHeartbeat && html`<${Field} label="Last Heartbeat" value=${workerHeartbeat} />`}
176176
${workerCpu && html`<${Field} label="CPU Cores" value=${workerCpu} />`}
177177
${accelDisplay && html`<${Field} label="Accelerator" value=${accelDisplay} />`}
178+
${!liveRes && workerMem > 0 && html`<${Field} label="Memory" value=${formatBytes(workerMem)} />`}
179+
${!liveRes && workerDisk > 0 && html`<${Field} label="Disk" value=${formatBytes(workerDisk)} />`}
178180
</dl>
179-
${liveRes && html`
180-
<${ResourceSection} title="Live Utilization">
181-
<div style="display:flex;align-items:center;gap:8px">
182-
<div style="flex:1"><${Gauge} label="CPU" value=${liveCpuPct || 0} max=${100} format="percent" /></div>
183-
${resourceHistory.length >= 2 && html`
184-
<${Sparkline} values=${resourceHistory.map(s => s.cpuPercent || 0)}
185-
max=${100} width=${80} height=${24}
186-
color="var(--color-accent)"
187-
fillColor="rgba(9,105,218,0.1)" />
188-
`}
181+
<//>
182+
</div>
183+
184+
${liveRes && html`
185+
<div class="utilization-panel">
186+
<h2 class="utilization-panel__title">Live Utilization</h2>
187+
<div class="utilization-panel__grid">
188+
<div class="utilization-metric">
189+
<div class="utilization-metric__header">
190+
<span class="utilization-metric__label">CPU</span>
191+
<span class="utilization-metric__value utilization-metric__value--accent">${(liveCpuPct || 0) + '%'}</span>
192+
</div>
193+
<div class="utilization-metric__chart">
194+
<${Sparkline} values=${resourceHistory.map(s => s.cpuPercent || 0)}
195+
max=${100} width=${240} height=${40}
196+
color="var(--color-accent)"
197+
fillColor="rgba(9,105,218,0.10)" />
189198
</div>
190-
${liveMemTotal > 0 && html`
191-
<div style="display:flex;align-items:center;gap:8px">
192-
<div style="flex:1"><${Gauge} label="Memory" value=${liveMemUsed} max=${liveMemTotal} format="bytes" /></div>
193-
${resourceHistory.length >= 2 && html`
194-
<${Sparkline} values=${resourceHistory.map(s => parseInt(s.memoryUsedBytes || 0))}
195-
max=${liveMemTotal} width=${80} height=${24}
196-
color="var(--color-success)"
197-
fillColor="rgba(26,127,55,0.1)" />
198-
`}
199+
</div>
200+
201+
${liveMemTotal > 0 && html`
202+
<div class="utilization-metric">
203+
<div class="utilization-metric__header">
204+
<span class="utilization-metric__label">Memory</span>
205+
<span class="utilization-metric__value utilization-metric__value--success">${formatBytes(liveMemUsed) + ' / ' + formatBytes(liveMemTotal)}</span>
206+
</div>
207+
<div class="utilization-metric__chart">
208+
<${Sparkline} values=${resourceHistory.map(s => parseInt(s.memoryUsedBytes || 0))}
209+
max=${liveMemTotal} width=${240} height=${40}
210+
color="var(--color-success)"
211+
fillColor="rgba(26,127,55,0.10)" />
199212
</div>
200-
`}
201-
${liveDiskTotal > 0 && html`
202-
<div style="display:flex;align-items:center;gap:8px">
203-
<div style="flex:1"><${Gauge} label="Disk" value=${liveDiskUsed} max=${liveDiskTotal} format="bytes" /></div>
204-
${resourceHistory.length >= 2 && html`
205-
<${Sparkline} values=${resourceHistory.map(s => parseInt(s.diskUsedBytes || 0))}
206-
max=${liveDiskTotal} width=${80} height=${24}
207-
color="var(--color-warning)"
208-
fillColor="rgba(191,135,0,0.1)" />
209-
`}
213+
</div>
214+
`}
215+
216+
${liveDiskTotal > 0 && html`
217+
<div class="utilization-metric">
218+
<div class="utilization-metric__header">
219+
<span class="utilization-metric__label">Disk</span>
220+
<span class="utilization-metric__value utilization-metric__value--warning">${formatBytes(liveDiskUsed) + ' / ' + formatBytes(liveDiskTotal)}</span>
210221
</div>
211-
`}
212-
<div style="display:flex;align-items:center;gap:8px">
213-
<div style="flex:1">
214-
<div class="gauge">
215-
<span class="gauge-label">Net</span>
216-
<span class="gauge-value" style="flex:1;text-align:right;font-variant-numeric:tabular-nums">
217-
${'↓ ' + formatRate(liveNetRecv) + ' ↑ ' + formatRate(liveNetSent)}
218-
</span>
219-
</div>
222+
<div class="utilization-metric__chart">
223+
<${Sparkline} values=${resourceHistory.map(s => parseInt(s.diskUsedBytes || 0))}
224+
max=${liveDiskTotal} width=${240} height=${40}
225+
color="var(--color-warning)"
226+
fillColor="rgba(154,103,0,0.10)" />
220227
</div>
221-
${resourceHistory.length >= 2 && html`
222-
<${Sparkline} values=${resourceHistory.map(s => parseInt(s.netRecvBps || 0) + parseInt(s.netSentBps || 0))}
223-
width=${80} height=${24}
224-
color="var(--color-purple, #8250df)"
225-
fillColor="rgba(130,80,223,0.1)" />
226-
`}
227228
</div>
228-
<//>
229-
`}
230-
${!liveRes && html`
231-
<dl class="worker-detail-fields">
232-
${workerMem > 0 && html`<${Field} label="Memory" value=${formatBytes(workerMem)} />`}
233-
${workerDisk > 0 && html`<${Field} label="Disk" value=${formatBytes(workerDisk)} />`}
234-
</dl>
235-
`}
236-
<//>
237-
</div>
229+
`}
230+
231+
<div class="utilization-metric">
232+
<div class="utilization-metric__header">
233+
<span class="utilization-metric__label">Network</span>
234+
<span class="utilization-metric__value utilization-metric__value--purple">
235+
${'↓ ' + formatRate(liveNetRecv) + ' ↑ ' + formatRate(liveNetSent)}
236+
</span>
237+
</div>
238+
<div class="utilization-metric__chart">
239+
<${Sparkline} values=${resourceHistory.map(s => parseInt(s.netRecvBps || 0) + parseInt(s.netSentBps || 0))}
240+
width=${240} height=${40}
241+
color="var(--color-purple, #8250df)"
242+
fillColor="rgba(130,80,223,0.10)" />
243+
</div>
244+
</div>
245+
</div>
246+
</div>
247+
`}
238248
239249
${'' /* --- Task History --- */}
240250
<div class="worker-detail-logs-section">

lib/iris/src/iris/cluster/static/shared/components.js

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,18 +99,23 @@ export function InlineGauge({ value, max, format = 'percent', label }) {
9999
* @param {string} [fillColor] - Optional fill color under the line
100100
*/
101101
export function Sparkline({ values, max, width = 64, height = 20, color = 'var(--color-accent)', fillColor }) {
102-
if (!values || values.length < 2) return null;
103-
const effectiveMax = max || Math.max(...values);
104-
if (effectiveMax === 0) return null;
102+
if (!values || values.length < 1) return null;
103+
104+
// Duplicate a lone value so a one-sample series renders as a flat line instead of being hidden
105+
const data = values.length === 1 ? [values[0], values[0]] : values;
106+
const effectiveMax = max || Math.max(...data);
105107

106108
// Pad from edges so the line doesn't clip
107109
const pad = 1;
108110
const innerW = width - 2 * pad;
109111
const innerH = height - 2 * pad;
110112

111-
const points = values.map((v, i) => {
112-
const x = pad + (i / (values.length - 1)) * innerW;
113-
const y = pad + innerH - (Math.min(v, effectiveMax) / effectiveMax) * innerH;
113+
const points = data.map((v, i) => {
114+
const x = pad + (i / (data.length - 1)) * innerW;
115+
// When effectiveMax is 0 (all idle), draw a flat line at the bottom rather than dividing by zero
116+
const y = effectiveMax === 0
117+
? pad + innerH
118+
: pad + innerH - (Math.min(v, effectiveMax) / effectiveMax) * innerH;
114119
return `${x.toFixed(1)},${y.toFixed(1)}`;
115120
});
116121

lib/iris/src/iris/cluster/static/shared/styles.css

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,6 +1096,79 @@ input:focus, select:focus { outline: none; border-color: #0969da; box-shadow: 0
10961096
white-space: nowrap;
10971097
}
10981098

1099+
/* === Live Utilization Panel (worker detail) === */
1100+
.utilization-panel {
1101+
background: var(--color-surface);
1102+
border: 1px solid var(--color-border);
1103+
border-radius: var(--radius-lg);
1104+
padding: 20px 24px;
1105+
margin-bottom: 24px;
1106+
}
1107+
1108+
.utilization-panel__title {
1109+
font-size: 13px;
1110+
font-weight: 600;
1111+
color: var(--color-text-secondary);
1112+
text-transform: uppercase;
1113+
letter-spacing: 0.5px;
1114+
margin: 0 0 16px 0;
1115+
}
1116+
1117+
.utilization-panel__grid {
1118+
display: grid;
1119+
grid-template-columns: repeat(2, 1fr);
1120+
gap: 16px;
1121+
}
1122+
1123+
@media (max-width: 640px) {
1124+
.utilization-panel__grid { grid-template-columns: 1fr; }
1125+
}
1126+
1127+
.utilization-metric {
1128+
display: flex;
1129+
flex-direction: column;
1130+
gap: 8px;
1131+
background: var(--color-bg);
1132+
border-radius: var(--radius-md);
1133+
padding: 14px 16px;
1134+
}
1135+
1136+
.utilization-metric__header {
1137+
display: flex;
1138+
justify-content: space-between;
1139+
align-items: baseline;
1140+
}
1141+
1142+
.utilization-metric__label {
1143+
font-size: 11px;
1144+
font-weight: 600;
1145+
color: var(--color-text-secondary);
1146+
text-transform: uppercase;
1147+
letter-spacing: 0.5px;
1148+
}
1149+
1150+
.utilization-metric__value {
1151+
font-size: 14px;
1152+
font-weight: 600;
1153+
font-family: var(--font-mono);
1154+
font-variant-numeric: tabular-nums;
1155+
}
1156+
1157+
.utilization-metric__value--accent { color: var(--color-accent); }
1158+
.utilization-metric__value--success { color: var(--color-success); }
1159+
.utilization-metric__value--warning { color: var(--color-warning); }
1160+
.utilization-metric__value--danger { color: var(--color-danger); }
1161+
.utilization-metric__value--purple { color: var(--color-purple); }
1162+
1163+
.utilization-metric__chart {
1164+
display: flex;
1165+
}
1166+
1167+
.utilization-metric__chart .sparkline {
1168+
width: 100%;
1169+
height: 40px;
1170+
}
1171+
10991172
/* === Sparkline (inline trend chart) === */
11001173
.sparkline {
11011174
display: inline-block;

lib/iris/tests/e2e/test_dashboard.py

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
rendering via screenshots.
99
"""
1010

11+
import time
12+
1113
import pytest
1214
from iris.rpc import cluster_pb2
1315

@@ -142,17 +144,69 @@ def test_worker_detail_shows_network_and_disk_sparklines(cluster, page, screensh
142144

143145
if not _is_noop_page(page):
144146
page.wait_for_function(
145-
"() => document.querySelector('.resource-section') !== null",
147+
"() => document.querySelector('.utilization-panel') !== null",
146148
timeout=10000,
147149
)
148-
# Live Utilization section should show CPU, Memory, Disk (all with sparklines) and Net
150+
# Live Utilization panel should show CPU, Memory, Disk, and Network
149151
assert_visible(page, "text=Live Utilization")
150152
assert_visible(page, "text=CPU")
151153
assert_visible(page, "text=Disk")
152-
assert_visible(page, "text=Net")
154+
assert_visible(page, "text=Network")
153155
screenshot("worker-detail-net-disk-sparklines")
154156

155157

158+
def _hold_for_heartbeats():
159+
"""Sleep long enough for multiple heartbeat cycles to accumulate resource history."""
160+
import time
161+
162+
time.sleep(6)
163+
return 1
164+
165+
166+
def test_worker_detail_sparklines_with_history(cluster, page, screenshot):
167+
"""Worker detail sparklines render once resource history has accumulated.
168+
169+
Submits a task that holds long enough for multiple heartbeat cycles (local
170+
heartbeat_interval=0.5s) so the resource_history deque has enough entries
171+
for the Sparkline SVGs to render.
172+
"""
173+
job = cluster.submit(_hold_for_heartbeats, "worker-sparkline-history")
174+
cluster.wait_for_state(job, cluster_pb2.JOB_STATE_RUNNING, timeout=15)
175+
176+
task_status = cluster.task_status(job)
177+
worker_id = task_status.worker_id
178+
assert worker_id
179+
180+
# Wait for heartbeats to accumulate (local interval is 0.5s, so 3s yields several history entries)
181+
time.sleep(3)
182+
183+
dashboard_goto(page, f"{cluster.url}/worker/{worker_id}")
184+
185+
if not _is_noop_page(page):
186+
# Wait for the utilization panel with SVG sparklines to render
187+
page.wait_for_function(
188+
"() => document.querySelector('.utilization-panel') !== null"
189+
" && document.querySelectorAll('.utilization-panel svg.sparkline').length >= 3",
190+
timeout=15000,
191+
)
192+
193+
# Verify all four utilization metric sections are present
194+
assert_visible(page, "text=Live Utilization")
195+
assert_visible(page, "text=CPU")
196+
assert_visible(page, "text=Memory")
197+
assert_visible(page, "text=Disk")
198+
assert_visible(page, "text=Network")
199+
200+
# Verify SVG sparklines rendered (not hidden due to empty data)
201+
if not _is_noop_page(page):
202+
sparkline_count = page.locator(".utilization-panel svg.sparkline").count()
203+
assert sparkline_count >= 3, f"Expected at least 3 sparkline SVGs in utilization panel, got {sparkline_count}"
204+
205+
screenshot("worker-detail-sparklines-with-history")
206+
207+
cluster.wait(job, timeout=30)
208+
209+
156210
def _allocate_memory_and_wait():
157211
"""Allocate ~50 MB and sleep long enough for at least one stats collection cycle."""
158212
import time
@@ -192,8 +246,6 @@ def test_job_detail_task_table_shows_resource_values(cluster, page, screenshot):
192246
cluster.wait_for_state(job, cluster_pb2.JOB_STATE_RUNNING, timeout=15)
193247

194248
# Wait for stats collection (poll interval is 5s)
195-
import time
196-
197249
time.sleep(7)
198250

199251
dashboard_goto(page, f"{cluster.url}/job/{job.job_id.to_wire()}")

0 commit comments

Comments
 (0)