Skip to content

Commit 03a8ce0

Browse files
authored
webui: Show task fails and allow bulk restart on PoRep page (#170)
1 parent d7fa878 commit 03a8ce0

File tree

6 files changed

+189
-17
lines changed

6 files changed

+189
-17
lines changed

web/api/webrpc/pipeline.go renamed to web/api/webrpc/pipeline_porep.go

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"github.com/filecoin-project/go-address"
1111
"github.com/filecoin-project/go-bitfield"
12+
"github.com/filecoin-project/go-state-types/abi"
1213

1314
"github.com/filecoin-project/lotus/chain/actors/adt"
1415
"github.com/filecoin-project/lotus/chain/actors/builtin/miner"
@@ -61,6 +62,10 @@ type PipelineTask struct {
6162
FailedReason string `db:"failed_reason"`
6263
}
6364

65+
func (pt PipelineTask) sectorID() abi.SectorID {
66+
return abi.SectorID{Miner: abi.ActorID(pt.SpID), Number: abi.SectorNumber(pt.SectorNumber)}
67+
}
68+
6469
type sectorListEntry struct {
6570
PipelineTask
6671

@@ -69,6 +74,9 @@ type sectorListEntry struct {
6974
AfterSeed bool
7075

7176
ChainAlloc, ChainSector, ChainActive, ChainUnproven, ChainFaulty bool
77+
78+
MissingTasks []int64
79+
AllTasks []int64
7280
}
7381

7482
type minerBitfields struct {
@@ -99,6 +107,16 @@ func (a *WebRPC) PipelinePorepSectors(ctx context.Context) ([]sectorListEntry, e
99107
return nil, xerrors.Errorf("failed to fetch pipeline tasks: %w", err)
100108
}
101109

110+
missingTasks, err := a.pipelinePorepMissingTasks(ctx)
111+
if err != nil {
112+
return nil, xerrors.Errorf("failed to fetch missing tasks: %w", err)
113+
}
114+
115+
missingTasksMap := make(map[abi.SectorID]porepMissingTask)
116+
for _, mt := range missingTasks {
117+
missingTasksMap[mt.sectorID()] = mt
118+
}
119+
102120
head, err := a.deps.Chain.ChainHead(ctx)
103121
if err != nil {
104122
return nil, xerrors.Errorf("failed to fetch chain head: %w", err)
@@ -129,6 +147,12 @@ func (a *WebRPC) PipelinePorepSectors(ctx context.Context) ([]sectorListEntry, e
129147

130148
afterSeed := task.SeedEpoch != nil && *task.SeedEpoch <= int64(epoch)
131149

150+
var missingTasks, allTasks []int64
151+
if mt, ok := missingTasksMap[task.sectorID()]; ok {
152+
missingTasks = mt.MissingTaskIDs
153+
allTasks = mt.AllTaskIDs
154+
}
155+
132156
sectorList = append(sectorList, sectorListEntry{
133157
PipelineTask: task,
134158
Address: addr,
@@ -140,6 +164,9 @@ func (a *WebRPC) PipelinePorepSectors(ctx context.Context) ([]sectorListEntry, e
140164
ChainActive: must.One(mbf.active.IsSet(uint64(task.SectorNumber))),
141165
ChainUnproven: must.One(mbf.unproven.IsSet(uint64(task.SectorNumber))),
142166
ChainFaulty: must.One(mbf.faulty.IsSet(uint64(task.SectorNumber))),
167+
168+
MissingTasks: missingTasks,
169+
AllTasks: allTasks,
143170
})
144171
}
145172

@@ -249,3 +276,87 @@ func (a *WebRPC) PorepPipelineSummary(ctx context.Context) ([]PorepPipelineSumma
249276
}
250277
return summaries, nil
251278
}
279+
280+
func (a *WebRPC) PipelinePorepRestartAll(ctx context.Context) error {
281+
missing, err := a.pipelinePorepMissingTasks(ctx)
282+
if err != nil {
283+
return err
284+
}
285+
286+
for _, mt := range missing {
287+
if len(mt.AllTaskIDs) != len(mt.MissingTaskIDs) || len(mt.MissingTaskIDs) == 0 {
288+
continue
289+
}
290+
291+
log.Infow("Restarting sector", "sector", mt.sectorID(), "missing_tasks", mt.MissingTasksCount)
292+
293+
if err := a.SectorResume(ctx, mt.SpID, mt.SectorNumber); err != nil {
294+
return err
295+
}
296+
}
297+
return nil
298+
}
299+
300+
// porepMissingTask is one row produced by the query in
// pipelinePorepMissingTasks: a PoRep pipeline sector, the task IDs associated
// with it, and the query's view of which of those tasks are missing.
type porepMissingTask struct {
	// Sector identity.
	SpID         int64 `db:"sp_id"`
	SectorNumber int64 `db:"sector_number"`

	// Task ID arrays as selected by the query.
	AllTaskIDs     []int64 `db:"all_task_ids"`
	MissingTaskIDs []int64 `db:"missing_task_ids"`

	// Lengths of the two arrays above, computed in SQL with array_length.
	TotalTasks        int `db:"total_tasks"`
	MissingTasksCount int `db:"missing_tasks_count"`

	// Label computed in SQL: 'All tasks missing' or 'Some tasks missing'.
	RestartStatus string `db:"restart_status"`
}
310+
311+
func (pmt porepMissingTask) sectorID() abi.SectorID {
312+
return abi.SectorID{Miner: abi.ActorID(pmt.SpID), Number: abi.SectorNumber(pmt.SectorNumber)}
313+
}
314+
315+
func (a *WebRPC) pipelinePorepMissingTasks(ctx context.Context) ([]porepMissingTask, error) {
316+
var tasks []porepMissingTask
317+
err := a.deps.DB.Select(ctx, &tasks, `
318+
WITH sector_tasks AS (
319+
SELECT
320+
sp.sp_id,
321+
sp.sector_number,
322+
get_sdr_pipeline_tasks(sp.sp_id, sp.sector_number) AS task_ids
323+
FROM
324+
sectors_sdr_pipeline sp
325+
),
326+
missing_tasks AS (
327+
SELECT
328+
st.sp_id,
329+
st.sector_number,
330+
st.task_ids,
331+
array_agg(CASE WHEN ht.id IS NULL THEN task_id ELSE NULL END) AS missing_task_ids
332+
FROM
333+
sector_tasks st
334+
CROSS JOIN UNNEST(st.task_ids) WITH ORDINALITY AS t(task_id, task_order)
335+
LEFT JOIN harmony_task ht ON ht.id = task_id
336+
GROUP BY
337+
st.sp_id, st.sector_number, st.task_ids
338+
)
339+
SELECT
340+
mt.sp_id,
341+
mt.sector_number,
342+
mt.task_ids AS all_task_ids,
343+
mt.missing_task_ids,
344+
array_length(mt.task_ids, 1) AS total_tasks,
345+
array_length(mt.missing_task_ids, 1) AS missing_tasks_count,
346+
CASE
347+
WHEN array_length(mt.task_ids, 1) = array_length(mt.missing_task_ids, 1) THEN 'All tasks missing'
348+
ELSE 'Some tasks missing'
349+
END AS restart_status
350+
FROM
351+
missing_tasks mt
352+
WHERE
353+
array_length(mt.task_ids, 1) > 0 -- Has at least one task
354+
AND array_length(array_remove(mt.missing_task_ids, NULL), 1) > 0 -- At least one task is missing
355+
ORDER BY
356+
mt.sp_id, mt.sector_number;`)
357+
if err != nil {
358+
return nil, xerrors.Errorf("failed to fetch missing tasks: %w", err)
359+
}
360+
361+
return tasks, nil
362+
}

web/api/webrpc/sector.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -410,15 +410,15 @@ func (a *WebRPC) SectorInfo(ctx context.Context, sp string, intid int64) (*Secto
410410
}, nil
411411
}
412412

413-
func (a *WebRPC) SectorResume(ctx context.Context, spid, id int) error {
413+
func (a *WebRPC) SectorResume(ctx context.Context, spid, id int64) error {
414414
_, err := a.deps.DB.Exec(ctx, `SELECT unset_task_id($1, $2)`, spid, id)
415415
if err != nil {
416416
return xerrors.Errorf("failed to resume sector: %w", err)
417417
}
418418
return nil
419419
}
420420

421-
func (a *WebRPC) SectorRemove(ctx context.Context, spid, id int) error {
421+
func (a *WebRPC) SectorRemove(ctx context.Context, spid, id int64) error {
422422
_, err := a.deps.DB.Exec(ctx, `DELETE FROM batch_sector_refs WHERE sp_id = $1 AND sector_number = $2`, spid, id)
423423
if err != nil {
424424
return xerrors.Errorf("failed to remove sector batch refs: %w", err)

web/static/pages/node_info/node-info.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ customElements.define('node-info',class NodeInfoElement extends LitElement {
111111
<td>${task.ID}</td>
112112
<td>${task.Task}</td>
113113
<td>${task.Posted}</td>
114-
<td>${task.PoRepSector ? html`<a href="/pages/pipeline_porep/">f0${task.PoRepSectorSP}:${task.PoRepSector}</a>` : ''}</td>
114+
<td>${task.PoRepSector ? html`<a href="/pages/sector/?sp=f0${task.PoRepSectorSP}&id=${task.PoRepSector}">f0${task.PoRepSectorSP}:${task.PoRepSector}</a>` : ''}</td>
115115
</tr>
116116
`)}
117117
</table>

web/static/pages/pipeline_porep/index.html

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
<script type="module" src="/ux/curio-ux.mjs"></script>
66
<script type="module" src="/chain-connectivity.mjs"></script>
77
<script type="module" src="pipeline-porep-sectors.mjs"></script>
8+
<script type="module" src="restart-all-button.mjs"></script>
89
<link rel="stylesheet" href="/ux/main.css">
910
</head>
1011
<body style="visibility: hidden">
@@ -16,6 +17,13 @@ <h1>Curio PoRep Pipeline</h1>
1617
</div>
1718
<hr/>
1819
<div class="page">
20+
<div class="row">
21+
<div class="row-md-auto" style="width: 50%">
22+
<div class="info-block">
23+
<restart-all-button></restart-all-button>
24+
</div>
25+
</div>
26+
</div>
1927
<div class="row">
2028
<div class="row-md-auto" style="width: 50%">
2129
<div class="info-block">

web/static/pages/pipeline_porep/pipeline-porep-sectors.mjs

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -105,18 +105,18 @@ customElements.define('pipeline-porep-sectors',class PipelinePorepSectors extend
105105
<table class="porep-state">
106106
<tbody>
107107
<tr>
108-
${this.renderSectorState('SDR', 1, sector.TaskSDR, sector.AfterSDR)}
109-
${this.renderSectorState('TreeC', 1, sector.TaskTreeC, sector.AfterTreeC)}
110-
${this.renderSectorState('Synthetic', 2, sector.TaskSynthetic, sector.AfterSynthetic)}
111-
${this.renderSectorState('PComm Msg', 2, sector.TaskPrecommitMsg, sector.AfterPrecommitMsg)}
108+
${this.renderSectorState('SDR', 1, sector, sector.TaskSDR, sector.AfterSDR)}
109+
${this.renderSectorState('TreeC', 1, sector, sector.TaskTreeC, sector.AfterTreeC)}
110+
${this.renderSectorState('Synthetic', 2, sector, sector.TaskSynthetic, sector.AfterSynthetic)}
111+
${this.renderSectorState('PComm Msg', 2, sector, sector.TaskPrecommitMsg, sector.AfterPrecommitMsg)}
112112
${this.renderSectorStateNoTask('PComm Wait', 2, sector.AfterPrecommitMsg, sector.AfterPrecommitMsgSuccess)}
113113
<td rowspan=2 class="${sector.AfterPrecommitMsgSuccess?'pipeline-active':''} ${sector.AfterSeed?'pipeline-success':''}">
114114
<div>Wait Seed</div>
115115
<div>${sector.AfterSeed?'done':sector.SeedEpoch}</div>
116116
</td>
117-
${this.renderSectorState('PoRep', 2, sector.TaskPoRep, sector.AfterPoRep)}
118-
${this.renderSectorState('Clear Cache', 1, sector.TaskFinalize, sector.AfterFinalize)}
119-
${this.renderSectorState('Move Storage', 1, sector.TaskMoveStorage, sector.AfterMoveStorage)}
117+
${this.renderSectorState('PoRep', 2, sector, sector.TaskPoRep, sector.AfterPoRep)}
118+
${this.renderSectorState('Clear Cache', 1, sector, sector.TaskFinalize, sector.AfterFinalize)}
119+
${this.renderSectorState('Move Storage', 1, sector, sector.TaskMoveStorage, sector.AfterMoveStorage)}
120120
<td class="${sector.ChainSector ? 'pipeline-success' : (sector.ChainAlloc ? 'pipeline-active' : 'pipeline-failed')}">
121121
<div>On Chain</div>
122122
<div>${sector.ChainSector ? 'yes' : (sector.ChainAlloc ? 'allocated' : 'no')}</div>
@@ -127,10 +127,10 @@ customElements.define('pipeline-porep-sectors',class PipelinePorepSectors extend
127127
</td>
128128
</tr>
129129
<tr>
130-
${this.renderSectorState('TreeD', 1, sector.TaskTreeD, sector.AfterTreeD)}
131-
${this.renderSectorState('TreeR', 1, sector.TaskTreeR, sector.AfterTreeR)}
130+
${this.renderSectorState('TreeD', 1, sector, sector.TaskTreeD, sector.AfterTreeD)}
131+
${this.renderSectorState('TreeR', 1, sector, sector.TaskTreeR, sector.AfterTreeR)}
132132
<!-- PC-S, PC-W, WS, PoRep -->
133-
${this.renderSectorState('Commit Msg', 1, sector.TaskCommitMsg, sector.AfterCommitMsg)}
133+
${this.renderSectorState('Commit Msg', 1, sector, sector.TaskCommitMsg, sector.AfterCommitMsg)}
134134
${this.renderSectorStateNoTask('Commit Wait', 1, sector.AfterCommitMsg, sector.AfterCommitMsgSuccess)}
135135
<td class="${sector.ChainActive ? 'pipeline-success' : 'pipeline-failed'}">
136136
<div>Active</div>
@@ -149,13 +149,26 @@ customElements.define('pipeline-porep-sectors',class PipelinePorepSectors extend
149149
</td>
150150
`;
151151
}
152-
renderSectorState(name, rowspan, task, after) {
152+
renderSectorState(name, rowspan, sector, task, after) {
153+
if(task) {
154+
// sector.MissingTasks is a list of tasks
155+
// sector.MissingTasks.includes(task) is true if task is missing
156+
let missing = sector.MissingTasks && sector.MissingTasks.includes(task);
157+
158+
return html`
159+
<td rowspan="${rowspan}" class="${missing ? 'pipeline-failed' : 'pipeline-active'}">
160+
<div>${name}</div>
161+
<div>T:${task}</div>
162+
${missing ? html`<div><b>FAILED</b></div>` : ''}
163+
</td>
164+
`;
165+
}
166+
153167
return html`
154-
<td rowspan="${rowspan}" class="${task?'pipeline-active':''} ${after?'pipeline-success':''}">
168+
<td rowspan="${rowspan}" class="${after?'pipeline-success':''}">
155169
<div>${name}</div>
156-
<div>${after?'done':task?'T:'+task:'--'}</div>
170+
<div>${after?'done':'--'}</div>
157171
</td>
158172
`;
159173
}
160-
161174
} );
web/static/pages/pipeline_porep/restart-all-button.mjs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import { LitElement, html } from 'https://cdn.jsdelivr.net/gh/lit/dist@3/all/lit-all.min.js';
2+
import RPCCall from '/lib/jsonrpc.mjs';
3+
4+
/**
 * <restart-all-button> renders a single "Restart All" button that invokes the
 * `PipelinePorepRestartAll` RPC method.
 *
 * While the call is in flight the button is disabled and shows
 * "Processing...". The outcome is only reported on the browser console.
 */
class RestartAllButton extends LitElement {
    static properties = {
        // True while the RPC call is pending; drives the label and disabled state.
        isProcessing: { type: Boolean },
    };

    constructor() {
        super();
        this.isProcessing = false;
    }

    /**
     * Renders the Bootstrap stylesheet link together with the button.
     */
    render() {
        return html`
            <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3" crossorigin="anonymous">
            <button
                @click="${this.handleClick}"
                class="btn ${this.isProcessing ? 'btn-secondary' : 'btn-primary'}"
                ?disabled="${this.isProcessing}"
            >
                ${this.isProcessing ? 'Processing...' : 'Restart All'}
            </button>
        `;
    }

    /**
     * Click handler: calls `PipelinePorepRestartAll` with no arguments and
     * re-enables the button afterwards whether the call succeeded or failed.
     */
    async handleClick() {
        this.isProcessing = true;
        try {
            await RPCCall('PipelinePorepRestartAll', []);
            console.log('Restart All operation completed successfully');
        } catch (error) {
            console.error('Error during Restart All operation:', error);
        } finally {
            // Always leave the processing state so the button becomes usable again.
            this.isProcessing = false;
        }
    }
}
39+
40+
customElements.define('restart-all-button', RestartAllButton);

0 commit comments

Comments
 (0)