Skip to content

Commit 49084f2

Browse files
magik6k and rvagg
authored and committed
feat: Smart Cordon, Restart Requests (#595)
* cordon: yield bg tasks
* make PSClientPoll yield as well
* harmonytask: Scheduling Overrides
* metrics: record version
* fix build
* smart restart
* webui: Jsonrpc reconnect fixes
* webui: restart requests
* make gen
* jrpc reject on conn fail
* missing schema file
* rm snake case
1 parent e10e62d commit 49084f2

File tree

15 files changed

+322
-48
lines changed

15 files changed

+322
-48
lines changed

apt/curio.service

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ LimitNOFILE=1000000
1010
Restart=always
1111
RestartSec=10
1212
EnvironmentFile=/etc/curio.env
13+
RestartForceExitStatus=100
1314

1415
[Install]
1516
WantedBy=multi-user.target

documentation/en/curio-service.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ LimitNOFILE=1000000
2727
Restart=always
2828
RestartSec=10
2929
EnvironmentFile=/etc/curio.env
30+
RestartForceExitStatus=100
3031

3132
[Install]
3233
WantedBy=multi-user.target

harmony/harmonydb/sql/20230719-harmony.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ CREATE TABLE harmony_task (
1919
previous_task INTEGER,
2020
name varchar(16) NOT NULL
2121
-- retries INTEGER NOT NULL DEFAULT 0 -- added later
22+
-- unschedulable BOOLEAN DEFAULT FALSE -- added in 20250111-machine-maintenance.sql
23+
-- restart_request TIMESTAMP WITH TIME ZONE -- added in 20250818-restart-request.sql
2224
);
2325
COMMENT ON COLUMN harmony_task.initiated_by IS 'The task ID whose completion occasioned this task.';
2426
COMMENT ON COLUMN harmony_task.owner_id IS 'The foreign key to harmony_machines.';
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ALTER TABLE harmony_machines
2+
ADD COLUMN restart_request TIMESTAMP WITH TIME ZONE;

harmony/harmonytask/harmonytask.go

Lines changed: 89 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package harmonytask
33
import (
44
"context"
55
"fmt"
6+
"os"
67
"strconv"
78
"sync/atomic"
89
"time"
@@ -23,6 +24,8 @@ var POLL_NEXT_DURATION = 100 * time.Millisecond // After scheduling a task, wait
2324
var CLEANUP_FREQUENCY = 5 * time.Minute // Check for dead workers this often * everyone
2425
var FOLLOW_FREQUENCY = 1 * time.Minute // Check for work to follow this often
2526

27+
var ExitStatusRestartRequest = 100
28+
2629
type TaskTypeDetails struct {
2730
// Max returns how many tasks this machine can run of this type.
2831
// Nil (default)/Zero or less means unrestricted.
@@ -57,6 +60,16 @@ type TaskTypeDetails struct {
5760
// CanAccept() can read taskEngine's WorkOrigin string to learn about a task.
5861
// Ex: make new CC sectors, clean-up, or retrying pipelines that failed in later states.
5962
IAmBored func(AddTaskFunc) error
63+
64+
// CanYield is true if the task should yield when the node is not schedulable.
65+
// This is implied for background tasks.
66+
CanYield bool
67+
68+
// SchedOverrides is a map of task names which, when running while the node is not schedulable,
69+
// allow this task to continue being scheduled. This is useful in pipelines where a long-running
70+
// task would block a short-running task from being scheduled, blocking other related pipelines on
71+
// other machines.
72+
SchedulingOverrides map[string]bool
6073
}
6174

6275
// TaskInterface must be implemented in order to have a task used by harmonytask.
@@ -126,6 +139,9 @@ type TaskEngine struct {
126139
follows map[string][]followStruct
127140
hostAndPort string
128141

142+
// runtime flags
143+
yieldBackground atomic.Bool
144+
129145
// synchronous to the single-threaded poller
130146
lastFollowTime time.Time
131147
lastCleanup atomic.Value
@@ -283,20 +299,24 @@ func (e *TaskEngine) poller() {
283299
nextWait = POLL_DURATION
284300

285301
// Check if the machine is schedulable
286-
schedulable, err := e.schedulable()
302+
schedulable, err := e.checkNodeFlags()
287303
if err != nil {
288304
log.Error("Unable to check schedulable status: ", err)
289305
continue
290306
}
307+
308+
e.yieldBackground.Store(!schedulable)
309+
310+
accepted := e.pollerTryAllWork(schedulable)
311+
if accepted {
312+
nextWait = POLL_NEXT_DURATION
313+
}
314+
291315
if !schedulable {
292316
log.Debugf("Machine %s is not schedulable. Please check the cordon status.", e.hostAndPort)
293317
continue
294318
}
295319

296-
accepted := e.pollerTryAllWork()
297-
if accepted {
298-
nextWait = POLL_NEXT_DURATION
299-
}
300320
if time.Since(e.lastFollowTime) > FOLLOW_FREQUENCY {
301321
e.followWorkInDB()
302322
}
@@ -361,12 +381,40 @@ func (e *TaskEngine) followWorkInDB() {
361381
}
362382

363383
// pollerTryAllWork starts the next 1 task
364-
func (e *TaskEngine) pollerTryAllWork() bool {
384+
func (e *TaskEngine) pollerTryAllWork(schedulable bool) bool {
365385
if time.Since(e.lastCleanup.Load().(time.Time)) > CLEANUP_FREQUENCY {
366386
e.lastCleanup.Store(time.Now())
367387
resources.CleanupMachines(e.ctx, e.db)
368388
}
369389
for _, v := range e.handlers {
390+
if !schedulable {
391+
if v.TaskTypeDetails.SchedulingOverrides == nil {
392+
continue
393+
}
394+
395+
// Override the schedulable flag if the task has any assigned overrides
396+
var foundOverride bool
397+
for relatedTaskName := range v.TaskTypeDetails.SchedulingOverrides {
398+
var assignedOverrideTasks []int
399+
err := e.db.Select(e.ctx, &assignedOverrideTasks, `SELECT id
400+
FROM harmony_task
401+
WHERE owner_id = $1 AND name=$2
402+
ORDER BY update_time LIMIT 1`, e.ownerID, relatedTaskName)
403+
if err != nil {
404+
log.Error("Unable to read assigned overrides ", err)
405+
break
406+
}
407+
if len(assignedOverrideTasks) > 0 {
408+
log.Infow("found override, scheduling despite schedulable=false flag", "ownerID", e.ownerID, "relatedTaskName", relatedTaskName, "assignedOverrideTasks", assignedOverrideTasks)
409+
foundOverride = true
410+
break
411+
}
412+
}
413+
if !foundOverride {
414+
continue
415+
}
416+
}
417+
370418
if err := v.AssertMachineHasCapacity(); err != nil {
371419
log.Debugf("skipped scheduling %s type tasks on due to %s", v.Name, err.Error())
372420
continue
@@ -407,6 +455,11 @@ func (e *TaskEngine) pollerTryAllWork() bool {
407455
log.Warn("Work not accepted for " + strconv.Itoa(len(unownedTasks)) + " " + v.Name + " task(s)")
408456
}
409457
}
458+
459+
if !schedulable {
460+
return false
461+
}
462+
410463
// if no work was accepted, are we bored? Then find work in priority order.
411464
for _, v := range e.handlers {
412465
v := v
@@ -462,15 +515,43 @@ func (e *TaskEngine) Host() string {
462515
return e.hostAndPort
463516
}
464517

465-
func (e *TaskEngine) schedulable() (bool, error) {
518+
func (e *TaskEngine) checkNodeFlags() (bool, error) {
466519
var unschedulable bool
467-
err := e.db.QueryRow(e.ctx, `SELECT unschedulable FROM harmony_machines WHERE host_and_port=$1`, e.hostAndPort).Scan(&unschedulable)
520+
var restartRequest *time.Time
521+
err := e.db.QueryRow(e.ctx, `SELECT unschedulable, restart_request FROM harmony_machines WHERE host_and_port=$1`, e.hostAndPort).Scan(&unschedulable, &restartRequest)
468522
if err != nil {
469523
return false, err
470524
}
525+
526+
if restartRequest != nil {
527+
e.restartIfNoTasksPending(*restartRequest)
528+
}
529+
471530
return !unschedulable, nil
472531
}
473532

533+
func (e *TaskEngine) restartIfNoTasksPending(pendingSince time.Time) {
534+
var tasksPending int
535+
err := e.db.QueryRow(e.ctx, `SELECT COUNT(*) FROM harmony_task WHERE owner_id=$1`, e.ownerID).Scan(&tasksPending)
536+
if err != nil {
537+
log.Error("Unable to check for tasks pending: ", err)
538+
return
539+
}
540+
if tasksPending == 0 {
541+
log.Infow("no tasks pending, restarting", "ownerID", e.ownerID, "pendingSince", pendingSince, "took", time.Since(pendingSince))
542+
543+
// unset the flags first
544+
_, err = e.db.Exec(e.ctx, `UPDATE harmony_machines SET restart_request=NULL, unschedulable=FALSE WHERE host_and_port=$1`, e.hostAndPort)
545+
if err != nil {
546+
log.Error("Unable to unset restart request: ", err)
547+
return
548+
}
549+
550+
// then exit
551+
os.Exit(ExitStatusRestartRequest)
552+
}
553+
}
554+
474555
// About the Registry
475556
// This registry exists for the benefit of "static methods" of TaskInterface extensions.
476557
// For example, GetSPID(db, taskID) (int, err) is a static method that can be called

harmony/harmonytask/metrics.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
11
package harmonytask
22

33
import (
4+
"context"
5+
"time"
6+
47
promclient "github.com/prometheus/client_golang/prometheus"
58
"go.opencensus.io/stats"
69
"go.opencensus.io/stats/view"
710
"go.opencensus.io/tag"
11+
12+
curiobuild "github.com/filecoin-project/curio/build"
813
)
914

1015
var (
1116
taskNameTag, _ = tag.NewKey("task_name")
1217
sourceTag, _ = tag.NewKey("source")
18+
versionTag, _ = tag.NewKey("version")
1319
pre = "harmonytask_"
1420

1521
// tasks can be short, but can extend to hours
@@ -18,6 +24,7 @@ var (
1824

1925
// TaskMeasures groups all harmonytask metrics.
2026
var TaskMeasures = struct {
27+
Uptime *stats.Int64Measure
2128
TasksStarted *stats.Int64Measure
2229
TasksCompleted *stats.Int64Measure
2330
TasksFailed *stats.Int64Measure
@@ -29,6 +36,7 @@ var TaskMeasures = struct {
2936
PollerIterations *stats.Int64Measure
3037
AddedTasks *stats.Int64Measure
3138
}{
39+
Uptime: stats.Int64(pre+"uptime", "Total uptime of the node in seconds.", stats.UnitSeconds),
3240
TasksStarted: stats.Int64(pre+"tasks_started", "Total number of tasks started.", stats.UnitDimensionless),
3341
TasksCompleted: stats.Int64(pre+"tasks_completed", "Total number of tasks completed successfully.", stats.UnitDimensionless),
3442
TasksFailed: stats.Int64(pre+"tasks_failed", "Total number of tasks that failed.", stats.UnitDimensionless),
@@ -48,6 +56,11 @@ var TaskMeasures = struct {
4856
// TaskViews groups all harmonytask-related default views.
4957
func init() {
5058
err := view.Register(
59+
&view.View{
60+
Measure: TaskMeasures.Uptime,
61+
Aggregation: view.LastValue(),
62+
TagKeys: []tag.Key{versionTag},
63+
},
5164
&view.View{
5265
Measure: TaskMeasures.TasksStarted,
5366
Aggregation: view.Sum(),
@@ -102,4 +115,20 @@ func init() {
102115
if err != nil {
103116
panic(err)
104117
}
118+
119+
// record uptime every 10 seconds
120+
go func() {
121+
v := curiobuild.UserVersion()
122+
bootTime := time.Now()
123+
124+
for {
125+
time.Sleep(10 * time.Second)
126+
err := stats.RecordWithTags(context.Background(), []tag.Mutator{
127+
tag.Upsert(versionTag, v),
128+
}, TaskMeasures.Uptime.M(int64(time.Since(bootTime).Seconds())))
129+
if err != nil {
130+
log.Errorw("Could not record uptime", "error", err)
131+
}
132+
}
133+
}()
105134
}

harmony/harmonytask/task_type_handler.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/filecoin-project/go-state-types/abi"
1717

1818
"github.com/filecoin-project/curio/harmony/harmonydb"
19+
"github.com/filecoin-project/curio/harmony/taskhelp"
1920
)
2021

2122
var log = logging.Logger("harmonytask")
@@ -215,6 +216,13 @@ canAcceptAgain:
215216
}()
216217

217218
done, doErr = h.Do(*tID, func() bool {
219+
if taskhelp.IsBackgroundTask(h.Name) || h.CanYield {
220+
if h.TaskEngine.yieldBackground.Load() {
221+
log.Infow("yielding background task", "name", h.Name, "id", *tID)
222+
return false
223+
}
224+
}
225+
218226
var owner int
219227
// Background here because we don't want GracefulRestart to block this save.
220228
err := h.TaskEngine.db.QueryRow(context.Background(),

harmony/taskhelp/common.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ func SliceIfFound[T any](slice []T, f func(T) bool) ([]T, bool) {
2323
// BackgroundTask are tasks that:
2424
// * Always run in the background
2525
// * Never finish "successfully"
26+
// When a node is cordoned (not schedulable), background tasks MUST yield.
2627
func BackgroundTask(name string) string {
2728
return "bg:" + name
2829
}

tasks/f3/f3_task.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ func (f *F3Task) Do(taskID harmonytask.TaskID, stillOwned func() bool) (done boo
112112
// When participateLoop returns, we go back to get a new ticket
113113
}
114114

115-
return false, xerrors.Errorf("f3 task is background task")
115+
return false, xerrors.Errorf("yield")
116116
}
117117

118118
func (f *F3Task) tryGetF3ParticipationTicket(ctx context.Context, stillOwned func() bool, participant address.Address, previousTicket []byte) (api.F3ParticipationTicket, error) {

tasks/proofshare/task_client_poll.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,8 @@ func (t *TaskClientPoll) Do(taskID harmonytask.TaskID, stillOwned func() bool) (
197197
// TypeDetails implements harmonytask.TaskInterface.
198198
func (t *TaskClientPoll) TypeDetails() harmonytask.TaskTypeDetails {
199199
return harmonytask.TaskTypeDetails{
200-
Name: "PSClientPoll",
200+
Name: "PSClientPoll",
201+
CanYield: true,
201202
Cost: resources.Resources{
202203
Cpu: 0,
203204
Ram: 4 << 20,

0 commit comments

Comments (0)