From ff1afac7b6bcaa6e9da550238e769f20208beafa Mon Sep 17 00:00:00 2001 From: MasterPtato Date: Thu, 8 Jan 2026 15:59:39 -0800 Subject: [PATCH] fix: remove pending actors metric, fix actor error tracker, engine runner error print --- .../dev-host/grafana/dashboards/guard.json | 2 +- .../core/grafana/dashboards/guard.json | 2 +- .../core/grafana/dashboards/guard.json | 2 +- .../grafana/dashboards/guard.json | 2 +- .../docker/dev/grafana/dashboards/guard.json | 2 +- .../template/grafana-dashboards/guard.json | 2 +- engine/packages/pegboard-runner/src/conn.rs | 1 + .../pegboard/src/workflows/actor/mod.rs | 10 +++++----- .../packages/pegboard/src/workflows/runner.rs | 12 ++---------- .../packages/pegboard/src/workflows/runner2.rs | 18 ++---------------- engine/sdks/typescript/runner/src/mod.ts | 2 +- 11 files changed, 17 insertions(+), 38 deletions(-) diff --git a/engine/docker/dev-host/grafana/dashboards/guard.json b/engine/docker/dev-host/grafana/dashboards/guard.json index 7c87e69c0b..833e81c4c8 100644 --- a/engine/docker/dev-host/grafana/dashboards/guard.json +++ b/engine/docker/dev-host/grafana/dashboards/guard.json @@ -1324,7 +1324,7 @@ }, "timepicker": {}, "timezone": "browser", - "title": "Rivet Guard", + "title": "Guard", "uid": "cen785ige8fswd", "version": 3 } \ No newline at end of file diff --git a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json index 7c87e69c0b..833e81c4c8 100644 --- a/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json +++ b/engine/docker/dev-multidc-multinode/core/grafana/dashboards/guard.json @@ -1324,7 +1324,7 @@ }, "timepicker": {}, "timezone": "browser", - "title": "Rivet Guard", + "title": "Guard", "uid": "cen785ige8fswd", "version": 3 } \ No newline at end of file diff --git a/engine/docker/dev-multidc/core/grafana/dashboards/guard.json b/engine/docker/dev-multidc/core/grafana/dashboards/guard.json index 7c87e69c0b..833e81c4c8 100644 --- a/engine/docker/dev-multidc/core/grafana/dashboards/guard.json +++ b/engine/docker/dev-multidc/core/grafana/dashboards/guard.json @@ -1324,7 +1324,7 @@ }, "timepicker": {}, "timezone": "browser", - "title": "Rivet Guard", + "title": "Guard", "uid": "cen785ige8fswd", "version": 3 } \ No newline at end of file diff --git a/engine/docker/dev-multinode/grafana/dashboards/guard.json b/engine/docker/dev-multinode/grafana/dashboards/guard.json index 7c87e69c0b..833e81c4c8 100644 --- a/engine/docker/dev-multinode/grafana/dashboards/guard.json +++ b/engine/docker/dev-multinode/grafana/dashboards/guard.json @@ -1324,7 +1324,7 @@ }, "timepicker": {}, "timezone": "browser", - "title": "Rivet Guard", + "title": "Guard", "uid": "cen785ige8fswd", "version": 3 } \ No newline at end of file diff --git a/engine/docker/dev/grafana/dashboards/guard.json b/engine/docker/dev/grafana/dashboards/guard.json index 7c87e69c0b..833e81c4c8 100644 --- a/engine/docker/dev/grafana/dashboards/guard.json +++ b/engine/docker/dev/grafana/dashboards/guard.json @@ -1324,7 +1324,7 @@ }, "timepicker": {}, "timezone": "browser", - "title": "Rivet Guard", + "title": "Guard", "uid": "cen785ige8fswd", "version": 3 } \ No newline at end of file diff --git a/engine/docker/template/grafana-dashboards/guard.json b/engine/docker/template/grafana-dashboards/guard.json index 7c87e69c0b..833e81c4c8 100644 --- a/engine/docker/template/grafana-dashboards/guard.json +++ b/engine/docker/template/grafana-dashboards/guard.json @@ -1324,7 +1324,7 @@ }, "timepicker": {}, "timezone": "browser", - "title": "Rivet Guard", + "title": "Guard", "uid": "cen785ige8fswd", "version": 3 } \ No newline at end of file diff --git a/engine/packages/pegboard-runner/src/conn.rs b/engine/packages/pegboard-runner/src/conn.rs index 6de4f83618..9c928bcb07 100644 --- a/engine/packages/pegboard-runner/src/conn.rs +++ b/engine/packages/pegboard-runner/src/conn.rs @@ -254,6 +254,7 @@ impl Init { } } +#[tracing::instrument(skip_all)] pub async fn handle_init( ctx: &StandaloneCtx, conn: &Conn, diff --git a/engine/packages/pegboard/src/workflows/actor/mod.rs b/engine/packages/pegboard/src/workflows/actor/mod.rs index d6b776170d..f1ed5da03a 100644 --- a/engine/packages/pegboard/src/workflows/actor/mod.rs +++ b/engine/packages/pegboard/src/workflows/actor/mod.rs @@ -1058,11 +1058,11 @@ async fn handle_stopped( // Set Crashed failure reason for actual crashes. // Runner failure reasons are already set at the start of handle_stopped. - if let StoppedVariant::Normal { code, message } = &variant { - ensure!( - *code != protocol::mk2::StopCode::Ok, - "expected non-Ok stop code in crash handler, got Ok" - ); + if let StoppedVariant::Normal { + code: protocol::mk2::StopCode::Error, + message, + } = &variant + { ctx.v(3) .activity(runtime::SetFailureReasonInput { failure_reason: FailureReason::Crashed { diff --git a/engine/packages/pegboard/src/workflows/runner.rs b/engine/packages/pegboard/src/workflows/runner.rs index 7420d2103a..84b8fc7a0b 100644 --- a/engine/packages/pegboard/src/workflows/runner.rs +++ b/engine/packages/pegboard/src/workflows/runner.rs @@ -971,7 +971,7 @@ pub(crate) async fn allocate_pending_actors( let runner_eligible_threshold = ctx.config().pegboard().runner_eligible_threshold(); // NOTE: This txn should closely resemble the one found in the allocate_actor activity of the actor wf - let (allocations, pending_actor_count) = ctx + let allocations = ctx .udb()? .run(|tx| async move { let start = Instant::now(); @@ -993,7 +993,6 @@ pub(crate) async fn allocate_pending_actors( // the one we choose Snapshot, ); - let mut pending_actor_count = 0; let ping_threshold_ts = util::timestamp::now() - runner_eligible_threshold; 'queue_loop: loop { @@ -1006,8 +1005,6 @@ pub(crate) async fn allocate_pending_actors( break; }; - pending_actor_count += 1; - let (queue_key, generation) = tx.read_entry::(&queue_entry)?; @@ -1123,20 +1120,15 @@ pub(crate) async fn allocate_pending_actors( }, }); - pending_actor_count -= 1; continue 'queue_loop; } } - Ok((allocations, pending_actor_count)) + Ok(allocations) }) .custom_instrument(tracing::info_span!("runner_allocate_pending_actors_tx")) .await?; - metrics::ACTOR_PENDING_ALLOCATION - .with_label_values(&[&input.namespace_id.to_string(), &input.name.to_string()]) - .set(pending_actor_count as i64); - Ok(AllocatePendingActorsOutput { allocations }) } diff --git a/engine/packages/pegboard/src/workflows/runner2.rs b/engine/packages/pegboard/src/workflows/runner2.rs index c158f3ba09..8479f4b0b6 100644 --- a/engine/packages/pegboard/src/workflows/runner2.rs +++ b/engine/packages/pegboard/src/workflows/runner2.rs @@ -649,7 +649,7 @@ pub(crate) async fn allocate_pending_actors( let runner_eligible_threshold = ctx.config().pegboard().runner_eligible_threshold(); // NOTE: This txn should closely resemble the one found in the allocate_actor activity of the actor wf - let (allocations, pending_actor_count) = ctx + let allocations = ctx .udb()? .run(|tx| async move { let start = Instant::now(); @@ -671,7 +671,6 @@ pub(crate) async fn allocate_pending_actors( // the one we choose Snapshot, ); - let mut pending_actor_count = 0; let ping_threshold_ts = util::timestamp::now() - runner_eligible_threshold; 'queue_loop: loop { @@ -684,8 +683,6 @@ pub(crate) async fn allocate_pending_actors( break; }; - pending_actor_count += 1; - let (queue_key, generation) = tx.read_entry::(&queue_entry)?; @@ -801,20 +798,15 @@ pub(crate) async fn allocate_pending_actors( }, }); - pending_actor_count -= 1; continue 'queue_loop; } } - Ok((allocations, pending_actor_count)) + Ok(allocations) }) .custom_instrument(tracing::info_span!("runner_allocate_pending_actors_tx")) .await?; - metrics::ACTOR_PENDING_ALLOCATION - .with_label_values(&[&input.namespace_id.to_string(), &input.name.to_string()]) - .set(pending_actor_count as i64); - Ok(AllocatePendingActorsOutput { allocations }) } @@ -856,12 +848,6 @@ async fn drain_older_versions( ctx: &ActivityCtx, input: &DrainOlderVersionsInput, ) -> Result { - tracing::info!( - namespace_id = %input.namespace_id, - name = %input.name, - version = input.version, - "drain_older_versions activity called" - ); ctx.op(crate::ops::runner::drain::Input { namespace_id: input.namespace_id, name: input.name.clone(), diff --git a/engine/sdks/typescript/runner/src/mod.ts b/engine/sdks/typescript/runner/src/mod.ts index b6b60d3feb..490100fee5 100644 --- a/engine/sdks/typescript/runner/src/mod.ts +++ b/engine/sdks/typescript/runner/src/mod.ts @@ -886,7 +886,7 @@ export class Runner { ws.addEventListener("error", (ev) => { this.log?.error({ - msg: `WebSocket error: ${ev.error}`, + msg: `WebSocket error: ${stringifyError(ev.error)}`, }); if (!this.#shutdown) {