Skip to content

Commit 4d1e592

Browse files
committed
Run alarm without syncing alarm manager if currentTime past local time
Historically, if a SQLite DOs local alarm time in SQLite didn't match the AlarmManager's alarm time, we would reschedule the alarm and try to run the alarm again later. However, if the time we are attempting to sync to is already before the current real system time, there is no point in rescheduling to run later, since we might as well run now. This commit runs the alarm immediately if the current system time is already past the time we are trying to sync the alarm manager to, since it's better to run an alarm a little late rather than very late.
1 parent 7d8cacd commit 4d1e592

File tree

7 files changed

+76
-28
lines changed

7 files changed

+76
-28
lines changed

src/workerd/api/global-scope.c++

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,8 +438,9 @@ kj::Promise<WorkerInterface::AlarmResult> ServiceWorkerGlobalScope::runAlarm(kj:
438438
}
439439
}
440440

441+
auto currentTime = kj::systemPreciseCalendarClock().now();
441442
KJ_SWITCH_ONEOF(persistent.armAlarmHandler(
442-
scheduledTime, context.getCurrentTraceSpan(), false, actorId)) {
443+
scheduledTime, context.getCurrentTraceSpan(), currentTime, false, actorId)) {
443444
KJ_CASE_ONEOF(armResult, ActorCacheInterface::RunAlarmHandler) {
444445
auto& handler = KJ_REQUIRE_NONNULL(exportedHandler);
445446
if (handler.alarm == kj::none) {

src/workerd/io/actor-cache-test.c++

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4938,6 +4938,8 @@ KJ_TEST("ActorCache alarm get/put") {
49384938

49394939
auto oneMs = 1 * kj::MILLISECONDS + kj::UNIX_EPOCH;
49404940
auto twoMs = 2 * kj::MILLISECONDS + kj::UNIX_EPOCH;
4941+
// Used as the "current time" parameter for armAlarmHandler in tests.
4942+
auto testCurrentTime = kj::UNIX_EPOCH;
49414943
{
49424944
// Test alarm writes happen transactionally with storage ops
49434945
test.setAlarm(oneMs);
@@ -4979,7 +4981,8 @@ KJ_TEST("ActorCache alarm get/put") {
49794981

49804982
{
49814983
// we have a cached time == nullptr, so we should not attempt to run an alarm
4982-
auto armResult = test.cache.armAlarmHandler(10 * kj::SECONDS + kj::UNIX_EPOCH, nullptr);
4984+
auto armResult =
4985+
test.cache.armAlarmHandler(10 * kj::SECONDS + kj::UNIX_EPOCH, nullptr, testCurrentTime);
49834986
KJ_ASSERT(armResult.is<ActorCache::CancelAlarmHandler>());
49844987
auto cancelResult = kj::mv(armResult.get<ActorCache::CancelAlarmHandler>());
49854988
KJ_ASSERT(cancelResult.waitBeforeCancel.poll(ws));
@@ -4997,7 +5000,7 @@ KJ_TEST("ActorCache alarm get/put") {
49975000
{
49985001
// Test that alarm handler handle clears alarm when dropped with no writes
49995002
{
5000-
auto armResult = test.cache.armAlarmHandler(oneMs, nullptr);
5003+
auto armResult = test.cache.armAlarmHandler(oneMs, nullptr, testCurrentTime);
50015004
KJ_ASSERT(armResult.is<ActorCache::RunAlarmHandler>());
50025005
}
50035006
mockStorage->expectCall("deleteAlarm", ws)
@@ -5010,7 +5013,7 @@ KJ_TEST("ActorCache alarm get/put") {
50105013

50115014
// Test that alarm handler handle does not clear alarm when dropped with writes
50125015
{
5013-
auto armResult = test.cache.armAlarmHandler(oneMs, nullptr);
5016+
auto armResult = test.cache.armAlarmHandler(oneMs, nullptr, testCurrentTime);
50145017
KJ_ASSERT(armResult.is<ActorCache::RunAlarmHandler>());
50155018
test.setAlarm(twoMs);
50165019
}
@@ -5024,7 +5027,7 @@ KJ_TEST("ActorCache alarm get/put") {
50245027

50255028
// Test that alarm handler handle does not cache delete when it fails
50265029
{
5027-
auto armResult = test.cache.armAlarmHandler(oneMs, nullptr);
5030+
auto armResult = test.cache.armAlarmHandler(oneMs, nullptr, testCurrentTime);
50285031
KJ_ASSERT(armResult.is<ActorCache::RunAlarmHandler>());
50295032
}
50305033
mockStorage->expectCall("deleteAlarm", ws)
@@ -5036,7 +5039,7 @@ KJ_TEST("ActorCache alarm get/put") {
50365039
{
50375040
// Test that alarm handler handle does not cache alarm delete when noCache == true
50385041
{
5039-
auto armResult = test.cache.armAlarmHandler(twoMs, nullptr, true);
5042+
auto armResult = test.cache.armAlarmHandler(twoMs, nullptr, testCurrentTime, true);
50405043
KJ_ASSERT(armResult.is<ActorCache::RunAlarmHandler>());
50415044
}
50425045
mockStorage->expectCall("deleteAlarm", ws)
@@ -5073,6 +5076,7 @@ KJ_TEST("ActorCache alarm delete when flush fails") {
50735076
auto& mockStorage = test.mockStorage;
50745077

50755078
auto oneMs = 1 * kj::MILLISECONDS + kj::UNIX_EPOCH;
5079+
auto testCurrentTime = kj::UNIX_EPOCH;
50765080

50775081
{
50785082
auto time = expectUncached(test.getAlarm());
@@ -5090,7 +5094,7 @@ KJ_TEST("ActorCache alarm delete when flush fails") {
50905094
// we want to test that even if a flush is retried
50915095
// that the post-delete actions for a checked delete happen.
50925096
{
5093-
auto handle = test.cache.armAlarmHandler(oneMs, nullptr);
5097+
auto handle = test.cache.armAlarmHandler(oneMs, nullptr, testCurrentTime);
50945098

50955099
auto time = expectCached(test.getAlarm());
50965100
KJ_ASSERT(time == kj::none);

src/workerd/io/actor-cache.c++

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,11 @@ kj::Maybe<kj::Promise<void>> ActorCache::evictStale(kj::Date now) {
164164
}
165165

166166
kj::OneOf<ActorCache::CancelAlarmHandler, ActorCache::RunAlarmHandler> ActorCache::armAlarmHandler(
167-
kj::Date scheduledTime, SpanParent parentSpan, bool noCache, kj::StringPtr actorId) {
167+
kj::Date scheduledTime,
168+
SpanParent parentSpan,
169+
kj::Date /*currentTime -- unused*/,
170+
bool noCache,
171+
kj::StringPtr actorId) {
168172
noCache = noCache || lru.options.noCache;
169173

170174
KJ_ASSERT(!currentAlarmTime.is<DeferredAlarmDelete>());

src/workerd/io/actor-cache.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,12 @@ class ActorCacheInterface: public ActorCacheOps {
243243
};
244244

245245
// Call when entering the alarm handler.
246+
//
247+
// `currentTime` is used to determine if an overdue alarm should run immediately even when
248+
// the local alarm state differs from the scheduled time (to avoid blocking on storage sync).
246249
virtual kj::OneOf<CancelAlarmHandler, RunAlarmHandler> armAlarmHandler(kj::Date scheduledTime,
247250
SpanParent parentSpan,
251+
kj::Date currentTime,
248252
bool noCache = false,
249253
kj::StringPtr actorId = "") = 0;
250254

@@ -363,6 +367,7 @@ class ActorCache final: public ActorCacheInterface {
363367

364368
kj::OneOf<CancelAlarmHandler, RunAlarmHandler> armAlarmHandler(kj::Date scheduledTime,
365369
SpanParent parentSpan,
370+
kj::Date currentTime,
366371
bool noCache = false,
367372
kj::StringPtr actorId = "") override;
368373
void cancelDeferredAlarmDeletion() override;

src/workerd/io/actor-sqlite-test.c++

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ static constexpr kj::Date twoMs = 2 * kj::MILLISECONDS + kj::UNIX_EPOCH;
2020
static constexpr kj::Date threeMs = 3 * kj::MILLISECONDS + kj::UNIX_EPOCH;
2121
static constexpr kj::Date fourMs = 4 * kj::MILLISECONDS + kj::UNIX_EPOCH;
2222
static constexpr kj::Date fiveMs = 5 * kj::MILLISECONDS + kj::UNIX_EPOCH;
23+
// Used as the "current time" parameter for armAlarmHandler in tests.
24+
// Set to epoch (before all test alarm times) so existing tests aren't affected by
25+
// the overdue alarm check.
26+
static constexpr kj::Date testCurrentTime = kj::UNIX_EPOCH;
2327

2428
template <typename T>
2529
kj::Promise<T> eagerlyReportExceptions(kj::Promise<T> promise, kj::SourceLocation location = {}) {
@@ -586,7 +590,7 @@ KJ_TEST("tells alarm handler to cancel when committed alarm is empty") {
586590
ActorSqliteTest test;
587591

588592
{
589-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
593+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
590594
// We expect armAlarmHandler() to tell us to cancel the alarm.
591595
KJ_ASSERT(armResult.is<ActorCache::CancelAlarmHandler>());
592596
auto waitPromise = kj::mv(armResult.get<ActorCache::CancelAlarmHandler>().waitBeforeCancel);
@@ -612,7 +616,7 @@ KJ_TEST("tells alarm handler to reschedule when handler alarm is later than comm
612616
KJ_ASSERT(expectSync(test.getAlarm()) == oneMs);
613617

614618
// Request handler run at 2ms. Expect cancellation with rescheduling.
615-
auto armResult = test.actor.armAlarmHandler(twoMs, nullptr);
619+
auto armResult = test.actor.armAlarmHandler(twoMs, nullptr, testCurrentTime);
616620
KJ_ASSERT(armResult.is<ActorSqlite::CancelAlarmHandler>());
617621
auto cancelResult = kj::mv(armResult.get<ActorSqlite::CancelAlarmHandler>());
618622

@@ -636,7 +640,7 @@ KJ_TEST("tells alarm handler to reschedule when handler alarm is earlier than co
636640
KJ_ASSERT(expectSync(test.getAlarm()) == twoMs);
637641

638642
// Expect that armAlarmHandler() tells caller to cancel after rescheduling completes.
639-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
643+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
640644
KJ_ASSERT(armResult.is<ActorSqlite::CancelAlarmHandler>());
641645
auto cancelResult = kj::mv(armResult.get<ActorSqlite::CancelAlarmHandler>());
642646

@@ -661,7 +665,7 @@ KJ_TEST("does not cancel handler when local db alarm state is later than schedul
661665

662666
test.setAlarm(twoMs);
663667
{
664-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
668+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
665669
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
666670
}
667671
test.pollAndExpectCalls({"commit"})[0]->fulfill();
@@ -680,7 +684,7 @@ KJ_TEST("does not cancel handler when local db alarm state is earlier than sched
680684

681685
test.setAlarm(oneMs);
682686
{
683-
auto armResult = test.actor.armAlarmHandler(twoMs, nullptr);
687+
auto armResult = test.actor.armAlarmHandler(twoMs, nullptr, testCurrentTime);
684688
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
685689
}
686690
test.pollAndExpectCalls({"scheduleRun(1ms)"})[0]->fulfill();
@@ -698,7 +702,7 @@ KJ_TEST("getAlarm() returns null during handler") {
698702
KJ_ASSERT(expectSync(test.getAlarm()) == oneMs);
699703

700704
{
701-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
705+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
702706
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
703707
test.pollAndExpectCalls({});
704708

@@ -719,7 +723,7 @@ KJ_TEST("alarm handler handle clears alarm when dropped with no writes") {
719723
KJ_ASSERT(expectSync(test.getAlarm()) == oneMs);
720724

721725
{
722-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
726+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
723727
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
724728
}
725729
test.pollAndExpectCalls({"commit"})[0]->fulfill();
@@ -738,7 +742,7 @@ KJ_TEST("alarm deleter does not clear alarm when dropped with writes") {
738742
KJ_ASSERT(expectSync(test.getAlarm()) == oneMs);
739743

740744
{
741-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
745+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
742746
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
743747
test.setAlarm(twoMs);
744748
}
@@ -759,7 +763,7 @@ KJ_TEST("can cancel deferred alarm deletion during handler") {
759763
KJ_ASSERT(expectSync(test.getAlarm()) == oneMs);
760764

761765
{
762-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
766+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
763767
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
764768
test.actor.cancelDeferredAlarmDeletion();
765769
}
@@ -778,7 +782,7 @@ KJ_TEST("canceling deferred alarm deletion outside handler has no effect") {
778782
KJ_ASSERT(expectSync(test.getAlarm()) == oneMs);
779783

780784
{
781-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
785+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
782786
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
783787
}
784788
test.pollAndExpectCalls({"commit"})[0]->fulfill();
@@ -803,7 +807,7 @@ KJ_TEST("canceling deferred alarm deletion outside handler edge case") {
803807
KJ_ASSERT(expectSync(test.getAlarm()) == oneMs);
804808

805809
{
806-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
810+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
807811
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
808812
}
809813
test.actor.cancelDeferredAlarmDeletion();
@@ -825,7 +829,7 @@ KJ_TEST("canceling deferred alarm deletion is idempotent") {
825829
KJ_ASSERT(expectSync(test.getAlarm()) == oneMs);
826830

827831
{
828-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
832+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
829833
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
830834
test.actor.cancelDeferredAlarmDeletion();
831835
test.actor.cancelDeferredAlarmDeletion();
@@ -846,7 +850,7 @@ KJ_TEST("alarm handler cleanup succeeds when output gate is broken") {
846850
test.pollAndExpectCalls({});
847851
KJ_ASSERT(expectSync(test.getAlarm()) == oneMs);
848852

849-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
853+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
850854
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
851855
auto deferredDelete = kj::mv(armResult.get<ActorSqlite::RunAlarmHandler>().deferredDelete);
852856

@@ -893,7 +897,7 @@ KJ_TEST("handler alarm is not deleted when commit fails") {
893897
KJ_ASSERT(expectSync(test.getAlarm()) == oneMs);
894898

895899
{
896-
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr);
900+
auto armResult = test.actor.armAlarmHandler(oneMs, nullptr, testCurrentTime);
897901
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
898902

899903
KJ_ASSERT(expectSync(test.getAlarm()) == kj::none);
@@ -1340,7 +1344,7 @@ KJ_TEST("rolling back transaction leaves deferred alarm deletion in expected sta
13401344
KJ_ASSERT(expectSync(test.getAlarm()) == twoMs);
13411345

13421346
{
1343-
auto armResult = test.actor.armAlarmHandler(twoMs, nullptr);
1347+
auto armResult = test.actor.armAlarmHandler(twoMs, nullptr, testCurrentTime);
13441348
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
13451349

13461350
auto txn = test.actor.startTransaction();
@@ -1373,7 +1377,7 @@ KJ_TEST("committing transaction leaves deferred alarm deletion in expected state
13731377
KJ_ASSERT(expectSync(test.getAlarm()) == twoMs);
13741378

13751379
{
1376-
auto armResult = test.actor.armAlarmHandler(twoMs, nullptr);
1380+
auto armResult = test.actor.armAlarmHandler(twoMs, nullptr, testCurrentTime);
13771381
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
13781382

13791383
auto txn = test.actor.startTransaction();
@@ -1404,7 +1408,7 @@ KJ_TEST("rolling back nested transaction leaves deferred alarm deletion in expec
14041408
KJ_ASSERT(expectSync(test.getAlarm()) == twoMs);
14051409

14061410
{
1407-
auto armResult = test.actor.armAlarmHandler(twoMs, nullptr);
1411+
auto armResult = test.actor.armAlarmHandler(twoMs, nullptr, testCurrentTime);
14081412
KJ_ASSERT(armResult.is<ActorSqlite::RunAlarmHandler>());
14091413

14101414
auto txn1 = test.actor.startTransaction();

src/workerd/io/actor-sqlite.c++

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -863,8 +863,11 @@ void ActorSqlite::shutdown(kj::Maybe<const kj::Exception&> maybeException) {
863863
}
864864

865865
kj::OneOf<ActorSqlite::CancelAlarmHandler, ActorSqlite::RunAlarmHandler> ActorSqlite::
866-
armAlarmHandler(
867-
kj::Date scheduledTime, SpanParent parentSpan, bool noCache, kj::StringPtr actorId) {
866+
armAlarmHandler(kj::Date scheduledTime,
867+
SpanParent parentSpan,
868+
kj::Date currentTime,
869+
bool noCache,
870+
kj::StringPtr actorId) {
868871
KJ_ASSERT(!inAlarmHandler);
869872

870873
if (haveDeferredDelete) {
@@ -879,6 +882,19 @@ kj::OneOf<ActorSqlite::CancelAlarmHandler, ActorSqlite::RunAlarmHandler> ActorSq
879882
// If there's a clean db time that differs from the requested handler's scheduled time, this
880883
// run should be canceled.
881884
if (willFireEarlier(scheduledTime, localAlarmState)) {
885+
// If the local alarm time is already in the past, just run the handler now. This avoids
886+
// blocking alarm execution on storage sync when storage is overloaded. The alarm will
887+
// either delete itself on success or reschedule on failure.
888+
if (localAlarmState != kj::none && willFireEarlier(localAlarmState, currentTime)) {
889+
LOG_WARNING_PERIODICALLY(
890+
"NOSENTRY SQLite alarm overdue, running despite AlarmManager mismatch", scheduledTime,
891+
KJ_ASSERT_NONNULL(localAlarmState), currentTime, actorId);
892+
haveDeferredDelete = true;
893+
inAlarmHandler = true;
894+
static const DeferredAlarmDeleter disposer;
895+
return RunAlarmHandler{.deferredDelete = kj::Own<void>(this, disposer)};
896+
}
897+
882898
// If the handler's scheduled time is earlier than the clean scheduled time, we may be
883899
// recovering from a failed db commit or scheduling request, so we need to request that
884900
// the alarm be rescheduled for the current db time, and tell the caller to wait for
@@ -909,10 +925,23 @@ kj::OneOf<ActorSqlite::CancelAlarmHandler, ActorSqlite::RunAlarmHandler> ActorSq
909925
// which suggests that either the alarm manager is working with stale data or that local
910926
// alarm time has somehow gotten out of sync with the scheduled alarm time.
911927

912-
// Only log if the alarm manager is significantly late (>10 seconds behind SQLite)
913928
// We know localAlarmState has a value here because we're in the branch where it's earlier
914929
// than scheduledTime (not equal, and not later).
915930
auto localTime = KJ_ASSERT_NONNULL(localAlarmState);
931+
932+
// If the local alarm time is already in the past, just run the handler now. This avoids
933+
// blocking alarm execution on storage sync when storage is overloaded.
934+
if (localTime <= currentTime) {
935+
LOG_WARNING_PERIODICALLY(
936+
"NOSENTRY SQLite alarm overdue, running despite stale AlarmManager time",
937+
scheduledTime, localTime, currentTime, actorId);
938+
haveDeferredDelete = true;
939+
inAlarmHandler = true;
940+
static const DeferredAlarmDeleter disposer;
941+
return RunAlarmHandler{.deferredDelete = kj::Own<void>(this, disposer)};
942+
}
943+
944+
// Only log if the alarm manager is significantly late (>10 seconds behind SQLite)
916945
if (scheduledTime - localTime > 10 * kj::SECONDS) {
917946
LOG_WARNING_PERIODICALLY(
918947
"NOSENTRY SQLite alarm handler canceled.", scheduledTime, actorId, localTime);

src/workerd/io/actor-sqlite.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ class ActorSqlite final: public ActorCacheInterface, private kj::TaskSet::ErrorH
9696
void shutdown(kj::Maybe<const kj::Exception&> maybeException) override;
9797
kj::OneOf<CancelAlarmHandler, RunAlarmHandler> armAlarmHandler(kj::Date scheduledTime,
9898
SpanParent parentSpan,
99+
kj::Date currentTime,
99100
bool noCache = false,
100101
kj::StringPtr actorId = "") override;
101102
void cancelDeferredAlarmDeletion() override;

0 commit comments

Comments
 (0)