From 4fb4831f92541439b2b041c0e2a684770bca8a68 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Thu, 2 Oct 2025 15:08:22 +0100 Subject: [PATCH 1/9] add achedular --- .../query/live/collection-config-builder.ts | 69 ++++++++++++- .../src/query/live/collection-subscriber.ts | 24 +++-- packages/db/src/schedular.ts | 97 +++++++++++++++++++ packages/db/src/transactions.ts | 3 + 4 files changed, 185 insertions(+), 8 deletions(-) create mode 100644 packages/db/src/schedular.ts diff --git a/packages/db/src/query/live/collection-config-builder.ts b/packages/db/src/query/live/collection-config-builder.ts index a81d1df2a..fdd863de1 100644 --- a/packages/db/src/query/live/collection-config-builder.ts +++ b/packages/db/src/query/live/collection-config-builder.ts @@ -1,6 +1,8 @@ import { D2, output } from "@tanstack/db-ivm" import { compileQuery } from "../compiler/index.js" import { buildQuery, getQueryIR } from "../builder/index.js" +import { transactionScopedScheduler } from "../../schedular.js" +import { getActiveTransaction } from "../../transactions.js" import { CollectionSubscriber } from "./collection-subscriber.js" import type { CollectionSubscription } from "../../collection/subscription.js" import type { RootStreamBuilder } from "@tanstack/db-ivm" @@ -22,6 +24,12 @@ import type { SyncState, } from "./types.js" +type PendingGraphRun = { + config: Parameters[`sync`]>[0] + syncState: FullSyncState + loadCallbacks: Set<() => boolean> +} + // Global counter for auto-generated collection IDs let liveQueryCollectionCounter = 0 @@ -176,6 +184,65 @@ export class CollectionConfigBuilder< } } + scheduleGraphRun( + config: Parameters[`sync`]>[0], + syncState: FullSyncState, + callback?: () => boolean + ) { + const activeTransaction = getActiveTransaction() + const contextId = activeTransaction?.id + + const createEntry = () => { + const state: PendingGraphRun = { + config, + syncState, + loadCallbacks: new Set(), + } + + if (callback) { + state.loadCallbacks.add(callback) + } + + return { + state, + run: () => { + const combinedLoader = + state.loadCallbacks.size > 0 + ? () => { + let allDone = true + for (const loader of state.loadCallbacks) { + const result = loader() + if (result === false) { + allDone = false + } + } + + return allDone + } + : undefined + + this.maybeRunGraph(state.config, state.syncState, combinedLoader) + }, + } + } + + const updateEntry = (entry: { state: PendingGraphRun }) => { + entry.state.config = config + entry.state.syncState = syncState + + if (callback) { + entry.state.loadCallbacks.add(callback) + } + } + + transactionScopedScheduler.schedule({ + contextId, + jobId: this, + createEntry, + updateEntry, + }) + } + private getSyncConfig(): SyncConfig { return { rowUpdateMode: `full`, @@ -202,7 +269,7 @@ export class CollectionConfigBuilder< ) // Initial run with callback to load more data if needed - this.maybeRunGraph(config, fullSyncState, loadMoreDataCallbacks) + this.scheduleGraphRun(config, fullSyncState, loadMoreDataCallbacks) // Return the unsubscribe function return () => { diff --git a/packages/db/src/query/live/collection-subscriber.ts b/packages/db/src/query/live/collection-subscriber.ts index d196ecd77..deb5ac01a 100644 --- a/packages/db/src/query/live/collection-subscriber.ts +++ b/packages/db/src/query/live/collection-subscriber.ts @@ -11,6 +11,8 @@ import type { OrderByOptimizationInfo } from "../compiler/order-by.js" import type { CollectionConfigBuilder } from "./collection-config-builder.js" import type { CollectionSubscription } from "../../collection/subscription.js" +const loadMoreCallbackSymbol = Symbol(`tanstack.db.loadMore`) + export class CollectionSubscriber< TContext extends Context, TResult extends object = GetResult, @@ -85,10 +87,10 @@ export class CollectionSubscriber< // otherwise we end up in an infinite loop trying to load more data const dataLoader = sentChanges > 0 ? callback : undefined - // We need to call `maybeRunGraph` even if there's no data to load + // We need to schedule a graph run even if there's no data to load // because we need to mark the collection as ready if it's not already - // and that's only done in `maybeRunGraph` - this.collectionConfigBuilder.maybeRunGraph( + // and that's only done during the graph execution + this.collectionConfigBuilder.scheduleGraphRun( this.config, this.syncState, dataLoader @@ -201,10 +203,18 @@ export class CollectionSubscriber< } const trackedChanges = this.trackSentValues(changes, orderByInfo.comparator) - this.sendChangesToPipeline( - trackedChanges, - this.loadMoreIfNeeded.bind(this, subscription) - ) + type SubscriptionWithLoader = CollectionSubscription & { + [loadMoreCallbackSymbol]?: () => boolean + } + + const subscriptionWithLoader = subscription as SubscriptionWithLoader + + const boundLoader = + subscriptionWithLoader[loadMoreCallbackSymbol] ?? + (subscriptionWithLoader[loadMoreCallbackSymbol] = + this.loadMoreIfNeeded.bind(this, subscription)) + + this.sendChangesToPipeline(trackedChanges, boundLoader) } // Loads the next `n` items from the collection diff --git a/packages/db/src/schedular.ts b/packages/db/src/schedular.ts new file mode 100644 index 000000000..907d11b5d --- /dev/null +++ b/packages/db/src/schedular.ts @@ -0,0 +1,97 @@ +export type SchedulerContextId = string | symbol + +interface SchedulerEntry { + state: TState + run: () => void +} + +interface ScheduleOptions { + contextId?: SchedulerContextId + jobId: unknown + createEntry: () => SchedulerEntry + updateEntry?: (entry: SchedulerEntry) => void +} + +/** + * Basic scoped scheduler that coalesces work by context and job. + * + * - A context (e.g. a transaction id) scopes work execution: + * queued jobs for the same context run together when that context is flushed. + * - A job id (e.g. a specific CollectionConfigBuilder) dedupes work within a context. + * + * Callers provide an entry factory so state objects can be mutated across schedules, + * and optionally an updater to merge new data into an existing entry. + */ +export class Scheduler { + private contexts = new Map< + SchedulerContextId, + Map> + >() + + schedule({ + contextId, + jobId, + createEntry, + updateEntry, + }: ScheduleOptions): void { + if (typeof contextId === `undefined`) { + const entry = createEntry() + updateEntry?.(entry) + entry.run() + return + } + + let context = this.contexts.get(contextId) + if (!context) { + context = new Map() + this.contexts.set(contextId, context) + } + + let entry = context.get(jobId) as SchedulerEntry | undefined + if (!entry) { + entry = createEntry() + context.set(jobId, entry) + } + + updateEntry?.(entry) + } + + flush(contextId: SchedulerContextId): void { + const context = this.contexts.get(contextId) + if (!context) return + + this.contexts.delete(contextId) + + for (const entry of context.values()) { + entry.run() + } + } + + flushAll(): void { + const contexts = Array.from(this.contexts.keys()) + contexts.forEach((contextId) => { + const context = this.contexts.get(contextId) + if (!context) return + this.contexts.delete(contextId) + for (const entry of context.values()) { + entry.run() + } + }) + } + + clear(contextId: SchedulerContextId): void { + this.contexts.delete(contextId) + } + + clearJob(contextId: SchedulerContextId, jobId: unknown): void { + const context = this.contexts.get(contextId) + if (!context) return + + context.delete(jobId) + if (context.size === 0) { + this.contexts.delete(contextId) + } + } +} + +export const transactionScopedScheduler = new Scheduler() diff --git a/packages/db/src/transactions.ts b/packages/db/src/transactions.ts index 8d4500c25..c44e22c2d 100644 --- a/packages/db/src/transactions.ts +++ b/packages/db/src/transactions.ts @@ -5,6 +5,7 @@ import { TransactionNotPendingCommitError, TransactionNotPendingMutateError, } from "./errors" +import { transactionScopedScheduler } from "./schedular.js" import type { Deferred } from "./deferred" import type { MutationFn, @@ -179,10 +180,12 @@ export function getActiveTransaction(): Transaction | undefined { } function registerTransaction(tx: Transaction) { + transactionScopedScheduler.clear(tx.id) transactionStack.push(tx) } function unregisterTransaction(tx: Transaction) { + transactionScopedScheduler.flush(tx.id) transactionStack = transactionStack.filter((t) => t.id !== tx.id) } From 0f359f7b50b39c7a1c54f4b8ed10bf1708026bb4 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Thu, 2 Oct 2025 15:09:25 +0100 Subject: [PATCH 2/9] add test --- packages/db/tests/query/schedular.test.ts | 89 +++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 packages/db/tests/query/schedular.test.ts diff --git a/packages/db/tests/query/schedular.test.ts b/packages/db/tests/query/schedular.test.ts new file mode 100644 index 000000000..deea63913 --- /dev/null +++ b/packages/db/tests/query/schedular.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from "vitest" +import { createCollection } from "../../src/collection/index.js" +import { createLiveQueryCollection, eq } from "../../src/query/index.js" +import { createTransaction } from "../../src/transactions.js" + +interface User { + id: number + name: string +} + +interface Task { + id: number + userId: number + title: string +} + +describe(`live query scheduler`, () => { + it(`runs the live query graph once per transaction that touches multiple collections`, async () => { + const users = createCollection({ + id: `test-users`, + getKey: (user) => user.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady }) => { + begin() + commit() + markReady() + }, + }, + }) + + const tasks = createCollection({ + id: `test-tasks`, + getKey: (task) => task.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady }) => { + begin() + commit() + markReady() + }, + }, + }) + + const assignments = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: users }) + .join({ task: tasks }, ({ user, task }) => eq(user.id, task.userId)) + .select(({ user, task }) => ({ + userId: user.id, + taskId: task?.id, + title: task?.title, + })), + }) + + await assignments.preload() + + const batches: Array> = [] + const subscription = assignments.subscribeChanges((changes) => { + batches.push(changes) + }) + + const transaction = createTransaction({ + mutationFn: async () => {}, + autoCommit: false, + }) + + transaction.mutate(() => { + users.insert({ id: 1, name: `Alice` }) + tasks.insert({ id: 1, userId: 1, title: `Write tests` }) + }) + + expect(batches).toHaveLength(1) + expect(batches[0]).toHaveLength(1) + expect(batches[0]![0]).toMatchObject({ + type: `insert`, + value: { + userId: 1, + taskId: 1, + title: `Write tests`, + }, + }) + + subscription.unsubscribe() + transaction.rollback() + }) +}) From ddcf5fa75bb462ba564ea5bbb11b6ad0c0a972ab Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Thu, 2 Oct 2025 15:11:35 +0100 Subject: [PATCH 3/9] changeset --- .changeset/olive-crews-love.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/olive-crews-love.md diff --git a/.changeset/olive-crews-love.md b/.changeset/olive-crews-love.md new file mode 100644 index 000000000..00494cc5a --- /dev/null +++ b/.changeset/olive-crews-love.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db": patch +--- + +Add a scheduler that ensures that if a transaction touches multiple collections that feed into a single live query, the liver query only emits a single batch of updates. This fixes an issue where multiple renders could be triggered from a live query under this situation. From 22b8b01887471991b53e18e4f094ca609ca480c7 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Thu, 2 Oct 2025 17:07:57 +0100 Subject: [PATCH 4/9] more tests --- .../query/live/collection-config-builder.ts | 33 ++- .../db/src/{schedular.ts => scheduler.ts} | 16 +- packages/db/src/transactions.ts | 6 +- packages/db/tests/query/schedular.test.ts | 89 ------ packages/db/tests/query/scheduler.test.ts | 273 ++++++++++++++++++ 5 files changed, 315 insertions(+), 102 deletions(-) rename packages/db/src/{schedular.ts => scheduler.ts} (75%) delete mode 100644 packages/db/tests/query/schedular.test.ts create mode 100644 packages/db/tests/query/scheduler.test.ts diff --git a/packages/db/src/query/live/collection-config-builder.ts b/packages/db/src/query/live/collection-config-builder.ts index fdd863de1..7cbe03253 100644 --- a/packages/db/src/query/live/collection-config-builder.ts +++ b/packages/db/src/query/live/collection-config-builder.ts @@ -1,9 +1,10 @@ import { D2, output } from "@tanstack/db-ivm" import { compileQuery } from "../compiler/index.js" import { buildQuery, getQueryIR } from "../builder/index.js" -import { transactionScopedScheduler } from "../../schedular.js" +import { transactionScopedScheduler } from "../../scheduler.js" import { getActiveTransaction } from "../../transactions.js" import { CollectionSubscriber } from "./collection-subscriber.js" +import type { SchedulerContextId } from "../../scheduler.js" import type { CollectionSubscription } from "../../collection/subscription.js" import type { RootStreamBuilder } from "@tanstack/db-ivm" import type { OrderByOptimizationInfo } from "../compiler/order-by.js" @@ -187,10 +188,14 @@ export class CollectionConfigBuilder< scheduleGraphRun( config: Parameters[`sync`]>[0], syncState: FullSyncState, - callback?: () => boolean + callback?: () => boolean, + options?: { contextId?: SchedulerContextId; jobId?: unknown } ) { - const activeTransaction = getActiveTransaction() - const contextId = activeTransaction?.id + const contextId = options?.contextId ?? getActiveTransaction()?.id + const jobId = options?.jobId ?? this + // We intentionally scope deduplication to the builder instance. Each instance + // owns caches and compiled pipelines, so sharing an entry across instances that + // merely reuse the same string id would bind the wrong `this` in the run closure. const createEntry = () => { const state: PendingGraphRun = { @@ -210,13 +215,27 @@ export class CollectionConfigBuilder< state.loadCallbacks.size > 0 ? () => { let allDone = true + let firstError: unknown + for (const loader of state.loadCallbacks) { - const result = loader() - if (result === false) { + try { + const result = loader() + if (result === false) { + allDone = false + } + } catch (error) { allDone = false + if (firstError === undefined) { + firstError = error + } } } + if (firstError !== undefined) { + throw firstError + } + + // Returning false signals that callers should schedule another pass. return allDone } : undefined @@ -237,7 +256,7 @@ export class CollectionConfigBuilder< transactionScopedScheduler.schedule({ contextId, - jobId: this, + jobId, createEntry, updateEntry, }) diff --git a/packages/db/src/schedular.ts b/packages/db/src/scheduler.ts similarity index 75% rename from packages/db/src/schedular.ts rename to packages/db/src/scheduler.ts index 907d11b5d..ec6dafcb2 100644 --- a/packages/db/src/schedular.ts +++ b/packages/db/src/scheduler.ts @@ -15,12 +15,14 @@ interface ScheduleOptions { /** * Basic scoped scheduler that coalesces work by context and job. * - * - A context (e.g. a transaction id) scopes work execution: - * queued jobs for the same context run together when that context is flushed. - * - A job id (e.g. a specific CollectionConfigBuilder) dedupes work within a context. + * - A **context** (for example a transaction id) represents the batching boundary. + * Work scheduled with the same context id is queued until that context is flushed. + * - A **job id** deduplicates work inside a context. The first scheduled job determines + * the execution slot; subsequent schedules update the same entry but preserve order. + * - When no context id is provided the work executes immediately (no batching). * - * Callers provide an entry factory so state objects can be mutated across schedules, - * and optionally an updater to merge new data into an existing entry. + * Each job entry owns a mutable state object so callers can merge new data between + * schedule calls before the eventual `run()` executes. */ export class Scheduler { private contexts = new Map< @@ -83,6 +85,10 @@ export class Scheduler { this.contexts.delete(contextId) } + hasPendingJobs(contextId: SchedulerContextId): boolean { + return this.contexts.has(contextId) + } + clearJob(contextId: SchedulerContextId, jobId: unknown): void { const context = this.contexts.get(contextId) if (!context) return diff --git a/packages/db/src/transactions.ts b/packages/db/src/transactions.ts index c44e22c2d..c8df88b45 100644 --- a/packages/db/src/transactions.ts +++ b/packages/db/src/transactions.ts @@ -5,7 +5,7 @@ import { TransactionNotPendingCommitError, TransactionNotPendingMutateError, } from "./errors" -import { transactionScopedScheduler } from "./schedular.js" +import { transactionScopedScheduler } from "./scheduler.js" import type { Deferred } from "./deferred" import type { MutationFn, @@ -180,11 +180,15 @@ export function getActiveTransaction(): Transaction | undefined { } function registerTransaction(tx: Transaction) { + // Clear any stale work that may have been left behind if a previous mutate + // scope aborted before we could flush. transactionScopedScheduler.clear(tx.id) transactionStack.push(tx) } function unregisterTransaction(tx: Transaction) { + // Always flush pending work for this transaction before removing it from + // the ambient stack – this runs even if the mutate callback throws. transactionScopedScheduler.flush(tx.id) transactionStack = transactionStack.filter((t) => t.id !== tx.id) } diff --git a/packages/db/tests/query/schedular.test.ts b/packages/db/tests/query/schedular.test.ts deleted file mode 100644 index deea63913..000000000 --- a/packages/db/tests/query/schedular.test.ts +++ /dev/null @@ -1,89 +0,0 @@ -import { describe, expect, it } from "vitest" -import { createCollection } from "../../src/collection/index.js" -import { createLiveQueryCollection, eq } from "../../src/query/index.js" -import { createTransaction } from "../../src/transactions.js" - -interface User { - id: number - name: string -} - -interface Task { - id: number - userId: number - title: string -} - -describe(`live query scheduler`, () => { - it(`runs the live query graph once per transaction that touches multiple collections`, async () => { - const users = createCollection({ - id: `test-users`, - getKey: (user) => user.id, - startSync: true, - sync: { - sync: ({ begin, commit, markReady }) => { - begin() - commit() - markReady() - }, - }, - }) - - const tasks = createCollection({ - id: `test-tasks`, - getKey: (task) => task.id, - startSync: true, - sync: { - sync: ({ begin, commit, markReady }) => { - begin() - commit() - markReady() - }, - }, - }) - - const assignments = createLiveQueryCollection({ - startSync: true, - query: (q) => - q - .from({ user: users }) - .join({ task: tasks }, ({ user, task }) => eq(user.id, task.userId)) - .select(({ user, task }) => ({ - userId: user.id, - taskId: task?.id, - title: task?.title, - })), - }) - - await assignments.preload() - - const batches: Array> = [] - const subscription = assignments.subscribeChanges((changes) => { - batches.push(changes) - }) - - const transaction = createTransaction({ - mutationFn: async () => {}, - autoCommit: false, - }) - - transaction.mutate(() => { - users.insert({ id: 1, name: `Alice` }) - tasks.insert({ id: 1, userId: 1, title: `Write tests` }) - }) - - expect(batches).toHaveLength(1) - expect(batches[0]).toHaveLength(1) - expect(batches[0]![0]).toMatchObject({ - type: `insert`, - value: { - userId: 1, - taskId: 1, - title: `Write tests`, - }, - }) - - subscription.unsubscribe() - transaction.rollback() - }) -}) diff --git a/packages/db/tests/query/scheduler.test.ts b/packages/db/tests/query/scheduler.test.ts new file mode 100644 index 000000000..92aefe1e6 --- /dev/null +++ b/packages/db/tests/query/scheduler.test.ts @@ -0,0 +1,273 @@ +import { afterEach, describe, expect, it, vi } from "vitest" +import { createCollection } from "../../src/collection/index.js" +import { createLiveQueryCollection, eq } from "../../src/query/index.js" +import { createTransaction } from "../../src/transactions.js" +import { transactionScopedScheduler } from "../../src/scheduler.js" +import { CollectionConfigBuilder } from "../../src/query/live/collection-config-builder.js" +import type { FullSyncState } from "../../src/query/live/types.js" +import type { SyncConfig } from "../../src/types.js" + +interface ChangeMessageLike { + type: string + value: any +} + +interface User { + id: number + name: string +} + +interface Task { + id: number + userId: number + title: string +} + +function setupLiveQueryCollections(id: string) { + const users = createCollection({ + id: `${id}-users`, + getKey: (user) => user.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady }) => { + begin() + commit() + markReady() + }, + }, + }) + + const tasks = createCollection({ + id: `${id}-tasks`, + getKey: (task) => task.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady }) => { + begin() + commit() + markReady() + }, + }, + }) + + const assignments = createLiveQueryCollection({ + id: `${id}-assignments`, + startSync: true, + query: (q) => + q + .from({ user: users }) + .join({ task: tasks }, ({ user, task }) => eq(user.id, task.userId)) + .select(({ user, task }) => ({ + userId: user.id, + taskId: task?.id, + title: task?.title, + })), + }) + + return { users, tasks, assignments } +} + +function recordBatches(collection: any) { + const batches: Array> = [] + const subscription = collection.subscribeChanges((changes: any) => { + batches.push(changes as Array) + }) + return { + batches, + unsubscribe: () => subscription.unsubscribe(), + } +} + +afterEach(() => { + transactionScopedScheduler.flushAll() +}) + +describe(`live query scheduler`, () => { + it(`runs the live query graph once per transaction that touches multiple collections`, async () => { + const { users, tasks, assignments } = + setupLiveQueryCollections(`single-batch`) + await assignments.preload() + + const recorder = recordBatches(assignments) + + const transaction = createTransaction({ + mutationFn: async () => {}, + autoCommit: false, + }) + + transaction.mutate(() => { + users.insert({ id: 1, name: `Alice` }) + tasks.insert({ id: 1, userId: 1, title: `Write tests` }) + }) + + expect(recorder.batches).toHaveLength(1) + expect(recorder.batches[0]).toHaveLength(1) + expect(recorder.batches[0]![0]).toMatchObject({ + type: `insert`, + value: { + userId: 1, + taskId: 1, + title: `Write tests`, + }, + }) + + recorder.unsubscribe() + transaction.rollback() + }) + + it(`handles nested transactions without emitting duplicate batches`, async () => { + const { users, tasks, assignments } = setupLiveQueryCollections(`nested`) + await assignments.preload() + + const recorder = recordBatches(assignments) + + const outerTx = createTransaction({ + mutationFn: async () => {}, + autoCommit: false, + }) + const innerTx = createTransaction({ + mutationFn: async () => {}, + autoCommit: false, + }) + + outerTx.mutate(() => { + users.insert({ id: 11, name: `Nested User` }) + innerTx.mutate(() => { + tasks.insert({ id: 21, userId: 11, title: `Nested Task` }) + }) + }) + + expect(recorder.batches).toHaveLength(1) + expect(recorder.batches[0]![0]).toMatchObject({ + value: { + userId: 11, + taskId: 21, + title: `Nested Task`, + }, + }) + + recorder.unsubscribe() + innerTx.rollback() + outerTx.rollback() + }) + + it(`clears pending jobs when a transaction rolls back due to an error`, async () => { + const { users, tasks, assignments } = setupLiveQueryCollections(`rollback`) + await assignments.preload() + + const recorder = recordBatches(assignments) + const tx = createTransaction({ + mutationFn: async () => {}, + autoCommit: false, + }) + + expect(() => { + tx.mutate(() => { + users.insert({ id: 31, name: `Temp` }) + tasks.insert({ id: 41, userId: 31, title: `Temp Task` }) + throw new Error(`boom`) + }) + }).toThrowError(`boom`) + + tx.rollback() + + const batchesBeforeFlush = recorder.batches.length + transactionScopedScheduler.flush(tx.id) + expect(recorder.batches.length).toBeGreaterThanOrEqual(batchesBeforeFlush) + if (recorder.batches.length > batchesBeforeFlush) { + const latestBatch = recorder.batches.at(-1)! + expect(latestBatch[0]?.type).toBe(`delete`) + } + expect(transactionScopedScheduler.hasPendingJobs(tx.id)).toBe(false) + // We emit the optimistic insert and, after the explicit rollback, possibly a + // compensating delete – but no duplicate inserts. + expect(recorder.batches[0]![0]).toMatchObject({ type: `insert` }) + + recorder.unsubscribe() + }) + + it(`dedupes batches across multiple subscribers`, async () => { + const { users, tasks, assignments } = + setupLiveQueryCollections(`multi-subscriber`) + await assignments.preload() + + const first = recordBatches(assignments) + const second = recordBatches(assignments) + + const tx = createTransaction({ + mutationFn: async () => {}, + autoCommit: false, + }) + tx.mutate(() => { + users.insert({ id: 51, name: `Multi` }) + tasks.insert({ id: 61, userId: 51, title: `Subscriber Task` }) + }) + + expect(first.batches).toHaveLength(1) + expect(second.batches).toHaveLength(1) + expect(first.batches[0]![0]).toMatchObject({ + value: { + userId: 51, + taskId: 61, + title: `Subscriber Task`, + }, + }) + + first.unsubscribe() + second.unsubscribe() + tx.rollback() + }) + + it(`coalesces load-more callbacks scheduled within the same context`, () => { + const baseCollection = createCollection({ + id: `loader-users`, + getKey: (user) => user.id, + sync: { + sync: () => () => {}, + }, + }) + + const builder = new CollectionConfigBuilder({ + id: `loader-builder`, + query: (q) => q.from({ user: baseCollection }), + }) + + const contextId = Symbol(`loader-context`) + const loader = vi.fn(() => true) + const config = { + begin: vi.fn(), + write: vi.fn(), + commit: vi.fn(), + markReady: vi.fn(), + truncate: vi.fn(), + } as unknown as Parameters[`sync`]>[0] + + const syncState = { + messagesCount: 0, + subscribedToAllCollections: true, + unsubscribeCallbacks: new Set<() => void>(), + graph: { + pendingWork: () => false, + run: vi.fn(), + }, + inputs: {}, + pipeline: {}, + } as unknown as FullSyncState + + const maybeRunGraphSpy = vi + .spyOn(builder, `maybeRunGraph`) + .mockImplementation((_config, _syncState, combinedLoader) => { + combinedLoader?.() + }) + + builder.scheduleGraphRun(config, syncState, loader, { contextId }) + builder.scheduleGraphRun(config, syncState, loader, { contextId }) + + transactionScopedScheduler.flush(contextId) + + expect(loader).toHaveBeenCalledTimes(1) + expect(maybeRunGraphSpy).toHaveBeenCalledTimes(1) + + maybeRunGraphSpy.mockRestore() + }) +}) From 6a2308b657fad4acbbb03a64bccd0681cc0d804f Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 4 Oct 2025 12:09:22 +0100 Subject: [PATCH 5/9] add support for dependent live queries running once (diamond dependencies) --- packages/db/src/scheduler.ts | 26 ++- packages/db/tests/query/scheduler.test.ts | 213 ++++++++++++++++++++++ 2 files changed, 225 insertions(+), 14 deletions(-) diff --git a/packages/db/src/scheduler.ts b/packages/db/src/scheduler.ts index ec6dafcb2..ba341f1e5 100644 --- a/packages/db/src/scheduler.ts +++ b/packages/db/src/scheduler.ts @@ -59,26 +59,24 @@ export class Scheduler { } flush(contextId: SchedulerContextId): void { - const context = this.contexts.get(contextId) - if (!context) return + let context = this.contexts.get(contextId) + while (context) { + this.contexts.delete(contextId) - this.contexts.delete(contextId) + for (const entry of context.values()) { + entry.run() + } - for (const entry of context.values()) { - entry.run() + context = this.contexts.get(contextId) } } flushAll(): void { - const contexts = Array.from(this.contexts.keys()) - contexts.forEach((contextId) => { - const context = this.contexts.get(contextId) - if (!context) return - this.contexts.delete(contextId) - for (const entry of context.values()) { - entry.run() - } - }) + let next = this.contexts.keys().next() + while (!next.done) { + this.flush(next.value) + next = this.contexts.keys().next() + } } clear(contextId: SchedulerContextId): void { diff --git a/packages/db/tests/query/scheduler.test.ts b/packages/db/tests/query/scheduler.test.ts index 92aefe1e6..a56c8ae26 100644 --- a/packages/db/tests/query/scheduler.test.ts +++ b/packages/db/tests/query/scheduler.test.ts @@ -218,6 +218,219 @@ describe(`live query scheduler`, () => { tx.rollback() }) + it(`runs join live queries once after their parent queries settle`, async () => { + const collectionA = createCollection<{ id: number; value: string }>({ + id: `diamond-A`, + getKey: (row) => row.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady }) => { + begin() + commit() + markReady() + }, + }, + }) + + const collectionB = createCollection<{ id: number; value: string }>({ + id: `diamond-B`, + getKey: (row) => row.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady }) => { + begin() + commit() + markReady() + }, + }, + }) + + const liveQueryA = createLiveQueryCollection({ + id: `diamond-lqA`, + startSync: true, + query: (q) => + q + .from({ a: collectionA }) + .select(({ a }) => ({ id: a.id, value: a.value })), + }) + + const liveQueryB = createLiveQueryCollection({ + id: `diamond-lqB`, + startSync: true, + query: (q) => + q + .from({ b: collectionB }) + .select(({ b }) => ({ id: b.id, value: b.value })), + }) + + const liveQueryJoin = createLiveQueryCollection({ + id: `diamond-join`, + startSync: true, + query: (q) => + q + .from({ left: liveQueryA }) + .join( + { right: liveQueryB }, + ({ left, right }) => eq(left.id, right.id), + `full` // Full join to ensure we would get multiple changes if this doesn't dedupe + ) + .select(({ left, right }) => ({ + left: left?.value, + right: right?.value, + })), + }) + + await Promise.all([ + liveQueryA.preload(), + liveQueryB.preload(), + liveQueryJoin.preload(), + ]) + + const joinBatches = recordBatches(liveQueryJoin) + + const tx = createTransaction({ + mutationFn: async () => {}, + autoCommit: false, + }) + + tx.mutate(() => { + collectionA.insert({ id: 1, value: `A1` }) + collectionB.insert({ id: 1, value: `B1` }) + }) + + expect(joinBatches.batches).toHaveLength(1) + expect(joinBatches.batches[0]![0]).toMatchObject({ type: `insert` }) + expect(joinBatches.batches[0]![0]!.value).toMatchObject({ + left: `A1`, + right: `B1`, + }) + + tx.mutate(() => { + collectionA.update(1, (draft) => { + draft.value = `A1b` + }) + collectionB.update(1, (draft) => { + draft.value = `B1b` + }) + }) + + expect(joinBatches.batches).toHaveLength(2) + expect(joinBatches.batches[1]![0]).toMatchObject({ + type: `update`, + previousValue: { + left: `A1`, + right: `B1`, + }, + }) + expect(joinBatches.batches[1]![0]!.value).toMatchObject({ + left: `A1b`, + right: `B1b`, + }) + + joinBatches.unsubscribe() + tx.rollback() + }) + + it(`runs hybrid joins once when they observe both a live query and a collection`, async () => { + const collectionA = createCollection<{ id: number; value: string }>({ + id: `hybrid-A`, + getKey: (row) => row.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady }) => { + begin() + commit() + markReady() + }, + }, + }) + + const collectionB = createCollection<{ id: number; value: string }>({ + id: `hybrid-B`, + getKey: (row) => row.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady }) => { + begin() + commit() + markReady() + }, + }, + }) + + const liveQueryA = createLiveQueryCollection({ + id: `hybrid-lqA`, + startSync: true, + query: (q) => + q + .from({ a: collectionA }) + .select(({ a }) => ({ id: a.id, value: a.value })), + }) + + const hybridJoin = createLiveQueryCollection({ + id: `hybrid-join`, + startSync: true, + query: (q) => + q + .from({ left: liveQueryA }) + .join( + { right: collectionB }, + ({ left, right }) => eq(left.id, right.id), + `full` + ) + .select(({ left, right }) => ({ + left: left?.value, + right: right?.value, + })), + }) + + await Promise.all([liveQueryA.preload(), hybridJoin.preload()]) + + const batches = recordBatches(hybridJoin) + + const tx = createTransaction({ + mutationFn: async () => {}, + autoCommit: false, + }) + + tx.mutate(() => { + collectionA.insert({ id: 7, value: `A7` }) + collectionB.insert({ id: 7, value: `B7` }) + }) + + expect(batches.batches).toHaveLength(1) + expect(batches.batches[0]![0]).toMatchObject({ type: `insert` }) + expect(batches.batches[0]![0]!.value).toMatchObject({ + left: `A7`, + right: `B7`, + }) + + tx.mutate(() => { + collectionA.update(7, (draft) => { + draft.value = `A7b` + }) + collectionB.update(7, (draft) => { + draft.value = `B7b` + }) + }) + + expect(batches.batches).toHaveLength(2) + expect(batches.batches[1]![0]).toMatchObject({ + type: `update`, + previousValue: { + left: `A7`, + right: `B7`, + }, + }) + expect(batches.batches[1]![0]!.value).toMatchObject({ + left: `A7b`, + right: `B7b`, + }) + + batches.unsubscribe() + tx.rollback() + }) + it(`coalesces load-more callbacks scheduled within the same context`, () => { const baseCollection = createCollection({ id: `loader-users`, From 6c782227efd1b9ffcae134b6ba275ada8c0a3239 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 4 Oct 2025 13:01:21 +0100 Subject: [PATCH 6/9] add test case where dimond deps fail --- packages/db/tests/query/scheduler.test.ts | 80 +++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/packages/db/tests/query/scheduler.test.ts b/packages/db/tests/query/scheduler.test.ts index a56c8ae26..647510ee7 100644 --- a/packages/db/tests/query/scheduler.test.ts +++ b/packages/db/tests/query/scheduler.test.ts @@ -431,6 +431,86 @@ describe(`live query scheduler`, () => { tx.rollback() }) + it(`currently single batch when the join sees right-side data before the left`, async () => { + const collectionA = createCollection<{ id: number; value: string }>({ + id: `ordering-A`, + getKey: (row) => row.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady }) => { + begin() + commit() + markReady() + }, + }, + }) + + const collectionB = createCollection<{ id: number; value: string }>({ + id: `ordering-B`, + getKey: (row) => row.id, + startSync: true, + sync: { + sync: ({ begin, commit, markReady }) => { + begin() + commit() + markReady() + }, + }, + }) + + const liveQueryA = createLiveQueryCollection({ + id: `ordering-lqA`, + startSync: true, + query: (q) => + q + .from({ a: collectionA }) + .select(({ a }) => ({ id: a.id, value: a.value })), + }) + + const join = createLiveQueryCollection({ + id: `ordering-join`, + startSync: true, + query: (q) => + q + .from({ left: liveQueryA }) + .join( + { right: collectionB }, + ({ left, right }) => eq(left.id, right.id), + `full` + ) + .select(({ left, right }) => ({ + left: left?.value, + right: right?.value, + })), + }) + + await Promise.all([liveQueryA.preload(), join.preload()]) + + const batches = recordBatches(join) + + const tx = createTransaction({ + mutationFn: async () => {}, + autoCommit: false, + }) + + tx.mutate(() => { + collectionB.insert({ id: 42, value: `right-first` }) + collectionA.insert({ id: 42, value: `left-later` }) + }) + + expect(batches.batches).toHaveLength(1) + expect(batches.batches[0]![0]).toMatchObject({ + type: `insert`, + value: { + left: `left-later`, + right: `right-first`, + }, + }) + + batches.unsubscribe() + tx.rollback() + }) + it(`coalesces load-more callbacks scheduled within the same context`, () => { const baseCollection = createCollection({ id: `loader-users`, From 3422a435fc78ad17ee1d805ee5b84f1a24b9f2ee Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 4 Oct 2025 15:27:11 +0100 Subject: [PATCH 7/9] working dependency tracking for live queries --- .../db/src/query/live-query-collection.ts | 56 ++++-- .../query/live/collection-config-builder.ts | 74 +++++++- .../db/src/query/live/collection-registry.ts | 40 +++++ .../src/query/live/collection-subscriber.ts | 9 +- packages/db/src/scheduler.ts | 162 +++++++++++++++--- packages/db/tests/query/scheduler.test.ts | 71 ++------ 6 files changed, 302 insertions(+), 110 deletions(-) create mode 100644 packages/db/src/query/live/collection-registry.ts diff --git a/packages/db/src/query/live-query-collection.ts b/packages/db/src/query/live-query-collection.ts index ec631cd8f..30de83aa3 100644 --- a/packages/db/src/query/live-query-collection.ts +++ b/packages/db/src/query/live-query-collection.ts @@ -1,5 +1,10 @@ import { createCollection } from "../collection/index.js" import { CollectionConfigBuilder } from "./live/collection-config-builder.js" +import { + getBuilderFromConfig, + registerCollectionBuilder, +} from "./live/collection-registry.js" +import type { RunCountUtils } from "./live/collection-config-builder.js" import type { LiveQueryCollectionConfig } from "./live/types.js" import type { InitialQueryBuilder, QueryBuilder } from "./builder/index.js" import type { Collection } from "../collection/index.js" @@ -35,7 +40,7 @@ export function liveQueryCollectionOptions< TResult extends object = GetResult, >( config: LiveQueryCollectionConfig -): CollectionConfig { +): CollectionConfig & { utils: RunCountUtils } { const collectionConfigBuilder = new CollectionConfigBuilder< TContext, TResult @@ -83,7 +88,7 @@ export function createLiveQueryCollection< TResult extends object = GetResult, >( query: (q: InitialQueryBuilder) => QueryBuilder -): Collection +): Collection // Overload 2: Accept full config object with optional utilities export function createLiveQueryCollection< @@ -92,7 +97,7 @@ export function createLiveQueryCollection< TUtils extends UtilsRecord = {}, >( config: LiveQueryCollectionConfig & { utils?: TUtils } -): Collection +): Collection // Implementation export function createLiveQueryCollection< @@ -103,7 +108,7 @@ export function createLiveQueryCollection< configOrQuery: | (LiveQueryCollectionConfig & { utils?: TUtils }) | ((q: InitialQueryBuilder) => QueryBuilder) -): Collection { +): Collection { // Determine if the argument is a function (query) or a config object if (typeof configOrQuery === `function`) { // Simple query function case @@ -113,7 +118,11 @@ export function createLiveQueryCollection< ) => QueryBuilder, } const options = liveQueryCollectionOptions(config) - return bridgeToCreateCollection(options) + return bridgeToCreateCollection(options) as Collection< + TResult, + string | number, + RunCountUtils & TUtils + > } else { // Config object case const config = configOrQuery as LiveQueryCollectionConfig< @@ -121,10 +130,18 @@ export function createLiveQueryCollection< TResult > & { utils?: TUtils } const options = liveQueryCollectionOptions(config) - return bridgeToCreateCollection({ - ...options, - utils: config.utils, - }) + + const collection = bridgeToCreateCollection(options) + + if (config.utils) { + Object.assign(collection.utils, config.utils) + } + + return collection as Collection< + TResult, + string | number, + RunCountUtils & TUtils + > } } @@ -132,16 +149,19 @@ export function createLiveQueryCollection< * Bridge function that handles the type compatibility between query2's TResult * and core collection's output type without exposing ugly type assertions to users */ -function bridgeToCreateCollection< - TResult extends object, - TUtils extends UtilsRecord = {}, ->( - options: CollectionConfig & { utils?: TUtils } -): Collection { - // This is the only place we need a type assertion, hidden from user API - return createCollection(options as any) as unknown as Collection< +function bridgeToCreateCollection( + options: CollectionConfig & { utils: RunCountUtils } +): Collection { + const collection = createCollection(options as any) as unknown as Collection< TResult, string | number, - TUtils + RunCountUtils > + + const builder = getBuilderFromConfig(options) + if (builder) { + registerCollectionBuilder(collection, builder) + } + + return collection } diff --git a/packages/db/src/query/live/collection-config-builder.ts b/packages/db/src/query/live/collection-config-builder.ts index 7cbe03253..fe2f2442e 100644 --- a/packages/db/src/query/live/collection-config-builder.ts +++ b/packages/db/src/query/live/collection-config-builder.ts @@ -4,6 +4,10 @@ import { buildQuery, getQueryIR } from "../builder/index.js" import { transactionScopedScheduler } from "../../scheduler.js" import { getActiveTransaction } from "../../transactions.js" import { CollectionSubscriber } from "./collection-subscriber.js" +import { + attachBuilderToConfig, + getCollectionBuilder, +} from "./collection-registry.js" import type { SchedulerContextId } from "../../scheduler.js" import type { CollectionSubscription } from "../../collection/subscription.js" import type { RootStreamBuilder } from "@tanstack/db-ivm" @@ -14,6 +18,7 @@ import type { KeyedStream, ResultStream, SyncConfig, + UtilsRecord, } from "../../types.js" import type { Context, GetResult } from "../builder/types.js" import type { BasicExpression, QueryIR } from "../ir.js" @@ -25,6 +30,10 @@ import type { SyncState, } from "./types.js" +export type RunCountUtils = UtilsRecord & { + getRunCount: () => number +} + type PendingGraphRun = { config: Parameters[`sync`]>[0] syncState: FullSyncState @@ -55,6 +64,7 @@ export class CollectionConfigBuilder< private readonly compare?: (val1: TResult, val2: TResult) => number private isGraphRunning = false + private runCount = 0 private graphCache: D2 | undefined private inputsCache: Record> | undefined @@ -63,6 +73,15 @@ export class CollectionConfigBuilder< | Map> | undefined + private readonly aliasDependencies: Record< + string, + Array> + > = Object.create(null) + + private readonly builderDependencies = new Set< + CollectionConfigBuilder + >() + // Map of source alias to subscription readonly subscriptions: Record = {} // Map of source aliases to functions that load keys for that lazy source @@ -101,8 +120,8 @@ export class CollectionConfigBuilder< this.compileBasePipeline() } - getConfig(): CollectionConfig { - return { + getConfig(): CollectionConfig & { utils: RunCountUtils } { + const config = { id: this.id, getKey: this.config.getKey || @@ -115,7 +134,12 @@ export class CollectionConfigBuilder< onUpdate: this.config.onUpdate, onDelete: this.config.onDelete, startSync: this.config.startSync, - } + utils: { + getRunCount: this.getRunCount.bind(this), + }, + } as CollectionConfig & { utils: RunCountUtils } + attachBuilderToConfig(config, this) + return config } getCollectionIdForAlias(alias: string): string { @@ -189,10 +213,34 @@ export class CollectionConfigBuilder< config: Parameters[`sync`]>[0], syncState: FullSyncState, callback?: () => boolean, - options?: { contextId?: SchedulerContextId; jobId?: unknown } + options?: { + contextId?: SchedulerContextId + jobId?: unknown + alias?: string + dependencies?: Array> + } ) { const contextId = options?.contextId ?? getActiveTransaction()?.id const jobId = options?.jobId ?? this + const dependencyBuilders = (() => { + if (options?.dependencies) { + return options.dependencies + } + + const deps = new Set(this.builderDependencies) + if (options?.alias) { + const aliasDeps = this.aliasDependencies[options.alias] + if (aliasDeps) { + for (const dep of aliasDeps) { + deps.add(dep) + } + } + } + + deps.delete(this) + + return Array.from(deps) + })() // We intentionally scope deduplication to the builder instance. Each instance // owns caches and compiled pipelines, so sharing an entry across instances that // merely reuse the same string id would bind the wrong `this` in the run closure. @@ -211,6 +259,7 @@ export class CollectionConfigBuilder< return { state, run: () => { + this.incrementRunCount() const combinedLoader = state.loadCallbacks.size > 0 ? () => { @@ -257,6 +306,7 @@ export class CollectionConfigBuilder< transactionScopedScheduler.schedule({ contextId, jobId, + dependencies: dependencyBuilders, createEntry, updateEntry, }) @@ -269,6 +319,14 @@ export class CollectionConfigBuilder< } } + incrementRunCount() { + this.runCount++ + } + + getRunCount() { + return this.runCount + } + private syncFn(config: Parameters[`sync`]>[0]) { const syncState: SyncState = { messagesCount: 0, @@ -498,6 +556,14 @@ export class CollectionConfigBuilder< const collection = this.collectionByAlias[alias] ?? this.collections[collectionId]! + const dependencyBuilder = getCollectionBuilder(collection) + if (dependencyBuilder && dependencyBuilder !== this) { + this.aliasDependencies[alias] = [dependencyBuilder] + this.builderDependencies.add(dependencyBuilder) + } else { + this.aliasDependencies[alias] = [] + } + const collectionSubscriber = new CollectionSubscriber( alias, collectionId, diff --git a/packages/db/src/query/live/collection-registry.ts b/packages/db/src/query/live/collection-registry.ts new file mode 100644 index 000000000..ee052fb5d --- /dev/null +++ b/packages/db/src/query/live/collection-registry.ts @@ -0,0 +1,40 @@ +import type { Collection } from "../../collection/index.js" +import type { CollectionConfigBuilder } from "./collection-config-builder.js" + +const BUILDER_SYMBOL = Symbol.for(`@tanstack/db.collection-config-builder`) + +const collectionBuilderRegistry = new WeakMap< + Collection, + CollectionConfigBuilder +>() + +export function attachBuilderToConfig( + config: object, + builder: CollectionConfigBuilder +): void { + Object.defineProperty(config, BUILDER_SYMBOL, { + value: builder, + configurable: false, + enumerable: false, + writable: false, + }) +} + +export function getBuilderFromConfig( + config: object +): CollectionConfigBuilder | undefined { + return (config as any)[BUILDER_SYMBOL] +} + +export function registerCollectionBuilder( + collection: Collection, + builder: CollectionConfigBuilder +): void { + collectionBuilderRegistry.set(collection, builder) +} + +export function getCollectionBuilder( + collection: Collection +): CollectionConfigBuilder | undefined { + return collectionBuilderRegistry.get(collection) +} diff --git a/packages/db/src/query/live/collection-subscriber.ts b/packages/db/src/query/live/collection-subscriber.ts index deb5ac01a..1fc4952d0 100644 --- a/packages/db/src/query/live/collection-subscriber.ts +++ b/packages/db/src/query/live/collection-subscriber.ts @@ -82,18 +82,13 @@ export class CollectionSubscriber< this.collection.config.getKey ) - // Do not provide the callback that loads more data - // if there's no more data to load - // otherwise we end up in an infinite loop trying to load more data const dataLoader = sentChanges > 0 ? callback : undefined - // We need to schedule a graph run even if there's no data to load - // because we need to mark the collection as ready if it's not already - // and that's only done during the graph execution this.collectionConfigBuilder.scheduleGraphRun( this.config, this.syncState, - dataLoader + dataLoader, + { alias: this.alias } ) } diff --git a/packages/db/src/scheduler.ts b/packages/db/src/scheduler.ts index ba341f1e5..0b96146f1 100644 --- a/packages/db/src/scheduler.ts +++ b/packages/db/src/scheduler.ts @@ -1,17 +1,46 @@ +/** + * Identifier used to scope scheduled work. For live queries this maps directly to a + * transaction id so that all mutations performed inside a transaction are flushed together. + */ export type SchedulerContextId = string | symbol +/** + * Internal representation of a job queued by the scheduler. The mutable `state` + * allows callers to accumulate information (callbacks, configuration, etc.) before + * the job eventually runs, while `run` executes the actual work. + */ interface SchedulerEntry { state: TState run: () => void } +/** + * Options accepted by {@link Scheduler.schedule}. Jobs are identified by a `jobId` + * unique within a context and may declare dependencies on other jobs. The entry + * factory is invoked the first time a job is scheduled, and `updateEntry` lets the + * caller merge additional state into existing entries. + */ interface ScheduleOptions { contextId?: SchedulerContextId jobId: unknown + dependencies?: Iterable createEntry: () => SchedulerEntry updateEntry?: (entry: SchedulerEntry) => void } +/** + * State stored per context (transaction). The queue preserves scheduling order, + * `entries` holds the jobs themselves, `dependencies` maps each job to the set of + * prerequisite jobs, and `completed` records which jobs have already run during the + * current flush. + */ +interface SchedulerContextState { + queue: Array + entries: Map> + dependencies: Map> + completed: Set +} + /** * Basic scoped scheduler that coalesces work by context and job. * @@ -25,14 +54,37 @@ interface ScheduleOptions { * schedule calls before the eventual `run()` executes. */ export class Scheduler { - private contexts = new Map< - SchedulerContextId, - Map> - >() + private contexts = new Map() + + /** + * Retrieve the state bucket for a context or create a fresh one if this is the + * first job scheduled for that context. + */ + private getOrCreateContext( + contextId: SchedulerContextId + ): SchedulerContextState { + let context = this.contexts.get(contextId) + if (!context) { + context = { + queue: [], + entries: new Map(), + dependencies: new Map(), + completed: new Set(), + } + this.contexts.set(contextId, context) + } + return context + } + /** + * Schedule work. When no context id is provided the job executes immediately. + * Otherwise we add/merge the job into the current transaction bucket so it can be + * flushed once all dependencies are satisfied. + */ schedule({ contextId, jobId, + dependencies, createEntry, updateEntry, }: ScheduleOptions): void { @@ -43,56 +95,118 @@ export class Scheduler { return } - let context = this.contexts.get(contextId) - if (!context) { - context = new Map() - this.contexts.set(contextId, context) - } + const context = this.getOrCreateContext(contextId) - let entry = context.get(jobId) as SchedulerEntry | undefined + let entry = context.entries.get(jobId) as SchedulerEntry | undefined if (!entry) { entry = createEntry() - context.set(jobId, entry) + context.entries.set(jobId, entry) + context.queue.push(jobId) } updateEntry?.(entry) + + if (dependencies) { + const depSet = new Set() + for (const dep of dependencies) { + if (dep !== jobId) { + depSet.add(dep) + } + } + context.dependencies.set(jobId, depSet) + } else if (!context.dependencies.has(jobId)) { + context.dependencies.set(jobId, new Set()) + } + + context.completed.delete(jobId) } + /** + * Flush all queued work for the provided context. Jobs that still have unmet + * dependencies are rotated to the back of the queue and retried on the next pass. + * If we complete a pass without running any job we throw to signal a dependency cycle. + */ flush(contextId: SchedulerContextId): void { - let context = this.contexts.get(contextId) - while (context) { - this.contexts.delete(contextId) + const context = this.contexts.get(contextId) + if (!context) return - for (const entry of context.values()) { - entry.run() + const { queue, entries, dependencies, completed } = context + + while (queue.length > 0) { + let ranThisPass = false + const jobsThisPass = queue.length + + for (let i = 0; i < jobsThisPass; i++) { + const jobId = queue.shift()! + const entry = entries.get(jobId) + if (!entry) { + dependencies.delete(jobId) + completed.delete(jobId) + continue + } + + const deps = dependencies.get(jobId) + const ready = + !deps || + deps.size === 0 || + [...deps].every((dep) => dep === jobId || completed.has(dep)) + + if (ready) { + entries.delete(jobId) + dependencies.delete(jobId) + entry.run() + completed.add(jobId) + ranThisPass = true + } else { + queue.push(jobId) + } } - context = this.contexts.get(contextId) + if (!ranThisPass) { + throw new Error( + `Scheduler detected unresolved dependencies for context ${String( + contextId + )}.` + ) + } } + + this.contexts.delete(contextId) } + /** + * Flush every context that still has pending work. Useful during tear-down to + * guarantee there are no lingering jobs. + */ flushAll(): void { - let next = this.contexts.keys().next() - while (!next.done) { - this.flush(next.value) - next = this.contexts.keys().next() + for (const contextId of Array.from(this.contexts.keys())) { + this.flush(contextId) } } + /** Clear any scheduled jobs for the given context. */ clear(contextId: SchedulerContextId): void { this.contexts.delete(contextId) } + /** Determine whether a context still has jobs waiting to be executed. */ hasPendingJobs(contextId: SchedulerContextId): boolean { - return this.contexts.has(contextId) + const context = this.contexts.get(contextId) + if (!context) return false + return context.entries.size > 0 } + /** Remove a single job from a context, cleaning up associated dependency data. */ clearJob(contextId: SchedulerContextId, jobId: unknown): void { const context = this.contexts.get(contextId) if (!context) return - context.delete(jobId) - if (context.size === 0) { + context.entries.delete(jobId) + context.dependencies.delete(jobId) + context.completed.delete(jobId) + context.queue = context.queue.filter((id) => id !== jobId) + + if (context.entries.size === 0) { this.contexts.delete(contextId) } } diff --git a/packages/db/tests/query/scheduler.test.ts b/packages/db/tests/query/scheduler.test.ts index 647510ee7..6eab940b2 100644 --- a/packages/db/tests/query/scheduler.test.ts +++ b/packages/db/tests/query/scheduler.test.ts @@ -272,7 +272,7 @@ describe(`live query scheduler`, () => { .join( { right: liveQueryB }, ({ left, right }) => eq(left.id, right.id), - `full` // Full join to ensure we would get multiple changes if this doesn't dedupe + `full` ) .select(({ left, right }) => ({ left: left?.value, @@ -285,8 +285,7 @@ describe(`live query scheduler`, () => { liveQueryB.preload(), liveQueryJoin.preload(), ]) - - const joinBatches = recordBatches(liveQueryJoin) + const baseRunCount = liveQueryJoin.utils.getRunCount() const tx = createTransaction({ mutationFn: async () => {}, @@ -298,12 +297,8 @@ describe(`live query scheduler`, () => { collectionB.insert({ id: 1, value: `B1` }) }) - expect(joinBatches.batches).toHaveLength(1) - expect(joinBatches.batches[0]![0]).toMatchObject({ type: `insert` }) - expect(joinBatches.batches[0]![0]!.value).toMatchObject({ - left: `A1`, - right: `B1`, - }) + expect(liveQueryJoin.toArray).toEqual([{ left: `A1`, right: `B1` }]) + expect(liveQueryJoin.utils.getRunCount()).toBe(baseRunCount + 1) tx.mutate(() => { collectionA.update(1, (draft) => { @@ -314,20 +309,8 @@ describe(`live query scheduler`, () => { }) }) - expect(joinBatches.batches).toHaveLength(2) - expect(joinBatches.batches[1]![0]).toMatchObject({ - type: `update`, - previousValue: { - left: `A1`, - right: `B1`, - }, - }) - expect(joinBatches.batches[1]![0]!.value).toMatchObject({ - left: `A1b`, - right: `B1b`, - }) - - joinBatches.unsubscribe() + expect(liveQueryJoin.toArray).toEqual([{ left: `A1b`, right: `B1b` }]) + expect(liveQueryJoin.utils.getRunCount()).toBe(baseRunCount + 2) tx.rollback() }) @@ -385,8 +368,7 @@ describe(`live query scheduler`, () => { }) await Promise.all([liveQueryA.preload(), hybridJoin.preload()]) - - const batches = recordBatches(hybridJoin) + const baseRunCount = hybridJoin.utils.getRunCount() const tx = createTransaction({ mutationFn: async () => {}, @@ -398,12 +380,8 @@ describe(`live query scheduler`, () => { collectionB.insert({ id: 7, value: `B7` }) }) - expect(batches.batches).toHaveLength(1) - expect(batches.batches[0]![0]).toMatchObject({ type: `insert` }) - expect(batches.batches[0]![0]!.value).toMatchObject({ - left: `A7`, - right: `B7`, - }) + expect(hybridJoin.toArray).toEqual([{ left: `A7`, right: `B7` }]) + expect(hybridJoin.utils.getRunCount()).toBe(baseRunCount + 1) tx.mutate(() => { collectionA.update(7, (draft) => { @@ -414,20 +392,8 @@ describe(`live query scheduler`, () => { }) }) - expect(batches.batches).toHaveLength(2) - expect(batches.batches[1]![0]).toMatchObject({ - type: `update`, - previousValue: { - left: `A7`, - right: `B7`, - }, - }) - expect(batches.batches[1]![0]!.value).toMatchObject({ - left: `A7b`, - right: `B7b`, - }) - - batches.unsubscribe() + expect(hybridJoin.toArray).toEqual([{ left: `A7b`, right: `B7b` }]) + expect(hybridJoin.utils.getRunCount()).toBe(baseRunCount + 2) tx.rollback() }) @@ -485,8 +451,7 @@ describe(`live query scheduler`, () => { }) await Promise.all([liveQueryA.preload(), join.preload()]) - - const batches = recordBatches(join) + const baseRunCount = join.utils.getRunCount() const tx = createTransaction({ mutationFn: async () => {}, @@ -498,16 +463,8 @@ describe(`live query scheduler`, () => { collectionA.insert({ id: 42, value: `left-later` }) }) - expect(batches.batches).toHaveLength(1) - expect(batches.batches[0]![0]).toMatchObject({ - type: `insert`, - value: { - left: `left-later`, - right: `right-first`, - }, - }) - - batches.unsubscribe() + expect(join.toArray).toEqual([{ left: `left-later`, right: `right-first` }]) + expect(join.utils.getRunCount()).toBe(baseRunCount + 1) tx.rollback() }) From 983cb4ea1e8b22161c95841bd92fbc652fc91599 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 4 Oct 2025 15:46:38 +0100 Subject: [PATCH 8/9] better comments --- .../query/live/collection-config-builder.ts | 18 ++++++ .../db/src/query/live/collection-registry.ts | 28 ++++++++++ .../src/query/live/collection-subscriber.ts | 4 ++ packages/db/src/scheduler.ts | 55 +++++++------------ 4 files changed, 71 insertions(+), 34 deletions(-) diff --git a/packages/db/src/query/live/collection-config-builder.ts b/packages/db/src/query/live/collection-config-builder.ts index fe2f2442e..d3ef76ae7 100644 --- a/packages/db/src/query/live/collection-config-builder.ts +++ b/packages/db/src/query/live/collection-config-builder.ts @@ -209,6 +209,24 @@ export class CollectionConfigBuilder< } } + /** + * Schedules a graph run with the transaction-scoped scheduler. + * Ensures each builder runs at most once per transaction, with automatic dependency tracking + * to run parent queries before child queries. Outside a transaction, runs immediately. + * + * Multiple calls during a transaction are coalesced into a single execution. + * Dependencies are auto-discovered from subscribed live queries, or can be overridden. + * Load callbacks are combined when entries merge. + * + * @param config - Collection sync configuration with begin/commit/markReady callbacks + * @param syncState - The full sync state containing the D2 graph, inputs, and pipeline + * @param callback - Optional callback to load more data if needed (returns true when done) + * @param options - Optional scheduling configuration + * @param options.contextId - Transaction ID to group work; defaults to active transaction + * @param options.jobId - Unique identifier for this job; defaults to this builder instance + * @param options.alias - Source alias that triggered this schedule; adds alias-specific dependencies + * @param options.dependencies - Explicit dependency list; overrides auto-discovered dependencies + */ scheduleGraphRun( config: Parameters[`sync`]>[0], syncState: FullSyncState, diff --git a/packages/db/src/query/live/collection-registry.ts b/packages/db/src/query/live/collection-registry.ts index ee052fb5d..985465350 100644 --- a/packages/db/src/query/live/collection-registry.ts +++ b/packages/db/src/query/live/collection-registry.ts @@ -8,6 +8,13 @@ const collectionBuilderRegistry = new WeakMap< CollectionConfigBuilder >() +/** + * Attaches a builder to a config object via a non-enumerable symbol property. + * Used for dependency tracking between live queries. + * + * @param config - The collection config object to attach the builder to + * @param builder - The builder instance to attach + */ export function attachBuilderToConfig( config: object, builder: CollectionConfigBuilder @@ -20,12 +27,26 @@ export function attachBuilderToConfig( }) } +/** + * Retrieves the builder attached to a config object. + * + * @param config - The collection config object + * @returns The attached builder, or `undefined` if none exists + */ export function getBuilderFromConfig( config: object ): CollectionConfigBuilder | undefined { return (config as any)[BUILDER_SYMBOL] } +/** + * Registers a builder for a collection in the global registry. + * Used to detect when a live query depends on another live query, + * enabling the scheduler to ensure parent queries run first. + * + * @param collection - The collection to register the builder for + * @param builder - The builder that produces this collection + */ export function registerCollectionBuilder( collection: Collection, builder: CollectionConfigBuilder @@ -33,6 +54,13 @@ export function registerCollectionBuilder( collectionBuilderRegistry.set(collection, builder) } +/** + * Retrieves the builder registered for a collection. + * Used to discover dependencies when a live query subscribes to another live query. + * + * @param collection - The collection to look up + * @returns The registered builder, or `undefined` if none exists + */ export function getCollectionBuilder( collection: Collection ): CollectionConfigBuilder | undefined { diff --git a/packages/db/src/query/live/collection-subscriber.ts b/packages/db/src/query/live/collection-subscriber.ts index 1fc4952d0..b4989878b 100644 --- a/packages/db/src/query/live/collection-subscriber.ts +++ b/packages/db/src/query/live/collection-subscriber.ts @@ -198,6 +198,10 @@ export class CollectionSubscriber< } const trackedChanges = this.trackSentValues(changes, orderByInfo.comparator) + + // Cache the bound loader on the subscription using a symbol property. + // This ensures we pass the same function instance to the scheduler each time, + // allowing it to deduplicate callbacks when multiple changes arrive during a transaction. type SubscriptionWithLoader = CollectionSubscription & { [loadMoreCallbackSymbol]?: () => boolean } diff --git a/packages/db/src/scheduler.ts b/packages/db/src/scheduler.ts index 0b96146f1..b89958cc7 100644 --- a/packages/db/src/scheduler.ts +++ b/packages/db/src/scheduler.ts @@ -1,13 +1,11 @@ /** - * Identifier used to scope scheduled work. For live queries this maps directly to a - * transaction id so that all mutations performed inside a transaction are flushed together. + * Identifier used to scope scheduled work. Maps to a transaction id for live queries. */ export type SchedulerContextId = string | symbol /** - * Internal representation of a job queued by the scheduler. The mutable `state` - * allows callers to accumulate information (callbacks, configuration, etc.) before - * the job eventually runs, while `run` executes the actual work. + * Internal representation of a job queued by the scheduler. + * Mutable `state` accumulates information before `run` executes the work. */ interface SchedulerEntry { state: TState @@ -15,10 +13,8 @@ interface SchedulerEntry { } /** - * Options accepted by {@link Scheduler.schedule}. Jobs are identified by a `jobId` - * unique within a context and may declare dependencies on other jobs. The entry - * factory is invoked the first time a job is scheduled, and `updateEntry` lets the - * caller merge additional state into existing entries. + * Options for {@link Scheduler.schedule}. Jobs are identified by `jobId` within a context + * and may declare dependencies. `createEntry` is called once; `updateEntry` merges state. */ interface ScheduleOptions { contextId?: SchedulerContextId @@ -29,10 +25,8 @@ interface ScheduleOptions { } /** - * State stored per context (transaction). The queue preserves scheduling order, - * `entries` holds the jobs themselves, `dependencies` maps each job to the set of - * prerequisite jobs, and `completed` records which jobs have already run during the - * current flush. + * State per context. Queue preserves order, entries hold jobs, dependencies track + * prerequisites, and completed records which jobs have run during the current flush. */ interface SchedulerContextState { queue: Array @@ -42,23 +36,19 @@ interface SchedulerContextState { } /** - * Basic scoped scheduler that coalesces work by context and job. + * Scoped scheduler that coalesces work by context and job. * - * - A **context** (for example a transaction id) represents the batching boundary. - * Work scheduled with the same context id is queued until that context is flushed. - * - A **job id** deduplicates work inside a context. The first scheduled job determines - * the execution slot; subsequent schedules update the same entry but preserve order. - * - When no context id is provided the work executes immediately (no batching). + * - **context** (e.g. transaction id) defines the batching boundary; work is queued until flushed. + * - **job id** deduplicates work within a context; subsequent schedules update the entry. + * - Without a context id, work executes immediately. * - * Each job entry owns a mutable state object so callers can merge new data between - * schedule calls before the eventual `run()` executes. + * Each entry has mutable state so callers can merge data before `run()` executes. */ export class Scheduler { private contexts = new Map() /** - * Retrieve the state bucket for a context or create a fresh one if this is the - * first job scheduled for that context. + * Get or create the state bucket for a context. */ private getOrCreateContext( contextId: SchedulerContextId @@ -77,9 +67,8 @@ export class Scheduler { } /** - * Schedule work. When no context id is provided the job executes immediately. - * Otherwise we add/merge the job into the current transaction bucket so it can be - * flushed once all dependencies are satisfied. + * Schedule work. Without a context id, executes immediately. + * Otherwise queues the job to be flushed once dependencies are satisfied. */ schedule({ contextId, @@ -122,9 +111,8 @@ export class Scheduler { } /** - * Flush all queued work for the provided context. Jobs that still have unmet - * dependencies are rotated to the back of the queue and retried on the next pass. - * If we complete a pass without running any job we throw to signal a dependency cycle. + * Flush all queued work for a context. Jobs with unmet dependencies are retried. + * Throws if a pass completes without running any job (dependency cycle). */ flush(contextId: SchedulerContextId): void { const context = this.contexts.get(contextId) @@ -175,8 +163,7 @@ export class Scheduler { } /** - * Flush every context that still has pending work. Useful during tear-down to - * guarantee there are no lingering jobs. + * Flush all contexts with pending work. Useful during tear-down. */ flushAll(): void { for (const contextId of Array.from(this.contexts.keys())) { @@ -184,19 +171,19 @@ export class Scheduler { } } - /** Clear any scheduled jobs for the given context. */ + /** Clear all scheduled jobs for a context. */ clear(contextId: SchedulerContextId): void { this.contexts.delete(contextId) } - /** Determine whether a context still has jobs waiting to be executed. */ + /** Check if a context has pending jobs. */ hasPendingJobs(contextId: SchedulerContextId): boolean { const context = this.contexts.get(contextId) if (!context) return false return context.entries.size > 0 } - /** Remove a single job from a context, cleaning up associated dependency data. */ + /** Remove a single job from a context and clean up its dependencies. */ clearJob(contextId: SchedulerContextId, jobId: unknown): void { const context = this.contexts.get(contextId) if (!context) return From 4d5ff087fa55bd5ce4ba615a07682f2e1586e635 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 4 Oct 2025 15:55:15 +0100 Subject: [PATCH 9/9] address chatgpt review --- packages/db/src/query/live/collection-config-builder.ts | 9 ++++++++- packages/db/src/scheduler.ts | 2 ++ packages/db/src/transactions.ts | 8 ++++++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/packages/db/src/query/live/collection-config-builder.ts b/packages/db/src/query/live/collection-config-builder.ts index d3ef76ae7..86d37b7a3 100644 --- a/packages/db/src/query/live/collection-config-builder.ts +++ b/packages/db/src/query/live/collection-config-builder.ts @@ -239,6 +239,8 @@ export class CollectionConfigBuilder< } ) { const contextId = options?.contextId ?? getActiveTransaction()?.id + // Use the builder instance as the job ID for deduplication. This is memory-safe + // because the scheduler's context Map is deleted after flushing (no long-term retention). const jobId = options?.jobId ?? this const dependencyBuilders = (() => { if (options?.dependencies) { @@ -307,7 +309,12 @@ export class CollectionConfigBuilder< } : undefined - this.maybeRunGraph(state.config, state.syncState, combinedLoader) + try { + this.maybeRunGraph(state.config, state.syncState, combinedLoader) + } finally { + // Clear callbacks after run to avoid carrying stale closures across transactions + state.loadCallbacks.clear() + } }, } } diff --git a/packages/db/src/scheduler.ts b/packages/db/src/scheduler.ts index b89958cc7..bb3f3e688 100644 --- a/packages/db/src/scheduler.ts +++ b/packages/db/src/scheduler.ts @@ -142,6 +142,8 @@ export class Scheduler { if (ready) { entries.delete(jobId) dependencies.delete(jobId) + // Run the job. If it throws, we don't mark it complete, allowing the + // error to propagate while maintaining scheduler state consistency. entry.run() completed.add(jobId) ranThisPass = true diff --git a/packages/db/src/transactions.ts b/packages/db/src/transactions.ts index c8df88b45..9fc79238b 100644 --- a/packages/db/src/transactions.ts +++ b/packages/db/src/transactions.ts @@ -189,8 +189,12 @@ function registerTransaction(tx: Transaction) { function unregisterTransaction(tx: Transaction) { // Always flush pending work for this transaction before removing it from // the ambient stack – this runs even if the mutate callback throws. - transactionScopedScheduler.flush(tx.id) - transactionStack = transactionStack.filter((t) => t.id !== tx.id) + // If flush throws (e.g., due to a job error), we still clean up the stack. + try { + transactionScopedScheduler.flush(tx.id) + } finally { + transactionStack = transactionStack.filter((t) => t.id !== tx.id) + } } function removeFromPendingList(tx: Transaction) {