Skip to content

Commit 7d66b38

Browse files
committed
Feat: Durable event log models (#2940)
* feat: add new models * feat: migration * chore: generate * fix: linter * fix: couple types * Feat: Initial work on CRUD operations for durable events (#2943) * feat: initial query work * feat: first pass at durable events repo + queries * feat: add new payload type for durable event data * chore: gen * fix: payload key * fix: lint
1 parent 4054d17 commit 7d66b38

File tree

9 files changed

+1284
-6
lines changed

9 files changed

+1284
-6
lines changed
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
-- +goose Up
2+
-- +goose StatementBegin
3+
-- v1_durable_event_log represents the log file for the durable event history
4+
-- of a durable task. This table stores metadata like sequence values for entries.
5+
--
6+
-- Important: writers to v1_durable_event_log_entry should lock this row to increment the sequence value.
7+
CREATE TABLE v1_durable_event_log_file (
8+
-- The id and inserted_at of the durable task which created this entry
9+
durable_task_id BIGINT NOT NULL,
10+
durable_task_inserted_at TIMESTAMPTZ NOT NULL,
11+
latest_inserted_at TIMESTAMPTZ NOT NULL,
12+
-- A monotonically increasing node id for this durable event log scoped to the durable task.
13+
-- Starts at 0 and increments by 1 for each new entry.
14+
latest_node_id BIGINT NOT NULL,
15+
-- The latest branch id. Branches represent different execution paths on a replay.
16+
latest_branch_id BIGINT NOT NULL,
17+
-- The parent node id which should be linked to the first node in a new branch to its parent node.
18+
latest_branch_first_parent_node_id BIGINT NOT NULL,
19+
CONSTRAINT v1_durable_event_log_file_pkey PRIMARY KEY (durable_task_id, durable_task_inserted_at)
20+
) PARTITION BY RANGE(durable_task_inserted_at);
21+
22+
CREATE TYPE v1_durable_event_log_entry_kind AS ENUM (
23+
'RUN_TRIGGERED',
24+
'WAIT_FOR_STARTED',
25+
'MEMO_STARTED'
26+
);
27+
28+
CREATE TABLE v1_durable_event_log_entry (
29+
-- need an external id for consistency with the payload store logic (unfortunately)
30+
external_id UUID NOT NULL,
31+
-- The id and inserted_at of the durable task which created this entry
32+
durable_task_id BIGINT NOT NULL,
33+
durable_task_inserted_at TIMESTAMPTZ NOT NULL,
34+
-- The inserted_at time of this event from a DB clock perspective.
35+
-- Important: for consistency, this should always be auto-generated via the CURRENT_TIMESTAMP!
36+
inserted_at TIMESTAMPTZ NOT NULL,
37+
kind v1_durable_event_log_entry_kind,
38+
-- The node number in the durable event log. This represents a monotonically increasing
39+
-- sequence value generated from v1_durable_event_log_file.latest_node_id
40+
node_id BIGINT NOT NULL,
41+
-- The parent node id for this event, if any. This can be null.
42+
parent_node_id BIGINT,
43+
-- The branch id when this event was first seen. A durable event log can be a part of many branches.
44+
branch_id BIGINT NOT NULL,
45+
-- Todo: Associated data for this event should be stored in the v1_payload table!
46+
-- data JSONB,
47+
-- The hash of the data stored in the v1_payload table to check non-determinism violations.
48+
-- This can be null for event types that don't have associated data.
49+
-- TODO: we can add CHECK CONSTRAINT for event types that require data_hash to be non-null.
50+
data_hash BYTEA,
51+
-- Can discuss: adds some flexibility for future hash algorithms
52+
data_hash_alg TEXT,
53+
-- Access patterns:
54+
-- Definite: we'll query directly for the node_id when a durable task is replaying its log
55+
-- Possible: we may want to query a range of node_ids for a durable task
56+
-- Possible: we may want to query a range of inserted_ats for a durable task
57+
CONSTRAINT v1_durable_event_log_entry_pkey PRIMARY KEY (durable_task_id, durable_task_inserted_at, node_id)
58+
) PARTITION BY RANGE(durable_task_inserted_at);
59+
60+
CREATE TYPE v1_durable_event_log_callback_kind AS ENUM (
61+
'RUN_COMPLETED',
62+
-- WAIT_FOR_COMPLETED can represent a durable sleep, an event, or some boolean combination of
63+
-- these.
64+
'WAIT_FOR_COMPLETED',
65+
'MEMO_COMPLETED'
66+
);
67+
68+
-- v1_durable_event_log_callback stores callbacks that complete a durable event log entry. This needs to be stateful
69+
-- so that it persists across worker restarts for the same durable task.
70+
--
71+
-- Implementation notes (should be removed or moved elsewhere once this is done):
72+
-- 1. Why not store callbacks in the core durable event log? Because their entries are not guaranteed to be
73+
-- stable. For example, if a task is replayed, we would need to remove old callback entries and insert new ones,
74+
-- in which case the durable event log would be changing history.
75+
-- 2. The two important access patterns are direct queries from the worker side to check if a callback is satisfied,
76+
-- and direct queries from the engine side to mark a callback as satisfied when we've satisfied a v1_match. Because
77+
-- of this, we likely need to add a `callback_key` field to the v1_match table.
78+
CREATE TABLE v1_durable_event_log_callback (
79+
durable_task_id BIGINT NOT NULL,
80+
durable_task_inserted_at TIMESTAMPTZ NOT NULL,
81+
-- The inserted_at time of this callback from a DB clock perspective.
82+
-- Important: for consistency, this should always be auto-generated via the CURRENT_TIMESTAMP!
83+
inserted_at TIMESTAMPTZ NOT NULL,
84+
kind v1_durable_event_log_callback_kind,
85+
-- A unique, generated key for this callback. This key will change dependent on the callback kind.
86+
-- Important: this key should be easily queryable directly from the durable log writers but also the controllers
87+
-- that are checking if callbacks are satisfied.
88+
key TEXT NOT NULL,
89+
-- The associated log node id that this callback references.
90+
node_id BIGINT NOT NULL,
91+
-- Whether this callback has been seen by the engine or not. Note that is_satisfied _may_ change multiple
92+
-- times through the lifecycle of a callback, and readers should not assume that once it's true it will always be true.
93+
is_satisfied BOOLEAN NOT NULL DEFAULT FALSE,
94+
-- Access patterns:
95+
-- Definite: we'll query directly for the key when a worker is checking if a callback is satisfied
96+
-- Definite: we'll query directly for the key when a v1_match has been satisfied and we need to mark the callback as satisfied
97+
CONSTRAINT v1_durable_event_log_callback_pkey PRIMARY KEY (durable_task_id, durable_task_inserted_at, key)
98+
) PARTITION BY RANGE(durable_task_inserted_at);
99+
-- +goose StatementEnd
100+
101+
-- +goose Down
102+
-- +goose StatementBegin
103+
DROP TABLE v1_durable_event_log_callback;
104+
DROP TABLE v1_durable_event_log_entry;
105+
DROP TABLE v1_durable_event_log_file;
106+
DROP TYPE v1_durable_event_log_entry_kind;
107+
DROP TYPE v1_durable_event_log_callback_kind;
108+
-- +goose StatementEnd
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
-- +goose Up
2+
-- +goose StatementBegin
3+
ALTER TYPE v1_payload_type ADD VALUE IF NOT EXISTS 'DURABLE_EVENT_LOG_ENTRY_DATA';
4+
-- +goose StatementEnd
5+
6+
-- +goose Down
7+
-- +goose StatementBegin
8+
-- Intentionally no down
9+
-- +goose StatementEnd

0 commit comments

Comments
 (0)