|
| 1 | +-- +goose Up |
| 2 | +-- +goose StatementBegin |
| 3 | +-- v1_durable_event_log represents the log file for the durable event history |
| 4 | +-- of a durable task. This table stores metadata like sequence values for entries. |
| 5 | +-- |
| 6 | +-- Important: writers to v1_durable_event_log_entry should lock this row to increment the sequence value. |
| 7 | +CREATE TABLE v1_durable_event_log_file ( |
| 8 | + -- The id and inserted_at of the durable task which created this entry |
| 9 | + durable_task_id BIGINT NOT NULL, |
| 10 | + durable_task_inserted_at TIMESTAMPTZ NOT NULL, |
| 11 | + latest_inserted_at TIMESTAMPTZ NOT NULL, |
| 12 | + -- A monotonically increasing node id for this durable event log scoped to the durable task. |
| 13 | + -- Starts at 0 and increments by 1 for each new entry. |
| 14 | + latest_node_id BIGINT NOT NULL, |
| 15 | + -- The latest branch id. Branches represent different execution paths on a replay. |
| 16 | + latest_branch_id BIGINT NOT NULL, |
| 17 | + -- The parent node id which should be linked to the first node in a new branch to its parent node. |
| 18 | + latest_branch_first_parent_node_id BIGINT NOT NULL, |
| 19 | + CONSTRAINT v1_durable_event_log_file_pkey PRIMARY KEY (durable_task_id, durable_task_inserted_at) |
| 20 | +) PARTITION BY RANGE(durable_task_inserted_at); |
| 21 | + |
| 22 | +CREATE TYPE v1_durable_event_log_entry_kind AS ENUM ( |
| 23 | + 'RUN_TRIGGERED', |
| 24 | + 'WAIT_FOR_STARTED', |
| 25 | + 'MEMO_STARTED' |
| 26 | +); |
| 27 | + |
| 28 | +CREATE TABLE v1_durable_event_log_entry ( |
| 29 | + -- need an external id for consistency with the payload store logic (unfortunately) |
| 30 | + external_id UUID NOT NULL, |
| 31 | + -- The id and inserted_at of the durable task which created this entry |
| 32 | + durable_task_id BIGINT NOT NULL, |
| 33 | + durable_task_inserted_at TIMESTAMPTZ NOT NULL, |
| 34 | + -- The inserted_at time of this event from a DB clock perspective. |
| 35 | + -- Important: for consistency, this should always be auto-generated via the CURRENT_TIMESTAMP! |
| 36 | + inserted_at TIMESTAMPTZ NOT NULL, |
| 37 | + kind v1_durable_event_log_entry_kind, |
| 38 | + -- The node number in the durable event log. This represents a monotonically increasing |
| 39 | + -- sequence value generated from v1_durable_event_log_file.latest_node_id |
| 40 | + node_id BIGINT NOT NULL, |
| 41 | + -- The parent node id for this event, if any. This can be null. |
| 42 | + parent_node_id BIGINT, |
| 43 | + -- The branch id when this event was first seen. A durable event log can be a part of many branches. |
| 44 | + branch_id BIGINT NOT NULL, |
| 45 | + -- Todo: Associated data for this event should be stored in the v1_payload table! |
| 46 | + -- data JSONB, |
| 47 | + -- The hash of the data stored in the v1_payload table to check non-determinism violations. |
| 48 | + -- This can be null for event types that don't have associated data. |
| 49 | + -- TODO: we can add CHECK CONSTRAINT for event types that require data_hash to be non-null. |
| 50 | + data_hash BYTEA, |
| 51 | + -- Can discuss: adds some flexibility for future hash algorithms |
| 52 | + data_hash_alg TEXT, |
| 53 | + -- Access patterns: |
| 54 | + -- Definite: we'll query directly for the node_id when a durable task is replaying its log |
| 55 | + -- Possible: we may want to query a range of node_ids for a durable task |
| 56 | + -- Possible: we may want to query a range of inserted_ats for a durable task |
| 57 | + CONSTRAINT v1_durable_event_log_entry_pkey PRIMARY KEY (durable_task_id, durable_task_inserted_at, node_id) |
| 58 | +) PARTITION BY RANGE(durable_task_inserted_at); |
| 59 | + |
| 60 | +CREATE TYPE v1_durable_event_log_callback_kind AS ENUM ( |
| 61 | + 'RUN_COMPLETED', |
| 62 | + -- WAIT_FOR_COMPLETED can represent a durable sleep, an event, or some boolean combination of |
| 63 | + -- these. |
| 64 | + 'WAIT_FOR_COMPLETED', |
| 65 | + 'MEMO_COMPLETED' |
| 66 | +); |
| 67 | + |
| 68 | +-- v1_durable_event_log_callback stores callbacks that complete a durable event log entry. This needs to be stateful |
| 69 | +-- so that it persists across worker restarts for the same durable task. |
| 70 | +-- |
| 71 | +-- Implementation notes (should be removed or moved elsewhere once this is done): |
| 72 | +-- 1. Why not store callbacks in the core durable event log? Because their entries are not guaranteed to be |
| 73 | +-- stable. For example, if a task is replayed, we would need to remove old callback entries and insert new ones, |
| 74 | +-- in which case the durable event log would be changing history. |
| 75 | +-- 2. The two important access patterns are direct queries from the worker side to check if a callback is satisfied, |
| 76 | +-- and direct queries from the engine side to mark a callback as satisfied when we've satisfied a v1_match. Because |
| 77 | +-- of this, we likely need to add a `callback_key` field to the v1_match table. |
| 78 | +CREATE TABLE v1_durable_event_log_callback ( |
| 79 | + durable_task_id BIGINT NOT NULL, |
| 80 | + durable_task_inserted_at TIMESTAMPTZ NOT NULL, |
| 81 | + -- The inserted_at time of this callback from a DB clock perspective. |
| 82 | + -- Important: for consistency, this should always be auto-generated via the CURRENT_TIMESTAMP! |
| 83 | + inserted_at TIMESTAMPTZ NOT NULL, |
| 84 | + kind v1_durable_event_log_callback_kind, |
| 85 | + -- A unique, generated key for this callback. This key will change dependent on the callback kind. |
| 86 | + -- Important: this key should be easily queryable directly from the durable log writers but also the controllers |
| 87 | + -- that are checking if callbacks are satisfied. |
| 88 | + key TEXT NOT NULL, |
| 89 | + -- The associated log node id that this callback references. |
| 90 | + node_id BIGINT NOT NULL, |
| 91 | + -- Whether this callback has been seen by the engine or not. Note that is_satisfied _may_ change multiple |
| 92 | + -- times through the lifecycle of a callback, and readers should not assume that once it's true it will always be true. |
| 93 | + is_satisfied BOOLEAN NOT NULL DEFAULT FALSE, |
| 94 | + -- Access patterns: |
| 95 | + -- Definite: we'll query directly for the key when a worker is checking if a callback is satisfied |
| 96 | + -- Definite: we'll query directly for the key when a v1_match has been satisfied and we need to mark the callback as satisfied |
| 97 | + CONSTRAINT v1_durable_event_log_callback_pkey PRIMARY KEY (durable_task_id, durable_task_inserted_at, key) |
| 98 | +) PARTITION BY RANGE(durable_task_inserted_at); |
| 99 | +-- +goose StatementEnd |
| 100 | + |
| 101 | +-- +goose Down |
| 102 | +-- +goose StatementBegin |
| 103 | +DROP TABLE v1_durable_event_log_callback; |
| 104 | +DROP TABLE v1_durable_event_log_entry; |
| 105 | +DROP TABLE v1_durable_event_log_file; |
| 106 | +DROP TYPE v1_durable_event_log_entry_kind; |
| 107 | +DROP TYPE v1_durable_event_log_callback_kind; |
| 108 | +-- +goose StatementEnd |
0 commit comments