CQ: Reorder and document #vqstate{}

lhoguin · lhoguin · commit 50e74d8b08f9 · 2025-11-25T15:08:48.000+01:00
diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl
@@ -123,57 +123,113 @@
 
 -behaviour(rabbit_backing_queue).
 
--record(vqstate,
-        { q_head,
-          q_tail,
-          next_seq_id,
-          %% seq_id() of first undelivered message
-          %% everything before this seq_id() was delivered at least once
-          next_deliver_seq_id,
-          ram_pending_ack,    %% msgs still in RAM
-          disk_pending_ack,   %% msgs in store, paged out
-          index_state,
-          store_state,
-          msg_store_clients,
-          durable,
-          transient_threshold,
-          qi_embed_msgs_below,
-
-          bytes,              %% w/o unacked
-          unacked_bytes,
-          persistent_count,   %% w   unacked
-          persistent_bytes,   %% w   unacked
-
-          ram_msg_count,      %% w/o unacked
-          ram_bytes,          %% w   unacked
-          out_counter,
-          in_counter,
-          rates,
-          %% There are two confirms paths: either store/index produce confirms
-          %% separately (v2 with per-vhost message store) or the confirms
-          %% are produced all at once while syncing/flushing (v2 with per-queue
-          %% message store). The latter is more efficient as it avoids many
-          %% sets operations.
-          msgs_on_disk,
-          msg_indices_on_disk,
-          unconfirmed,
-          confirmed,
-          ack_out_counter,
-          ack_in_counter,
-          %% Unlike the other counters these two do not feed into
-          %% #rates{} and get reset
-          disk_read_count,
-          disk_write_count,
-
-          %% Fast path for confirms handling. Instead of having
-          %% index/store keep track of confirms separately and
-          %% doing intersect/subtract/union we just put the messages
-          %% here and on sync move them to 'confirmed'.
-          unconfirmed_simple,
-          %% Queue data is grouped by VHost. We need to store it
-          %% to work with queue index.
-          virtual_host
-        }).
+-record(vqstate, {
+    %% Head of the queue. Index information has been loaded into
+    %% memory, and message body may have been depending on size.
+    q_head,
+    %% Tail of the queue, fully on disk.
+    q_tail,
+
+    %% SeqId of the next message published.
+    next_seq_id,
+    %% Everything before this SeqId was delivered at least once.
+    %% @todo Do we really need this if we add delivery_count?
+    %%       No we don't, we will just check delivery_count to know if was already delivered (delivery_count > 1).
+    %%       NO!! We can also remove the is_delivered in the msg_status since that value --doesn't-- DOES!! survive restarts.
+    %%       Actually we are using next_deliver_seq_id to know whether a message was already delivered and that survives restarts.
+    %%       But we could very well do the same with a delivery_count map. So making the delivery_count map survive restarts
+    %%       (and properly clean up on restart by removing transients) is the key to getting rid of these things.
+    next_deliver_seq_id,
+
+    %% Messages pending acks. These messages have been delivered to the channel
+    %% and we are expecting an ack (or requeue) back. Messages are in ram or disk
+    %% depending on whether the #msg_status{} record contains the message body.
+    %% Typically only smaller message bodies are kept in memory, larger ones are
+    %% read only when needed.
+    ram_pending_ack,
+    disk_pending_ack,
+
+    %% Index, queue store and shared message store states. In the latter's case
+    %% since the shared message store is separate processes, the state only
+    %% contains information to access it.
+    index_state,
+    store_state,
+    msg_store_clients,
+
+    %% Whether the queue is durable. Used to determine whether messages are
+    %% truly persistent (both messages and queue must be durable).
+    durable,
+
+    %% We must keep the virtual host information around in order to write
+    %% terms when terminating as the terms file is per-vhost.
+    virtual_host,
+
+    %% SeqId of the first persistent message. Determined during recovery and
+    %% used to identify which transient messages belong to a previous
+    %% incarnation of the node. We don't remove transient messages during
+    %% recovery to keep recovery fast (otherwise we'd have to go over the
+    %% entire queue contents), instead we mark where we left off and drop
+    %% the messages when they would have been consumed.
+    transient_threshold,
+
+    %% Maximum size of messages written to the queue store. The queue
+    %% store is meant to contain smaller messages, while larger messages
+    %% go to the shared message store. The shared message store benefits
+    %% from mechanisms like compaction and deduplication.
+    qi_embed_msgs_below,
+
+    %% There are two confirms paths: either store/index produce confirms
+    %% separately (per-vhost message store) or the confirms
+    %% are produced all at once while syncing/flushing (per-queue
+    %% message store). The latter is more efficient as it avoids many
+    %% sets operations.
+    msgs_on_disk,
+    msg_indices_on_disk,
+    unconfirmed,
+    confirmed,
+    %% Fast path for confirms handling. Instead of having
+    %% index/store keep track of confirms separately and
+    %% doing intersect/subtract/union we just put the messages
+    %% here and on sync move them to 'confirmed'.
+    unconfirmed_simple,
+
+    %% Metrics that are also used for sanity checking.
+    %%
+    %% They measure (with "unacked" meaning "messages pending acks"):
+    %% * the number of bytes in the queue (excluding unacked)
+    %% * the number of bytes in messages pending acks
+    %% * the number of persistent messages (including unacked)
+    %% * the number of bytes for persistent messages (including unacked)
+    %% * the number of messages currently in memory
+    %%   (excluding unacked because we can get those via `map_size(RPA)`)
+    %% * the number of bytes of messages currently in memory (including unacked)
+    %%
+    %% The total number of bytes of messages in the queue (including unacked)
+    %% is bytes + unacked_bytes.
+    %%
+    %% Messages can be both persistent and in memory at the same time,
+    %% for example when they are close to being delivered.
+    bytes,
+    unacked_bytes,
+    persistent_count,
+    persistent_bytes,
+    ram_msg_count,
+    ram_bytes,
+
+    %% Metrics for outgoing and ingoing messages rates.
+    %%
+    %% Counters get incremented per event and then an average is calculated
+    %% periodically into the #rates{} record.
+    out_counter,
+    in_counter,
+    ack_out_counter,
+    ack_in_counter,
+    rates,
+
+    %% Metrics totalling message reads and writes from/to disk.
+    disk_read_count,
+    disk_write_count
+}).
 
 -record(rates, { in, out, ack_in, ack_out, timestamp }).