Skip to content

Commit ab29295

Browse files
perf: Put canister queues behind Arcs (#3305)
This makes unmutated canister queues basically free to clone, which is important during certification and, more importantly, before and after every message execution (when the `SystemState` is cloned in order to allow for rollbacks). After the changes in #3241 and #3304, under heavy subnet load (millions of best-effort messages), cloning the canister queues accounts for about half of the message execution time; with this change, it drops to well under 10%.
1 parent 5716305 commit ab29295

File tree

1 file changed

+38
-27
lines changed
  • rs/replicated_state/src/canister_state

1 file changed

+38
-27
lines changed

rs/replicated_state/src/canister_state/queues.rs

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ pub struct CanisterQueues {
141141
/// no corresponding message in the message pool; or entry in the compact
142142
/// response maps (which record the `CallbackIds` of expired / shed inbound
143143
/// best-effort responses).
144-
canister_queues: BTreeMap<CanisterId, (InputQueue, OutputQueue)>,
144+
canister_queues: BTreeMap<CanisterId, (Arc<InputQueue>, Arc<OutputQueue>)>,
145145

146146
/// Backing store for `canister_queues` references, combining a `MessagePool`
147147
/// and maps of compact responses (`CallbackIds` of expired / shed responses),
@@ -180,7 +180,7 @@ pub struct CanisterQueues {
180180
pub struct CanisterOutputQueuesIterator<'a> {
181181
/// Priority queue of non-empty output queues. The next message to be popped
182182
/// / peeked is the one at the front of the first queue.
183-
queues: VecDeque<(&'a CanisterId, &'a mut OutputQueue)>,
183+
queues: VecDeque<(&'a CanisterId, &'a mut Arc<OutputQueue>)>,
184184

185185
/// Mutable store holding the messages referenced by `queues`.
186186
store: &'a mut MessageStoreImpl,
@@ -194,7 +194,7 @@ impl<'a> CanisterOutputQueuesIterator<'a> {
194194
/// `CanisterQueues::canister_queues` (a map of `CanisterId` to an input queue,
195195
/// output queue pair) and `MessagePool`.
196196
fn new(
197-
queues: &'a mut BTreeMap<CanisterId, (InputQueue, OutputQueue)>,
197+
queues: &'a mut BTreeMap<CanisterId, (Arc<InputQueue>, Arc<OutputQueue>)>,
198198
store: &'a mut MessageStoreImpl,
199199
) -> Self {
200200
let queues: VecDeque<_> = queues
@@ -281,7 +281,7 @@ impl<'a> CanisterOutputQueuesIterator<'a> {
281281
/// Computes the number of (potentially stale) messages left in `queues`.
282282
///
283283
/// Time complexity: `O(n)`.
284-
fn compute_size(queues: &VecDeque<(&'a CanisterId, &'a mut OutputQueue)>) -> usize {
284+
fn compute_size(queues: &VecDeque<(&'a CanisterId, &'a mut Arc<OutputQueue>)>) -> usize {
285285
queues.iter().map(|(_, q)| q.len()).sum()
286286
}
287287
}
@@ -383,10 +383,15 @@ impl MessageStoreImpl {
383383
/// next non-stale reference.
384384
///
385385
/// Panics if the reference at the front of the queue is stale.
386-
fn queue_pop_and_advance<T: Clone>(&mut self, queue: &mut CanisterQueue<T>) -> Option<T>
386+
fn queue_pop_and_advance<T: Clone>(&mut self, queue: &mut Arc<CanisterQueue<T>>) -> Option<T>
387387
where
388388
MessageStoreImpl: MessageStore<T>,
389389
{
390+
if queue.len() == 0 {
391+
return None;
392+
}
393+
394+
let queue = Arc::make_mut(queue);
390395
let reference = queue.pop()?;
391396

392397
// Advance to the next non-stale reference.
@@ -548,7 +553,7 @@ trait InboundMessageStore: MessageStore<CanisterInput> {
548553
/// Time complexity: `O(n * log(n))`.
549554
fn callbacks_with_enqueued_response(
550555
&self,
551-
canister_queues: &BTreeMap<CanisterId, (InputQueue, OutputQueue)>,
556+
canister_queues: &BTreeMap<CanisterId, (Arc<InputQueue>, Arc<OutputQueue>)>,
552557
) -> Result<BTreeSet<CallbackId>, String>;
553558
}
554559

@@ -561,7 +566,7 @@ impl InboundMessageStore for MessageStoreImpl {
561566

562567
fn callbacks_with_enqueued_response(
563568
&self,
564-
canister_queues: &BTreeMap<CanisterId, (InputQueue, OutputQueue)>,
569+
canister_queues: &BTreeMap<CanisterId, (Arc<InputQueue>, Arc<OutputQueue>)>,
565570
) -> Result<BTreeSet<CallbackId>, String> {
566571
let mut callbacks = BTreeSet::new();
567572
canister_queues
@@ -654,7 +659,11 @@ impl CanisterQueues {
654659
F: FnMut(&CanisterId, &RequestOrResponse) -> Result<(), ()>,
655660
{
656661
for (canister_id, (_, queue)) in self.canister_queues.iter_mut() {
657-
while let Some(reference) = queue.peek() {
662+
loop {
663+
let Some(reference) = queue.peek() else {
664+
break;
665+
};
666+
let queue = Arc::make_mut(queue);
658667
let Some(msg) = self.store.pool.get(reference) else {
659668
// Expired / dropped message. Pop it and advance.
660669
assert_eq!(Some(reference), queue.pop());
@@ -740,10 +749,10 @@ impl CanisterQueues {
740749
}
741750
// Safe to already (attempt to) reserve an output slot here, as the `push()`
742751
// below is guaranteed to succeed due to the check above.
743-
if let Err(e) = output_queue.try_reserve_response_slot() {
752+
if let Err(e) = Arc::make_mut(output_queue).try_reserve_response_slot() {
744753
return Err((e, msg));
745754
}
746-
input_queue
755+
Arc::make_mut(input_queue)
747756
}
748757
RequestOrResponse::Response(ref response) => {
749758
match self.canister_queues.get_mut(&sender) {
@@ -768,7 +777,7 @@ impl CanisterQueues {
768777
return Ok(false);
769778
}
770779
}
771-
queue
780+
Arc::make_mut(queue)
772781
}
773782

774783
// Queue does not exist or has no reserved slot for this response.
@@ -860,7 +869,7 @@ impl CanisterQueues {
860869
}
861870

862871
let reference = self.store.push_inbound_timeout_response(callback_id);
863-
input_queue.push_response(reference);
872+
Arc::make_mut(input_queue).push_response(reference);
864873
self.queue_stats.on_push_timeout_response();
865874

866875
// Add sender canister ID to the appropriate input schedule queue if it is not
@@ -1076,15 +1085,15 @@ impl CanisterQueues {
10761085
if let Err(e) = output_queue.check_has_request_slot() {
10771086
return Err((e, request));
10781087
}
1079-
if let Err(e) = input_queue.try_reserve_response_slot() {
1088+
if let Err(e) = Arc::make_mut(input_queue).try_reserve_response_slot() {
10801089
return Err((e, request));
10811090
}
10821091

10831092
self.queue_stats
10841093
.on_push_request(&request, Context::Outbound);
10851094

10861095
let reference = self.store.pool.insert_outbound_request(request, time);
1087-
output_queue.push_request(reference);
1096+
Arc::make_mut(output_queue).push_request(reference);
10881097

10891098
debug_assert_eq!(Ok(()), self.test_invariants());
10901099
Ok(())
@@ -1113,7 +1122,7 @@ impl CanisterQueues {
11131122

11141123
let (input_queue, _output_queue) =
11151124
get_or_insert_queues(&mut self.canister_queues, &request.receiver);
1116-
input_queue.try_reserve_response_slot()?;
1125+
Arc::make_mut(input_queue).try_reserve_response_slot()?;
11171126
self.queue_stats
11181127
.on_push_request(&request, Context::Outbound);
11191128
debug_assert_eq!(Ok(()), self.test_invariants());
@@ -1172,7 +1181,7 @@ impl CanisterQueues {
11721181
.expect("pushing response into inexistent output queue")
11731182
.1;
11741183
let reference = self.store.pool.insert_outbound_response(response);
1175-
output_queue.push_response(reference);
1184+
Arc::make_mut(output_queue).push_response(reference);
11761185

11771186
debug_assert_eq!(Ok(()), self.test_invariants());
11781187
}
@@ -1491,12 +1500,13 @@ impl CanisterQueues {
14911500
.expect("No matching queue for dropped message.");
14921501

14931502
if input_queue.peek() == Some(reference) {
1503+
let input_queue = Arc::make_mut(input_queue);
14941504
input_queue.pop();
14951505
self.store.queue_advance(input_queue);
14961506
}
14971507

14981508
// Release the outbound response slot.
1499-
output_queue.release_reserved_response_slot();
1509+
Arc::make_mut(output_queue).release_reserved_response_slot();
15001510
self.queue_stats.on_drop_input_request(&request);
15011511
}
15021512
}
@@ -1530,6 +1540,7 @@ impl CanisterQueues {
15301540
// a queue containing references `[1, 2]`; `1` and `2` expire as part of the
15311541
// same `time_out_messages()` call; `on_message_dropped(1)` will also pop `2`).
15321542
if output_queue.peek() == Some(reference) {
1543+
let output_queue = Arc::make_mut(output_queue);
15331544
output_queue.pop();
15341545
self.store.queue_advance(output_queue);
15351546
}
@@ -1550,7 +1561,7 @@ impl CanisterQueues {
15501561
.callbacks_with_enqueued_response
15511562
.insert(response.originator_reply_callback));
15521563
let reference = self.store.insert_inbound(response.into());
1553-
input_queue.push_response(reference);
1564+
Arc::make_mut(input_queue).push_response(reference);
15541565

15551566
// If the input queue is not already in a sender schedule, add it.
15561567
if input_queue.len() == 1 {
@@ -1602,7 +1613,7 @@ impl CanisterQueues {
16021613
self.input_schedule.test_invariants(
16031614
self.canister_queues
16041615
.iter()
1605-
.map(|(canister_id, (input_queue, _))| (canister_id, input_queue)),
1616+
.map(|(canister_id, (input_queue, _))| (canister_id, &**input_queue)),
16061617
&input_queue_type_fn,
16071618
)
16081619
}
@@ -1659,7 +1670,7 @@ impl CanisterQueues {
16591670
///
16601671
/// Time complexity: `O(canister_queues.len())`.
16611672
fn calculate_queue_stats(
1662-
canister_queues: &BTreeMap<CanisterId, (InputQueue, OutputQueue)>,
1673+
canister_queues: &BTreeMap<CanisterId, (Arc<InputQueue>, Arc<OutputQueue>)>,
16631674
guaranteed_response_memory_reservations: usize,
16641675
transient_stream_guaranteed_responses_size_bytes: usize,
16651676
) -> QueueStats {
@@ -1684,12 +1695,12 @@ impl CanisterQueues {
16841695
/// Written as a free function in order to avoid borrowing the full
16851696
/// `CanisterQueues`, which then requires looking up the queues again.
16861697
fn get_or_insert_queues<'a>(
1687-
canister_queues: &'a mut BTreeMap<CanisterId, (InputQueue, OutputQueue)>,
1698+
canister_queues: &'a mut BTreeMap<CanisterId, (Arc<InputQueue>, Arc<OutputQueue>)>,
16881699
canister_id: &CanisterId,
1689-
) -> (&'a mut InputQueue, &'a mut OutputQueue) {
1700+
) -> (&'a mut Arc<InputQueue>, &'a mut Arc<OutputQueue>) {
16901701
let (input_queue, output_queue) = canister_queues.entry(*canister_id).or_insert_with(|| {
1691-
let input_queue = CanisterQueue::new(DEFAULT_QUEUE_CAPACITY);
1692-
let output_queue = CanisterQueue::new(DEFAULT_QUEUE_CAPACITY);
1702+
let input_queue = Arc::new(CanisterQueue::new(DEFAULT_QUEUE_CAPACITY));
1703+
let output_queue = Arc::new(CanisterQueue::new(DEFAULT_QUEUE_CAPACITY));
16931704
(input_queue, output_queue)
16941705
});
16951706
(input_queue, output_queue)
@@ -1749,8 +1760,8 @@ impl From<&CanisterQueues> for pb_queues::CanisterQueues {
17491760
.iter()
17501761
.map(|(canid, (iq, oq))| CanisterQueuePair {
17511762
canister_id: Some(pb_types::CanisterId::from(*canid)),
1752-
input_queue: Some(iq.into()),
1753-
output_queue: Some(oq.into()),
1763+
input_queue: Some((&**iq).into()),
1764+
output_queue: Some((&**oq).into()),
17541765
})
17551766
.collect(),
17561767
pool: if item.store.pool != MessagePool::default() {
@@ -1826,7 +1837,7 @@ impl TryFrom<(pb_queues::CanisterQueues, &dyn CheckpointLoadingMetrics)> for Can
18261837
}
18271838
});
18281839

1829-
Ok((canister_id, (iq, oq)))
1840+
Ok((canister_id, (Arc::new(iq), Arc::new(oq))))
18301841
})
18311842
.collect::<Result<_, Self::Error>>()?;
18321843

0 commit comments

Comments
 (0)