Skip to content

Commit e0572c1

Browse files
perf(node-rewards): batch-fetch node operators and data centers in reward calculation (#9310)
## Summary Replace per-node individual `get_value()` calls for `NodeOperatorRecord` and `DataCenterRecord` with two bulk `get_key_family_with_values()` prefix scans. Results are collected into in-memory `HashMap`s and looked up per node. **Before:** Each of the ~35 daily invocations did 1 prefix scan (nodes) + ~1300 `get_value()` (operators) + ~1300 `get_value()` (data centers) = **~2600 individual registry reads per day**, totaling ~91,000 registry operations per reward calculation. Each `get_value()` internally performs its own prefix scan of the `StableBTreeMap`. **After:** Each daily invocation does 3 prefix scans (nodes + operators + data centers) = **~105 total** over 35 days. The per-node lookups happen against in-memory `HashMap`s at negligible cost. ## Testing Deploying the latest commit of main and my PR shows some improvement but as mentioned by @pietrodimarco-dfinity probably not enough. ``` # latest commit on main [964. 2026-03-12T15:53:12.996491436Z]: get_node_providers_rewards instructions: 30308898790 # my PR [911. 2026-03-12T15:49:41.901088032Z]: get_node_providers_rewards instructions: 25378475054 ```
1 parent 67311aa commit e0572c1

File tree

2 files changed

+77
-102
lines changed

2 files changed

+77
-102
lines changed

rs/node_rewards/canister/src/registry_querier.rs

Lines changed: 77 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,21 @@ use ic_protobuf::registry::node::v1::{NodeRecord, NodeRewardType};
77
use ic_protobuf::registry::node_operator::v1::NodeOperatorRecord;
88
use ic_protobuf::registry::node_rewards::v2::NodeRewardsTable;
99
use ic_protobuf::registry::subnet::v1::SubnetListRecord;
10-
use ic_registry_canister_client::{CanisterRegistryClient, get_decoded_value};
10+
use ic_registry_canister_client::CanisterRegistryClient;
1111
use ic_registry_keys::{
12-
NODE_RECORD_KEY_PREFIX, NODE_REWARDS_TABLE_KEY, make_data_center_record_key,
13-
make_node_operator_record_key, make_subnet_list_record_key,
12+
DATA_CENTER_KEY_PREFIX, NODE_OPERATOR_RECORD_KEY_PREFIX, NODE_RECORD_KEY_PREFIX,
13+
NODE_REWARDS_TABLE_KEY, make_subnet_list_record_key,
1414
};
1515
use ic_types::registry::RegistryClientError;
16-
use rewards_calculation::types::{Region, RewardableNode, UnixTsNanos};
17-
use std::collections::BTreeMap;
16+
use rewards_calculation::types::{RewardableNode, UnixTsNanos};
17+
use std::collections::{BTreeMap, HashMap};
1818
use std::str::FromStr;
1919
use std::sync::Arc;
2020

2121
pub struct RegistryQuerier {
2222
registry_client: Arc<dyn CanisterRegistryClient>,
2323
}
2424

25-
struct NodeOperatorData {
26-
node_provider_id: PrincipalId,
27-
dc_id: String,
28-
region: Region,
29-
}
30-
3125
impl RegistryQuerier {
3226
pub fn new(registry_client: Arc<dyn CanisterRegistryClient>) -> Self {
3327
RegistryQuerier { registry_client }
@@ -79,6 +73,51 @@ impl RegistryQuerier {
7973
})
8074
.unwrap_or_default()
8175
}
76+
77+
/// Returns all NodeOperatorRecords at the specified version, keyed by operator PrincipalId.
78+
///
79+
/// This uses a single bulk prefix scan instead of individual lookups per operator,
80+
/// which is significantly cheaper in terms of instructions.
81+
fn all_node_operators(
82+
&self,
83+
version: RegistryVersion,
84+
) -> Result<HashMap<PrincipalId, NodeOperatorRecord>, RegistryClientError> {
85+
let prefix_len = NODE_OPERATOR_RECORD_KEY_PREFIX.len();
86+
let records = self
87+
.registry_client
88+
.get_key_family_with_values(NODE_OPERATOR_RECORD_KEY_PREFIX, version)?;
89+
let mut result = HashMap::with_capacity(records.len());
90+
for (key, value) in records {
91+
let principal =
92+
PrincipalId::from_str(&key[prefix_len..]).expect("Invalid node operator key");
93+
let record = NodeOperatorRecord::decode(value.as_slice())
94+
.expect("Failed to decode NodeOperatorRecord");
95+
result.insert(principal, record);
96+
}
97+
Ok(result)
98+
}
99+
100+
/// Returns all DataCenterRecords at the specified version, keyed by DC ID.
101+
///
102+
/// This uses a single bulk prefix scan instead of individual lookups per data center,
103+
/// which is significantly cheaper in terms of instructions.
104+
fn all_data_centers(
105+
&self,
106+
version: RegistryVersion,
107+
) -> Result<HashMap<String, DataCenterRecord>, RegistryClientError> {
108+
let prefix_len = DATA_CENTER_KEY_PREFIX.len();
109+
let records = self
110+
.registry_client
111+
.get_key_family_with_values(DATA_CENTER_KEY_PREFIX, version)?;
112+
let mut result = HashMap::with_capacity(records.len());
113+
for (key, value) in records {
114+
let dc_id = key[prefix_len..].to_string();
115+
let record = DataCenterRecord::decode(value.as_slice())
116+
.expect("Failed to decode DataCenterRecord");
117+
result.insert(dc_id, record);
118+
}
119+
Ok(result)
120+
}
82121
}
83122

84123
// Exposed API Methods
@@ -87,6 +126,9 @@ impl RegistryQuerier {
87126
///
88127
/// A node is considered rewardable on a specific UTC day if it exists in the last registry
89128
/// version of that day.
129+
///
130+
/// Performance: This method bulk-fetches all NodeOperatorRecords and DataCenterRecords
131+
/// in two prefix scans rather than doing individual lookups per node
90132
pub fn get_rewardable_nodes_per_provider(
91133
&self,
92134
date: &NaiveDate,
@@ -96,24 +138,38 @@ impl RegistryQuerier {
96138
let registry_version = self
97139
.version_for_timestamp_nanoseconds(last_unix_timestamp_nanoseconds(date))
98140
.unwrap();
141+
142+
// Bulk-fetch all data upfront instead of per-node individual lookups.
99143
let nodes = self.nodes_in_version(registry_version)?;
144+
let all_operators = self.all_node_operators(registry_version)?;
145+
let all_data_centers = self.all_data_centers(registry_version)?;
100146

101147
for (node_id, node_record) in nodes {
102148
let node_operator_id: PrincipalId = node_record
103149
.node_operator_id
104150
.try_into()
105151
.expect("Failed to parse PrincipalId from node operator ID");
106152

107-
let Some(NodeOperatorData {
108-
node_provider_id,
109-
dc_id,
110-
region,
111-
..
112-
}) = self.node_operator_data(node_operator_id, registry_version)?
113-
else {
114-
ic_cdk::println!("Node {} has no NodeOperatorData: skipping", node_id);
153+
let Some(node_operator_record) = all_operators.get(&node_operator_id) else {
154+
ic_cdk::println!("Node {} has no NodeOperatorRecord: skipping", node_id);
155+
continue;
156+
};
157+
158+
let Some(data_center_record) = all_data_centers.get(&node_operator_record.dc_id) else {
159+
ic_cdk::println!(
160+
"Node {} has NodeOperator but no DataCenterRecord for dc_id {}: skipping",
161+
node_id,
162+
node_operator_record.dc_id
163+
);
115164
continue;
116165
};
166+
167+
let node_provider_id: PrincipalId = node_operator_record
168+
.node_provider_principal_id
169+
.clone()
170+
.try_into()
171+
.expect("Failed to parse PrincipalId");
172+
117173
if let Some(provider_filter) = provider_filter
118174
&& &node_provider_id != provider_filter
119175
{
@@ -133,8 +189,8 @@ impl RegistryQuerier {
133189
.push(RewardableNode {
134190
node_id,
135191
node_reward_type,
136-
dc_id: dc_id.clone(),
137-
region: region.clone(),
192+
dc_id: node_operator_record.dc_id.clone(),
193+
region: data_center_record.region.clone(),
138194
});
139195
}
140196
Ok(rewardable_nodes_per_provider)
@@ -163,49 +219,6 @@ impl RegistryQuerier {
163219
.collect();
164220
Ok(nodes)
165221
}
166-
167-
fn node_operator_data(
168-
&self,
169-
node_operator: PrincipalId,
170-
version: RegistryVersion,
171-
) -> Result<Option<NodeOperatorData>, RegistryClientError> {
172-
let node_operator_record_key = make_node_operator_record_key(node_operator);
173-
let Some(node_operator_record) = get_decoded_value::<NodeOperatorRecord>(
174-
&*self.registry_client,
175-
node_operator_record_key.as_str(),
176-
version,
177-
)
178-
.map_err(|e| RegistryClientError::DecodeError {
179-
error: format!("Failed to decode NodeOperatorRecord: {}", e),
180-
})?
181-
else {
182-
return Ok(None);
183-
};
184-
185-
let data_center_key = make_data_center_record_key(node_operator_record.dc_id.as_str());
186-
let Some(data_center_record) = get_decoded_value::<DataCenterRecord>(
187-
&*self.registry_client,
188-
data_center_key.as_str(),
189-
version,
190-
)
191-
.map_err(|e| RegistryClientError::DecodeError {
192-
error: format!("Failed to decode DataCenterRecord: {}", e),
193-
})?
194-
else {
195-
return Ok(None);
196-
};
197-
198-
let node_provider_id: PrincipalId = node_operator_record
199-
.node_provider_principal_id
200-
.try_into()
201-
.expect("Failed to parse PrincipalId");
202-
203-
Ok(Some(NodeOperatorData {
204-
node_provider_id,
205-
dc_id: node_operator_record.dc_id,
206-
region: data_center_record.region.clone(),
207-
}))
208-
}
209222
}
210223

211224
#[cfg(test)]

rs/node_rewards/canister/src/registry_querier/tests.rs

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -307,41 +307,3 @@ fn test_node_re_registered_after_deletion() {
307307
}
308308
}
309309
}
310-
311-
#[test]
312-
fn test_node_operator_data_returns_expected_data() {
313-
let client = client_for_tests();
314-
315-
let version = 39667;
316-
let no_2_id = PrincipalId::new_user_test_id(30);
317-
let data = client
318-
.node_operator_data(no_2_id, version.into())
319-
.unwrap()
320-
.unwrap();
321-
322-
assert_eq!(data.node_provider_id, PrincipalId::new_user_test_id(20));
323-
assert_eq!(data.dc_id, "y");
324-
assert_eq!(data.region, "A");
325-
326-
let version = 39675;
327-
let no_1_id = PrincipalId::new_user_test_id(10);
328-
let data = client
329-
.node_operator_data(no_1_id, version.into())
330-
.unwrap()
331-
.unwrap();
332-
333-
assert_eq!(data.node_provider_id, PrincipalId::new_user_test_id(20));
334-
assert_eq!(data.dc_id, "x");
335-
assert_eq!(data.region, "A");
336-
337-
let not_yet_added_no_version = 39652;
338-
let data = client
339-
.node_operator_data(no_1_id, not_yet_added_no_version.into())
340-
.unwrap();
341-
342-
assert!(
343-
data.is_none(),
344-
"Data should not exist for version {} because Operator was not yet added",
345-
not_yet_added_no_version
346-
);
347-
}

0 commit comments

Comments
 (0)