diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt index 1051f0d18..fb42810f4 100644 --- a/crypto/CMakeLists.txt +++ b/crypto/CMakeLists.txt @@ -10,7 +10,6 @@ set(TON_CRYPTO_CORE_SOURCE openssl/residue.cpp openssl/rand.cpp vm/boc.cpp - vm/boc-compression.cpp vm/large-boc-serializer.cpp tl/tlblib.cpp @@ -82,6 +81,7 @@ set(TON_CRYPTO_CORE_SOURCE ellcurve/p256.cpp) set(TON_CRYPTO_SOURCE + vm/boc-compression.cpp vm/stack.cpp vm/atom.cpp vm/continuation.cpp diff --git a/crypto/vm/boc-compression.cpp b/crypto/vm/boc-compression.cpp index 41095efde..5ebe27cfc 100644 --- a/crypto/vm/boc-compression.cpp +++ b/crypto/vm/boc-compression.cpp @@ -18,6 +18,7 @@ */ #include #include +#include #include "td/utils/Slice-decl.h" #include "td/utils/lz4.h" @@ -27,6 +28,9 @@ #include "vm/cellslice.h" #include "boc-compression.h" +#include "common/refint.h" +#include "crypto/block/block-auto.h" +#include "crypto/block/block-parse.h" namespace vm { @@ -77,7 +81,46 @@ inline td::Result read_uint(td::BitSlice& bs, int bits) { return result; } +// Decode DepthBalanceInfo and extract grams using TLB methods +td::RefInt256 extract_balance_from_depth_balance_info(vm::CellSlice& cs) { + // Check hashmap label is empty ('00') + if (cs.size() < 2 || cs.fetch_ulong(2) != 0) { + return td::RefInt256{}; + } + + int split_depth; + Ref balance_cs_ref; + if (!block::gen::t_DepthBalanceInfo.unpack_depth_balance(cs, split_depth, balance_cs_ref)) { + return td::RefInt256{}; + } + if (split_depth != 0) { + return td::RefInt256{}; + } + if (!cs.empty()) { + return td::RefInt256{}; + } + auto balance_cs = balance_cs_ref.write(); + auto res = block::tlb::t_Grams.as_integer_skip(balance_cs); + if (balance_cs.size() != 1 || balance_cs.fetch_ulong(1) != 0) { + return td::RefInt256{}; + } + return res; +} + +// Process ShardAccounts vertex and compute balance difference (right - left) +td::RefInt256 process_shard_accounts_vertex(vm::CellSlice& cs_left, vm::CellSlice& cs_right) { + auto balance_left = 
extract_balance_from_depth_balance_info(cs_left); + auto balance_right = extract_balance_from_depth_balance_info(cs_right); + if (balance_left.not_null() && balance_right.not_null()) { + td::RefInt256 diff = balance_right; + diff -= balance_left; + return diff; + } + return td::RefInt256{}; +} + td::Result boc_compress_improved_structure_lz4(const std::vector>& boc_roots) { + const bool kMURemoveSubtreeSums = true; // Input validation if (boc_roots.empty()) { return td::Status::Error("No root cells were provided for serialization"); @@ -99,7 +142,12 @@ td::Result boc_compress_improved_structure_lz4(const std::vecto size_t total_size_estimate = 0; // Build graph representation using recursive lambda - const auto build_graph = [&](auto&& self, td::Ref cell) -> td::Result { + // When traversing RIGHT side of a MerkleUpdate, pass corresponding left_cell and non-null sum_diff_out + const auto build_graph = [&](auto&& self, + td::Ref cell, + td::Ref left_cell = td::Ref(), + bool under_mu_right = false, + td::RefInt256* sum_diff_out = nullptr) -> td::Result { if (cell.is_null()) { return td::Status::Error("Error while importing a cell during serialization: cell is null"); } @@ -139,9 +187,44 @@ td::Result boc_compress_improved_structure_lz4(const std::vecto total_size_estimate += cell_bitslice.size(); // Process cell references - for (int i = 0; i < cell_slice.size_refs(); ++i) { - TRY_RESULT(child_id, self(self, cell_slice.prefetch_ref(i))); - boc_graph[current_cell_id][i] = child_id; + if (kMURemoveSubtreeSums && cell_slice.special_type() == vm::CellTraits::SpecialType::MerkleUpdate) { + // Left branch: traverse normally + TRY_RESULT(child_left_id, self(self, cell_slice.prefetch_ref(0))); + boc_graph[current_cell_id][0] = child_left_id; + // Right branch: traverse paired with left and compute diffs inline + TRY_RESULT(child_right_id, self(self, + cell_slice.prefetch_ref(1), + cell_slice.prefetch_ref(0), + true)); + boc_graph[current_cell_id][1] = child_right_id; + } 
else if (under_mu_right && left_cell.not_null()) { + // Inline computation for RIGHT subtree nodes under MerkleUpdate + vm::CellSlice cs_left(NoVm(), left_cell); + td::RefInt256 sum_child_diff = td::make_refint(0); + // Recurse children first + for (int i = 0; i < cell_slice.size_refs(); ++i) { + TRY_RESULT(child_id, self(self, + cell_slice.prefetch_ref(i), + cs_left.prefetch_ref(i), + true, + &sum_child_diff)); + boc_graph[current_cell_id][i] = child_id; + } + + // Compute this vertex diff and check skippable condition + td::RefInt256 vertex_diff = process_shard_accounts_vertex(cs_left, cell_slice); + if (!is_special && vertex_diff.not_null() && sum_child_diff.not_null() && cmp(sum_child_diff, vertex_diff) == 0) { + cell_data[current_cell_id] = td::BitSlice(); + prunned_branch_level[current_cell_id] = 9; + } + if (sum_diff_out && vertex_diff.not_null()) { + *sum_diff_out += vertex_diff; + } + } else { + for (int i = 0; i < cell_slice.size_refs(); ++i) { + TRY_RESULT(child_id, self(self, cell_slice.prefetch_ref(i))); + boc_graph[current_cell_id][i] = child_id; + } } return current_cell_id; @@ -241,11 +324,11 @@ td::Result boc_compress_improved_structure_lz4(const std::vecto // Store cell types and sizes for (int i = 0; i < node_count; ++i) { size_t node = topo_order[i]; - size_t currrent_cell_type = bool(cell_type[node]) + prunned_branch_level[node]; - append_uint(result, currrent_cell_type, 4); + size_t current_cell_type = bool(cell_type[node]) + prunned_branch_level[node]; + append_uint(result, current_cell_type, 4); append_uint(result, refs_cnt[node], 4); - if (cell_type[node] != 1) { + if (cell_type[node] != 1 && current_cell_type != 9) { if (is_data_small[node]) { append_uint(result, 1, 1); append_uint(result, cell_data[node].size(), 7); @@ -269,6 +352,9 @@ td::Result boc_compress_improved_structure_lz4(const std::vecto // Store cell data for (size_t node : topo_order) { + if (prunned_branch_level[node] == 9) { + continue; + } if (cell_type[node] != 1 && 
!is_data_small[node]) { continue; } @@ -308,6 +394,9 @@ td::Result boc_compress_improved_structure_lz4(const std::vecto // Store remaining cell data for (size_t node : topo_order) { + if (prunned_branch_level[node] == 9) { + continue; + } if (cell_type[node] == 1 || is_data_small[node]) { size_t prefix_size = cell_data[node].size() % 8; result.append(cell_data[node].subslice(prefix_size, cell_data[node].size() - prefix_size)); @@ -404,7 +493,7 @@ td::Result>> boc_decompress_improved_structure_lz4 // Initialize data structures std::vector cell_data_length(node_count), is_data_small(node_count); - std::vector is_special(node_count), cell_refs_cnt(node_count); + std::vector is_special(node_count), cell_refs_cnt(node_count), is_depth_balance(node_count); std::vector prunned_branch_level(node_count, 0); std::vector cell_builders(node_count); @@ -418,7 +507,8 @@ td::Result>> boc_decompress_improved_structure_lz4 } size_t cell_type = bit_reader.bits().get_uint(4); - is_special[i] = bool(cell_type); + is_special[i] = (cell_type == 9 ? 
false : bool(cell_type)); + is_depth_balance[i] = cell_type == 9; if (is_special[i]) { prunned_branch_level[i] = cell_type - 1; } @@ -429,8 +519,9 @@ td::Result>> boc_decompress_improved_structure_lz4 if (cell_refs_cnt[i] > 4) { return td::Status::Error("BOC decompression failed: invalid cell refs count"); } - - if (prunned_branch_level[i]) { + if (is_depth_balance[i]) { + cell_data_length[i] = 0; + } else if (prunned_branch_level[i]) { size_t coef = std::bitset<4>(prunned_branch_level[i]).count(); cell_data_length[i] = (256 + 16) * coef; } else { @@ -470,6 +561,9 @@ td::Result>> boc_decompress_improved_structure_lz4 // Read initial cell data for (int i = 0; i < node_count; ++i) { + if (is_depth_balance[i]) { + continue; + } if (prunned_branch_level[i]) { cell_builders[i].store_long((1 << 8) + prunned_branch_level[i], 16); } @@ -538,6 +632,9 @@ td::Result>> boc_decompress_improved_structure_lz4 // Read remaining cell data for (int i = 0; i < node_count; ++i) { + if (is_depth_balance[i]) { + continue; + } size_t padding_bits = 0; if (!prunned_branch_level[i] && !is_data_small[i]) { while (bit_reader.size() > 0 && bit_reader.bits()[0] == 0) { @@ -559,33 +656,143 @@ td::Result>> boc_decompress_improved_structure_lz4 bit_reader.advance(remaining_data_bits); } - // Build cell tree - std::vector> nodes(node_count); - for (int i = node_count - 1; i >= 0; --i) { - try { - for (int child_index = 0; child_index < cell_refs_cnt[i]; ++child_index) { - size_t child = boc_graph[i][child_index]; - cell_builders[i].store_ref(nodes[child]); + // Build cell tree + std::vector> nodes(node_count); + + // Helper: write ShardAccounts augmentation (DepthBalanceInfo with grams) into builder + auto write_depth_balance_grams = [&](vm::CellBuilder& cb, const td::RefInt256& grams) -> bool { + if (!cb.store_zeroes_bool(7)) { // empty HmLabel and split_depth + return false; + } + if (!block::tlb::t_CurrencyCollection.pack_special(cb, grams, td::Ref())) { + return false; } + return true; + }; + 
+ // Helper: detect MerkleUpdate (is_special AND first byte == 0x04) without finalizing + auto is_merkle_update_node = [&](size_t idx) -> bool { + if (!is_special[idx]) { + return false; + } + // Need at least one full byte in data to read the tag + if (cell_builders[idx].get_bits() < 8) { + return false; + } + unsigned first_byte = cell_builders[idx].get_data()[0]; + return first_byte == 0x04; + }; + + // Helper: finalize a node by storing refs and finalizing the builder + auto finalize_node = [&](size_t idx) -> td::Status { try { - nodes[i] = cell_builders[i].finalize(is_special[i]); - } catch (vm::CellBuilder::CellWriteError& e) { - return td::Status::Error("BOC decompression failed: write error while finalizing cell."); + for (int j = 0; j < cell_refs_cnt[idx]; ++j) { + cell_builders[idx].store_ref(nodes[boc_graph[idx][j]]); + } + try { + nodes[idx] = cell_builders[idx].finalize(is_special[idx]); + } catch (vm::CellBuilder::CellWriteError& e) { + return td::Status::Error(PSTRING() << "BOC decompression failed: failed to finalize node (CellWriteError)"); + } + } catch (vm::VmError& e) { + return td::Status::Error(PSTRING() << "BOC decompression failed: failed to finalize node (VmError)"); } - } catch (vm::VmError& e) { - return td::Status::Error("BOC decompression failed: VM error during cell construction"); - } - } - - std::vector> root_nodes; - root_nodes.reserve(root_count); - for (size_t index : root_indexes) { - root_nodes.push_back(nodes[index]); + return td::Status::OK(); + }; + + // Recursively build right subtree under MerkleUpdate, pairing with left subtree, computing sum diffs. + // Sum is accumulated into sum_diff_out (if non-null), similar to compression flow. 
+ std::function build_right_under_mu = + [&](size_t right_idx, size_t left_idx, td::RefInt256* sum_diff_out) -> td::Status { + if (nodes[right_idx].not_null()) { + if (left_idx != std::numeric_limits::max() && sum_diff_out) { + vm::CellSlice cs_left(NoVm(), nodes[left_idx]); + vm::CellSlice cs_right(NoVm(), nodes[right_idx]); + td::RefInt256 vertex_diff = process_shard_accounts_vertex(cs_left, cs_right); + if (vertex_diff.not_null()) { + *sum_diff_out += vertex_diff; + } + } + return td::Status::OK(); + } + td::RefInt256 cur_right_left_diff; + // Build children first + td::RefInt256 sum_child_diff = td::make_refint(0); + for (int j = 0; j < cell_refs_cnt[right_idx]; ++j) { + size_t right_child = boc_graph[right_idx][j]; + size_t left_child = (left_idx != std::numeric_limits::max() && j < cell_refs_cnt[left_idx]) + ? boc_graph[left_idx][j] + : std::numeric_limits::max(); + TRY_STATUS(build_right_under_mu(right_child, left_child, &sum_child_diff)); + } + // If this vertex was depth-balance-compressed, reconstruct its data from left + children sum + if (is_depth_balance[right_idx]) { + vm::CellSlice cs_left(NoVm(), nodes[left_idx]); + td::RefInt256 left_grams = extract_balance_from_depth_balance_info(cs_left); + if (left_grams.is_null()) { + return td::Status::Error("BOC decompression failed: depth-balance left vertex has no grams"); + } + td::RefInt256 expected_right_grams = left_grams; + expected_right_grams += sum_child_diff; + if (!write_depth_balance_grams(cell_builders[right_idx], expected_right_grams)) { + return td::Status::Error("BOC decompression failed: failed to write depth-balance grams"); + } + cur_right_left_diff = sum_child_diff; + } + + // Store children refs and finalize this right node + TRY_STATUS(finalize_node(right_idx)); + + // Compute this vertex diff (right - left) to propagate upward + if (cur_right_left_diff.is_null() && left_idx != std::numeric_limits::max()) { + vm::CellSlice cs_left(NoVm(), nodes[left_idx]); + vm::CellSlice
cs_right(NoVm(), nodes[right_idx]); + cur_right_left_diff = process_shard_accounts_vertex(cs_left, cs_right); + } + if (sum_diff_out && cur_right_left_diff.not_null()) { + *sum_diff_out += cur_right_left_diff; + } + return td::Status::OK(); + }; + + // General recursive build that handles MerkleUpdate by pairing left/right subtrees + std::function build_node = [&](size_t idx) -> td::Status { + if (nodes[idx].not_null()) { + return td::Status::OK(); + } + // If this node is a MerkleUpdate, build left subtree normally first, then right subtree paired with left + if (is_merkle_update_node(idx)) { + size_t left_idx = boc_graph[idx][0]; + size_t right_idx = boc_graph[idx][1]; + TRY_STATUS(build_node(left_idx)); + TRY_STATUS(build_right_under_mu(right_idx, left_idx, nullptr)); + TRY_STATUS(finalize_node(idx)); + return td::Status::OK(); + } else { + // Default: build children normally then finalize + for (int j = 0; j < cell_refs_cnt[idx]; ++j) { + TRY_STATUS(build_node(boc_graph[idx][j])); + } + } + + TRY_STATUS(finalize_node(idx)); + return td::Status::OK(); + }; + + // Build from roots using DFS + for (size_t index : root_indexes) { + TRY_STATUS(build_node(index)); + } + + std::vector> root_nodes; + root_nodes.reserve(root_count); + for (size_t index : root_indexes) { + root_nodes.push_back(nodes[index]); + } + + return root_nodes; } - return root_nodes; -} - td::Result boc_compress(const std::vector>& boc_roots, CompressionAlgorithm algo) { // Check for empty input if (boc_roots.empty()) { diff --git a/validator-session/candidate-serializer.cpp b/validator-session/candidate-serializer.cpp index 94dfe2048..260686729 100644 --- a/validator-session/candidate-serializer.cpp +++ b/validator-session/candidate-serializer.cpp @@ -29,10 +29,9 @@ td::Result serialize_candidate(const tl_object_ptrdata_, block->collated_data_, decompressed_size)) - return create_serialize_tl_object( - 0, block->src_, block->round_, block->root_hash_, (int)decompressed_size, 
std::move(compressed)); + TRY_RESULT(compressed, compress_candidate_data(block->data_, block->collated_data_)) + return create_serialize_tl_object( + 0, block->src_, block->round_, block->root_hash_, std::move(compressed)); } td::Result> deserialize_candidate(td::Slice data, @@ -73,8 +72,7 @@ td::Result> deserialize_candi return res; } -td::Result compress_candidate_data(td::Slice block, td::Slice collated_data, - size_t& decompressed_size) { +td::Result compress_candidate_data(td::Slice block, td::Slice collated_data) { vm::BagOfCells boc1, boc2; TRY_STATUS(boc1.deserialize(block)); if (boc1.get_root_count() != 1) { @@ -85,9 +83,7 @@ td::Result compress_candidate_data(td::Slice block, td::Slice c for (int i = 0; i < boc2.get_root_count(); ++i) { roots.push_back(boc2.get_root_cell(i)); } - TRY_RESULT(data, vm::std_boc_serialize_multi(std::move(roots), 2)); - decompressed_size = data.size(); - td::BufferSlice compressed = td::lz4_compress(data); + TRY_RESULT(compressed, vm::boc_compress(roots, vm::CompressionAlgorithm::ImprovedStructureLZ4)); LOG(DEBUG) << "Compressing block candidate: " << block.size() + collated_data.size() << " -> " << compressed.size(); return compressed; } diff --git a/validator-session/candidate-serializer.h b/validator-session/candidate-serializer.h index cf688a692..9151d1787 100644 --- a/validator-session/candidate-serializer.h +++ b/validator-session/candidate-serializer.h @@ -27,8 +27,7 @@ td::Result> deserialize_candi int max_decompressed_data_size, int proto_version); -td::Result compress_candidate_data(td::Slice block, td::Slice collated_data, - size_t& decompressed_size); +td::Result compress_candidate_data(td::Slice block, td::Slice collated_data); td::Result> decompress_candidate_data(td::Slice compressed, bool improved_compression, int decompressed_size, diff --git a/validator/collator-node/utils.cpp b/validator/collator-node/utils.cpp index 41b6dc432..62caa499e 100644 --- a/validator/collator-node/utils.cpp +++ 
b/validator/collator-node/utils.cpp @@ -29,12 +29,10 @@ tl_object_ptr serialize_candidate(const BlockCa PublicKey{pubkeys::Ed25519{block.pubkey.as_bits256()}}.tl(), create_tl_block_id(block.id), block.data.clone(), block.collated_data.clone()); } - size_t decompressed_size; td::BufferSlice compressed = - validatorsession::compress_candidate_data(block.data, block.collated_data, decompressed_size).move_as_ok(); - return create_tl_object( - 0, PublicKey{pubkeys::Ed25519{block.pubkey.as_bits256()}}.tl(), create_tl_block_id(block.id), - (int)decompressed_size, std::move(compressed)); + validatorsession::compress_candidate_data(block.data, block.collated_data).move_as_ok(); + return create_tl_object( + 0, PublicKey{pubkeys::Ed25519{block.pubkey.as_bits256()}}.tl(), create_tl_block_id(block.id), std::move(compressed)); } td::Result deserialize_candidate(tl_object_ptr f, diff --git a/validator/full-node-serializer.cpp b/validator/full-node-serializer.cpp index adce0fe22..472463f4e 100644 --- a/validator/full-node-serializer.cpp +++ b/validator/full-node-serializer.cpp @@ -41,16 +41,13 @@ td::Result serialize_block_broadcast(const BlockBroadcast& broa TRY_RESULT(proof_root, vm::std_boc_deserialize(broadcast.proof)); TRY_RESULT(data_root, vm::std_boc_deserialize(broadcast.data)); - TRY_RESULT(boc, vm::std_boc_serialize_multi({proof_root, data_root}, 2)); - td::BufferSlice data = - create_serialize_tl_object(std::move(sigs), std::move(boc)); - td::BufferSlice compressed = td::lz4_compress(data); + TRY_RESULT(compressed_boc, vm::boc_compress({proof_root, data_root}, vm::CompressionAlgorithm::ImprovedStructureLZ4)); VLOG(FULL_NODE_DEBUG) << "Compressing block broadcast: " << broadcast.data.size() + broadcast.proof.size() + broadcast.signatures.size() * 96 << " -> " - << compressed.size(); - return create_serialize_tl_object( - create_tl_block_id(broadcast.block_id), broadcast.catchain_seqno, broadcast.validator_set_hash, 0, - std::move(compressed)); + << compressed_boc.size() 
+ broadcast.signatures.size() * 96; + return create_serialize_tl_object( + create_tl_block_id(broadcast.block_id), broadcast.catchain_seqno, broadcast.validator_set_hash, std::move(sigs), + 0, std::move(compressed_boc)); } static td::Result deserialize_block_broadcast(ton_api::tonNode_blockBroadcast& f) { @@ -135,10 +132,10 @@ td::Result serialize_block_full(const BlockIdExt& id, td::Slice } TRY_RESULT(proof_root, vm::std_boc_deserialize(proof)); TRY_RESULT(data_root, vm::std_boc_deserialize(data)); - TRY_RESULT(boc, vm::std_boc_serialize_multi({proof_root, data_root}, 2)); - td::BufferSlice compressed = td::lz4_compress(boc); + TRY_RESULT(compressed, vm::boc_compress({proof_root, data_root}, vm::CompressionAlgorithm::ImprovedStructureLZ4)); + VLOG(FULL_NODE_DEBUG) << "Compressing block full: " << data.size() + proof.size() << " -> " << compressed.size(); - return create_serialize_tl_object(create_tl_block_id(id), 0, + return create_serialize_tl_object(create_tl_block_id(id), 0, std::move(compressed), is_proof_link); } @@ -207,10 +204,9 @@ td::Result serialize_block_candidate_broadcast(BlockIdExt block create_tl_object(Bits256::zero(), td::BufferSlice()), td::BufferSlice(data)); } TRY_RESULT(root, vm::std_boc_deserialize(data)); - TRY_RESULT(data_new, vm::std_boc_serialize(root, 2)); - td::BufferSlice compressed = td::lz4_compress(data_new); + TRY_RESULT(compressed, vm::boc_compress({root}, vm::CompressionAlgorithm::ImprovedStructureLZ4)); VLOG(FULL_NODE_DEBUG) << "Compressing block candidate broadcast: " << data.size() << " -> " << compressed.size(); - return create_serialize_tl_object( + return create_serialize_tl_object( create_tl_block_id(block_id), cc_seqno, validator_set_hash, create_tl_object(Bits256::zero(), td::BufferSlice()), 0, std::move(compressed)); }