@@ -53,6 +53,27 @@ enum class QualityLevel
53
53
NONE,
54
54
};
55
55
56
/** Information about a transaction inside TxGraphImpl::Trim. */
struct TrimTxData
{
    // Fields populated by Cluster::AppendTrimData(). These are immutable after TrimTxData
    // construction.
    /** Chunk feerate for this transaction (feerate of the chunk it belongs to in its
     *  Cluster's linearization); used to order the selection heap in Trim(). */
    FeePerWeight m_chunk_feerate;
    /** GraphIndex of the transaction. */
    TxGraph::GraphIndex m_index;
    /** Size of the transaction. */
    uint32_t m_tx_size;

    // Fields only used internally by TxGraphImpl::Trim():
    /** Number of not-yet-satisfied dependencies this transaction has as a child. Set to
     *  uint32_t(-1) as a sentinel once the transaction has been included. */
    uint32_t m_deps_left;
    /** Number of dependencies that apply to this transaction as parent. */
    uint32_t m_children_count;
    /** Offset into Trim()'s parent-sorted deps vector where this transaction's
     *  as-parent dependencies begin (the run is m_children_count entries long). */
    uint32_t m_children_offset;
};

56
77
/* * A grouping of connected transactions inside a TxGraphImpl::ClusterSet. */
57
78
class Cluster
58
79
{
@@ -152,6 +173,10 @@ class Cluster
152
173
void Relinearize (TxGraphImpl& graph, uint64_t max_iters) noexcept ;
153
174
/* * For every chunk in the cluster, append its FeeFrac to ret. */
154
175
void AppendChunkFeerates (std::vector<FeeFrac>& ret) const noexcept ;
176
+ /* * Add a TrimTxData entry (filling m_chunk_feerate, m_index, m_tx_size) for every
177
+ * transaction in the Cluster to ret. Implicit dependencies between consecutive transactions
178
+ * in the linearization are added to deps. Return the Cluster's total transaction size. */
179
+ uint64_t AppendTrimData (std::vector<TrimTxData>& ret, std::vector<std::pair<GraphIndex, GraphIndex>>& deps) const noexcept ;
155
180
156
181
// Functions that implement the Cluster-specific side of public TxGraph functions.
157
182
@@ -563,6 +588,7 @@ class TxGraphImpl final : public TxGraph
563
588
std::strong_ordering CompareMainOrder (const Ref& a, const Ref& b) noexcept final ;
564
589
GraphIndex CountDistinctClusters (std::span<const Ref* const > refs, bool main_only = false ) noexcept final ;
565
590
std::pair<std::vector<FeeFrac>, std::vector<FeeFrac>> GetMainStagingDiagrams () noexcept final ;
591
+ std::vector<Ref*> Trim () noexcept final ;
566
592
567
593
std::unique_ptr<BlockBuilder> GetBlockBuilder () noexcept final ;
568
594
std::pair<std::vector<Ref*>, FeePerWeight> GetWorstMainChunk () noexcept final ;
@@ -875,6 +901,37 @@ void Cluster::AppendChunkFeerates(std::vector<FeeFrac>& ret) const noexcept
875
901
ret.insert (ret.end (), chunk_feerates.begin (), chunk_feerates.end ());
876
902
}
877
903
904
/** Add a TrimTxData entry (filling m_chunk_feerate, m_index, m_tx_size) for every transaction
 *  in this Cluster to ret, in linearization order. Implicit dependencies between consecutive
 *  transactions of the linearization are appended to deps. Returns the Cluster's total
 *  transaction size. */
uint64_t Cluster::AppendTrimData(std::vector<TrimTxData>& ret, std::vector<std::pair<GraphIndex, GraphIndex>>& deps) const noexcept
{
    const LinearizationChunking linchunking(m_depgraph, m_linearization);
    // Cursor into m_linearization; advanced once per emitted transaction.
    LinearizationIndex pos{0};
    // Running total of all transaction sizes in this Cluster (the return value).
    uint64_t size{0};
    // GraphIndex of the previously emitted transaction; GraphIndex(-1) is a "none yet"
    // placeholder, never read because deps are only added when pos != 0.
    auto prev_index = GraphIndex(-1);
    // Iterate over the chunks of this cluster's linearization.
    for (unsigned i = 0; i < linchunking.NumChunksLeft(); ++i) {
        const auto& [chunk, chunk_feerate] = linchunking.GetChunk(i);
        // Iterate over the transactions of that chunk, in linearization order.
        auto chunk_tx_count = chunk.Count();
        for (unsigned j = 0; j < chunk_tx_count; ++j) {
            auto cluster_idx = m_linearization[pos];
            // The transaction must appear in the chunk.
            Assume(chunk[cluster_idx]);
            // Construct a new element in ret.
            auto& entry = ret.emplace_back();
            entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk_feerate);
            entry.m_index = m_mapping[cluster_idx];
            // If this is not the first transaction of the cluster linearization, it has an
            // implicit dependency on its predecessor.
            if (pos != 0) deps.emplace_back(prev_index, entry.m_index);
            prev_index = entry.m_index;
            entry.m_tx_size = m_depgraph.FeeRate(cluster_idx).size;
            size += entry.m_tx_size;
            ++pos;
        }
    }
    return size;
}

878
935
bool Cluster::Split (TxGraphImpl& graph) noexcept
879
936
{
880
937
// This function can only be called when the Cluster needs splitting.
@@ -2525,6 +2582,195 @@ std::pair<std::vector<TxGraph::Ref*>, FeePerWeight> TxGraphImpl::GetWorstMainChu
2525
2582
return ret;
2526
2583
}
2527
2584
2585
std::vector<TxGraph::Ref*> TxGraphImpl::Trim() noexcept
{
    int level = GetTopLevel();
    Assume(m_main_chunkindex_observers == 0 || level != 0);
    std::vector<TxGraph::Ref*> ret;

    // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits).
    auto& clusterset = GetClusterSet(level);
    if (clusterset.m_oversized == false) return ret;
    GroupClusters(level);
    Assume(clusterset.m_group_data.has_value());
    // Nothing to do if not oversized.
    Assume(clusterset.m_oversized.has_value());
    if (clusterset.m_oversized == false) return ret;

    // In this function, would-be clusters (as precomputed in m_group_data by GroupClusters) are
    // trimmed by removing transactions in them such that the resulting clusters satisfy the size
    // and count limits.
    //
    // It works by defining for each would-be cluster a rudimentary linearization: at every point
    // the highest-chunk-feerate remaining transaction is picked among those with no unmet
    // dependencies. "Dependency" here means either a to-be-added dependency (m_deps_to_add), or
    // an implicit dependency added between any two consecutive transactions in their current
    // cluster linearization. So it can be seen as a "merge sort" of the chunks of the clusters,
    // but respecting the dependencies being added.
    //
    // This rudimentary linearization is computed lazily, by putting all eligible (no unmet
    // dependencies) transactions in a heap, and popping the highest-feerate one from it. This
    // continues as long as the number or size of all picked transactions together does not exceed
    // the graph's configured cluster limits. All remaining transactions are then marked as
    // removed.
    //
    // A next invocation of GroupClusters (after applying the removals) will compute the new
    // resulting clusters, and none of them will violate the limits.

    /** All dependencies (both to-be-added ones, and implicit ones between consecutive
     *  transactions in existing cluster linearizations). */
    std::vector<std::pair<GraphIndex, GraphIndex>> deps;
    /** Information about all transactions involved in a Cluster group to be trimmed, sorted by
     *  GraphIndex. It contains entries both for transactions that have already been included,
     *  and ones that have not been. */
    std::vector<TrimTxData> trim_data;
    /** Iterators into trim_data, treated as a max heap according to cmp_fn below. Each entry is
     *  a transaction that has not been included yet, but all of whose ancestors have. */
    std::vector<std::vector<TrimTxData>::iterator> trim_heap;

    /** Function to define the ordering of trim_heap. */
    static constexpr auto cmp_fn = [](auto a, auto b) noexcept {
        // Sort by increasing chunk feerate (so the max-heap top is the best feerate).
        // We do not need to sort by cluster or within clusters, because due to the implicit
        // dependency between consecutive linearization elements, no two transactions from the
        // same Cluster will ever simultaneously be in the heap.
        return a->m_chunk_feerate < b->m_chunk_feerate;
    };

    /** Get iterator to the TrimTxData entry for a given index (binary search over the
     *  GraphIndex-sorted trim_data). */
    auto locate_fn = [&](GraphIndex index) noexcept {
        auto it = std::lower_bound(trim_data.begin(), trim_data.end(), index, [](TrimTxData& elem, GraphIndex idx) noexcept {
            return elem.m_index < idx;
        });
        Assume(it != trim_data.end() && it->m_index == index);
        return it;
    };

    // For each group of to-be-merged Clusters.
    for (const auto& group_data : clusterset.m_group_data->m_groups) {
        // Reuse the buffers across groups; clear() keeps their capacity.
        trim_data.clear();
        trim_heap.clear();
        deps.clear();

        // Gather trim data and implicit dependency data from all involved Clusters.
        auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters}
                                .subspan(group_data.m_cluster_offset, group_data.m_cluster_count);
        uint64_t size{0};
        for (Cluster* cluster : cluster_span) {
            size += cluster->AppendTrimData(trim_data, deps);
        }
        // If this group of Clusters does not violate any limits, continue to the next group.
        if (trim_data.size() <= m_max_cluster_count && size <= m_max_cluster_size) continue;
        // Sort the trim data by GraphIndex. In what follows, we will treat this sorted vector as
        // a map from GraphIndex to TrimTxData via locate_fn, and its ordering will not change
        // anymore.
        std::sort(trim_data.begin(), trim_data.end(), [](auto& a, auto& b) noexcept { return a.m_index < b.m_index; });

        // Add the explicitly added dependencies to deps.
        deps.insert(deps.end(),
                    clusterset.m_deps_to_add.begin() + group_data.m_deps_offset,
                    clusterset.m_deps_to_add.begin() + group_data.m_deps_offset + group_data.m_deps_count);

        // Sort deps by child transaction GraphIndex.
        std::sort(deps.begin(), deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; });
        // Fill m_deps_left in trim_data, and initially populate trim_heap. Because of the sort
        // above, all dependencies involving the same child are grouped together, so a single
        // linear scan suffices.
        auto deps_it = deps.begin();
        for (auto trim_it = trim_data.begin(); trim_it != trim_data.end(); ++trim_it) {
            trim_it->m_deps_left = 0;
            while (deps_it != deps.end() && deps_it->second == trim_it->m_index) {
                ++trim_it->m_deps_left;
                ++deps_it;
            }
            // If this transaction has no unmet dependencies, and is not oversized by itself,
            // add it to the heap (just append for now, the heapification happens below).
            if (trim_it->m_deps_left == 0 && trim_it->m_tx_size <= m_max_cluster_size) {
                trim_heap.push_back(trim_it);
            }
        }
        Assume(deps_it == deps.end());

        // Sort deps by parent transaction GraphIndex. The order will not be changed anymore after
        // this, so the offset/count pairs stored below stay valid.
        std::sort(deps.begin(), deps.end(), [](auto& a, auto& b) noexcept { return a.first < b.first; });
        // Fill m_children_offset and m_children_count in trim_data. Because of the sort above, all
        // dependencies involving the same parent are grouped together, so a single linear scan
        // suffices.
        deps_it = deps.begin();
        for (auto& trim_entry : trim_data) {
            trim_entry.m_children_count = 0;
            trim_entry.m_children_offset = deps_it - deps.begin();
            while (deps_it != deps.end() && deps_it->first == trim_entry.m_index) {
                ++trim_entry.m_children_count;
                ++deps_it;
            }
        }
        Assume(deps_it == deps.end());

        // Build a heap of all transactions with 0 unmet dependencies.
        std::make_heap(trim_heap.begin(), trim_heap.end(), cmp_fn);

        // Iterate over to-be-included transactions, and convert them to included transactions, or
        // decide to stop if doing so would violate resource limits.
        //
        // It is possible that the heap empties without ever hitting either cluster limit, in case
        // the implied graph (to-be-added dependencies plus implicit dependency between each
        // original transaction and its predecessor in the linearization it came from) contains
        // cycles. Such cycles will be removed entirely, because each of the transactions in the
        // cycle permanently has unmet dependencies. However, this cannot occur in real scenarios
        // where Trim() is called to deal with reorganizations that would violate cluster limits,
        // as all added dependencies are in the same direction (from old mempool transactions to
        // new from-block transactions); cycles require dependencies in both directions to be
        // added.
        uint32_t total_count{0};
        uint64_t total_size{0};
        while (!trim_heap.empty()) {
            // Move the best remaining transaction to the end of trim_heap.
            std::pop_heap(trim_heap.begin(), trim_heap.end(), cmp_fn);
            // Pop it, and find its TrimTxData.
            auto& entry = *trim_heap.back();
            trim_heap.pop_back();

            // Compute resource counts.
            total_count += 1;
            total_size += entry.m_tx_size;
            // Stop if this would violate any limit.
            if (total_count > m_max_cluster_count || total_size > m_max_cluster_size) break;

            // Mark the entry as included (so the loop below will not remove the transaction).
            entry.m_deps_left = uint32_t(-1);
            // Mark each dependency involving this transaction as parent satisfied.
            for (auto& [par, chl] : std::span{deps}.subspan(entry.m_children_offset, entry.m_children_count)) {
                Assume(par == entry.m_index);
                auto chl_it = locate_fn(chl);
                // Reduce the number of unmet dependencies of chl_it, and if that brings the number
                // to zero, add it to the heap of includable transactions.
                Assume(chl_it->m_deps_left > 0);
                if (--chl_it->m_deps_left == 0) {
                    trim_heap.push_back(chl_it);
                    std::push_heap(trim_heap.begin(), trim_heap.end(), cmp_fn);
                }
            }
        }

        // Remove all the transactions that were not processed above. Because nothing gets
        // processed until/unless all its dependencies are met, this automatically guarantees
        // that if a transaction is removed, all its descendants, or would-be descendants, are
        // removed as well.
        for (const auto& trim_entry : trim_data) {
            if (trim_entry.m_deps_left != uint32_t(-1)) {
                ret.push_back(m_entries[trim_entry.m_index].m_ref);
                clusterset.m_to_remove.push_back(trim_entry.m_index);
            }
        }
    }
    // The precomputed grouping is no longer valid once removals are queued; after they are
    // applied, no group can violate the limits anymore.
    clusterset.m_group_data.reset();
    clusterset.m_oversized = false;
    Assume(!ret.empty());
    return ret;
}

2528
2774
} // namespace
2529
2775
2530
2776
TxGraph::Ref::~Ref ()
0 commit comments