@@ -68,10 +68,30 @@ struct TrimTxData
68
68
// Fields only used internally by TxGraphImpl::Trim():
69
69
/* * Number of unmet dependencies this transaction has. -1 if the transaction is included. */
70
70
uint32_t m_deps_left;
71
+ /* * Number of dependencies that apply to this transaction as child. */
72
+ uint32_t m_parent_count;
73
+ /* * Where in deps_by_child those dependencies begin. */
74
+ uint32_t m_parent_offset;
71
75
/* * Number of dependencies that apply to this transaction as parent. */
72
76
uint32_t m_children_count;
73
- /* * Where in deps those dependencies begin. */
77
+ /* * Where in deps_by_parent those dependencies begin. */
74
78
uint32_t m_children_offset;
79
+
80
+ // Fields only used internally by TxGraphImpl::Trim()'s union-find implementation, and only for
81
+ // transactions that are definitely included or definitely rejected.
82
+ //
83
+ // As transactions get processed, they get organized into trees which form partitions
84
+ // representing the would-be clusters up to that point. The root of each tree is a
85
+ // representative for that partition. See
86
+ // https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
87
+ //
88
+ /* * Pointer to another TrimTxData, towards the root of the tree. If this is a root, m_uf_parent
89
+ * is equal to this itself. */
90
+ TrimTxData* m_uf_parent;
91
+ /* * If this is a root, the total number of transactions in the partition. */
92
+ uint32_t m_uf_count;
93
+ /* * If this is a root, the total size of transactions in the partition. */
94
+ uint64_t m_uf_size;
75
95
};
76
96
77
97
/* * A grouping of connected transactions inside a TxGraphImpl::ClusterSet. */
@@ -2609,24 +2629,28 @@ std::vector<TxGraph::Ref*> TxGraphImpl::Trim() noexcept
2609
2629
// but respecting the dependencies being added.
2610
2630
//
2611
2631
// This rudimentary linearization is computed lazily, by putting all eligible (no unmet
2612
- // dependencies) transactions in a heap, and popping the highest-feerate one from it. This
2613
- // continues as long as the number or size of all picked transactions together does not exceed
2614
- // the graph's configured cluster limits. All remaining transactions are then marked as
2615
- // removed.
2632
+ // dependencies) transactions in a heap, and popping the highest-feerate one from it. Along the
2633
+ // way, the counts and sizes of the would-be clusters up to that point are tracked (by
2634
+ // partitioning the involved transactions using a union-find structure). Any transaction whose
2635
+ // addition would cause a violation is removed, along with all their descendants .
2616
2636
//
2617
2637
// A next invocation of GroupClusters (after applying the removals) will compute the new
2618
2638
// resulting clusters, and none of them will violate the limits.
2619
2639
2620
2640
/* * All dependencies (both to be added ones, and implicit ones between consecutive transactions
2621
- * in existing cluster linearizations). */
2622
- std::vector<std::pair<GraphIndex, GraphIndex>> deps;
2641
+ * in existing cluster linearizations), sorted by parent. */
2642
+ std::vector<std::pair<GraphIndex, GraphIndex>> deps_by_parent;
2643
+ /* * Same, but sorted by child. */
2644
+ std::vector<std::pair<GraphIndex, GraphIndex>> deps_by_child;
2623
2645
/* * Information about all transactions involved in a Cluster group to be trimmed, sorted by
2624
2646
* GraphIndex. It contains entries both for transactions that have already been included,
2625
2647
* and ones that have not yet been. */
2626
2648
std::vector<TrimTxData> trim_data;
2627
2649
/* * Iterators into trim_data, treated as a max heap according to cmp_fn below. Each entry is
2628
2650
* a transaction that has not yet been included yet, but all its ancestors have. */
2629
2651
std::vector<std::vector<TrimTxData>::iterator> trim_heap;
2652
+ /* * The list of representatives of the partitions a given transaction depends on. */
2653
+ std::vector<TrimTxData*> current_deps;
2630
2654
2631
2655
/* * Function to define the ordering of trim_heap. */
2632
2656
static constexpr auto cmp_fn = [](auto a, auto b) noexcept {
@@ -2637,6 +2661,38 @@ std::vector<TxGraph::Ref*> TxGraphImpl::Trim() noexcept
2637
2661
return a->m_chunk_feerate < b->m_chunk_feerate ;
2638
2662
};
2639
2663
2664
+ /* * Given a TrimTxData entry, find the representative of the partition it is in. */
2665
+ static constexpr auto find_fn = [](TrimTxData* arg) noexcept {
2666
+ while (arg != arg->m_uf_parent ) {
2667
+ // Replace pointer to parent with pointer to grandparent (path splitting).
2668
+ // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Finding_set_representatives.
2669
+ auto par = arg->m_uf_parent ;
2670
+ arg->m_uf_parent = par->m_uf_parent ;
2671
+ arg = par;
2672
+ }
2673
+ return arg;
2674
+ };
2675
+
2676
+ /* * Given two TrimTxData entries, union the partitions they are in, and return the
2677
+ * representative. */
2678
+ static constexpr auto union_fn = [](TrimTxData* arg1, TrimTxData* arg2) noexcept {
2679
+ // Replace arg1 and arg2 by their representatives.
2680
+ auto rep1 = find_fn (arg1);
2681
+ auto rep2 = find_fn (arg2);
2682
+ // Bail out if both representatives are the same, because that means arg1 and arg2 are in
2683
+ // the same partition already.
2684
+ if (rep1 == rep2) return rep1;
2685
+ // Pick the lower-count root to become a child of the higher-count one.
2686
+ // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_size.
2687
+ if (rep1->m_uf_count < rep2->m_uf_count ) std::swap (rep1, rep2);
2688
+ rep2->m_uf_parent = rep1;
2689
+ // Add the statistics of arg2 (which is no longer a representative) to those of arg1 (which
2690
+ // is now the representative for both).
2691
+ rep1->m_uf_size += rep2->m_uf_size ;
2692
+ rep1->m_uf_count += rep2->m_uf_count ;
2693
+ return rep1;
2694
+ };
2695
+
2640
2696
/* * Get iterator to TrimTxData entry for a given index. */
2641
2697
auto locate_fn = [&](GraphIndex index) noexcept {
2642
2698
auto it = std::lower_bound (trim_data.begin (), trim_data.end (), index, [](TrimTxData& elem, GraphIndex idx) noexcept {
@@ -2650,14 +2706,15 @@ std::vector<TxGraph::Ref*> TxGraphImpl::Trim() noexcept
2650
2706
for (const auto & group_data : clusterset.m_group_data ->m_groups ) {
2651
2707
trim_data.clear ();
2652
2708
trim_heap.clear ();
2653
- deps.clear ();
2709
+ deps_by_child.clear ();
2710
+ deps_by_parent.clear ();
2654
2711
2655
2712
// Gather trim data and implicit dependency data from all involved Clusters.
2656
2713
auto cluster_span = std::span{clusterset.m_group_data ->m_group_clusters }
2657
2714
.subspan (group_data.m_cluster_offset , group_data.m_cluster_count );
2658
2715
uint64_t size{0 };
2659
2716
for (Cluster* cluster : cluster_span) {
2660
- size += cluster->AppendTrimData (trim_data, deps );
2717
+ size += cluster->AppendTrimData (trim_data, deps_by_child );
2661
2718
}
2662
2719
// If this group of Clusters does not violate any limits, continue to the next group.
2663
2720
if (trim_data.size () <= m_max_cluster_count && size <= m_max_cluster_size) continue ;
@@ -2666,53 +2723,57 @@ std::vector<TxGraph::Ref*> TxGraphImpl::Trim() noexcept
2666
2723
// anymore.
2667
2724
std::sort (trim_data.begin (), trim_data.end (), [](auto & a, auto & b) noexcept { return a.m_index < b.m_index ; });
2668
2725
2669
- // Add the explicitly added dependencies to deps.
2670
- deps.insert (deps.end (),
2671
- clusterset.m_deps_to_add .begin () + group_data.m_deps_offset ,
2672
- clusterset.m_deps_to_add .begin () + group_data.m_deps_offset + group_data.m_deps_count );
2673
-
2674
- // Sort deps by child transaction GraphIndex.
2675
- std::sort (deps.begin (), deps.end (), [](auto & a, auto & b) noexcept { return a.second < b.second ; });
2676
- // Fill m_deps_left in trim_data, and initially populate trim_heap. Because of the sort
2677
- // above, all dependencies involving the same child are grouped together, so a single
2678
- // linear scan suffices.
2679
- auto deps_it = deps.begin ();
2726
+ // Add the explicitly added dependencies to deps_by_child.
2727
+ deps_by_child.insert (deps_by_child.end (),
2728
+ clusterset.m_deps_to_add .begin () + group_data.m_deps_offset ,
2729
+ clusterset.m_deps_to_add .begin () + group_data.m_deps_offset + group_data.m_deps_count );
2730
+
2731
+ // Sort deps_by_child by child transaction GraphIndex. The order will not be changed
2732
+ // anymore after this.
2733
+ std::sort (deps_by_child.begin (), deps_by_child.end (), [](auto & a, auto & b) noexcept { return a.second < b.second ; });
2734
+ // Fill m_parents_count and m_parents_offset in trim_data, as well as m_deps_left, and
2735
+ // initially populate trim_heap. Because of the sort above, all dependencies involving the
2736
+ // same child are grouped together, so a single linear scan suffices.
2737
+ auto deps_it = deps_by_child.begin ();
2680
2738
for (auto trim_it = trim_data.begin (); trim_it != trim_data.end (); ++trim_it) {
2739
+ trim_it->m_parent_offset = deps_it - deps_by_child.begin ();
2681
2740
trim_it->m_deps_left = 0 ;
2682
- while (deps_it != deps .end () && deps_it->second == trim_it->m_index ) {
2741
+ while (deps_it != deps_by_child .end () && deps_it->second == trim_it->m_index ) {
2683
2742
++trim_it->m_deps_left ;
2684
2743
++deps_it;
2685
2744
}
2745
+ trim_it->m_parent_count = trim_it->m_deps_left ;
2686
2746
// If this transaction has no unmet dependencies, and is not oversized, add it to the
2687
2747
// heap (just append for now, the heapification happens below).
2688
2748
if (trim_it->m_deps_left == 0 && trim_it->m_tx_size <= m_max_cluster_size) {
2689
2749
trim_heap.push_back (trim_it);
2690
2750
}
2691
2751
}
2692
- Assume (deps_it == deps .end ());
2752
+ Assume (deps_it == deps_by_child .end ());
2693
2753
2694
- // Sort deps by parent transaction GraphIndex. The order will not be changed anymore after
2695
- // this.
2696
- std::sort (deps.begin (), deps.end (), [](auto & a, auto & b) noexcept { return a.first < b.first ; });
2754
+ // Construct deps_by_parent, sorted by parent transaction GraphIndex. The order will not be
2755
+ // changed anymore after this.
2756
+ deps_by_parent = deps_by_child;
2757
+ std::sort (deps_by_parent.begin (), deps_by_parent.end (), [](auto & a, auto & b) noexcept { return a.first < b.first ; });
2697
2758
// Fill m_children_offset and m_children_count in trim_data. Because of the sort above, all
2698
2759
// dependencies involving the same parent are grouped together, so a single linear scan
2699
2760
// suffices.
2700
- deps_it = deps .begin ();
2761
+ deps_it = deps_by_parent .begin ();
2701
2762
for (auto & trim_entry : trim_data) {
2702
2763
trim_entry.m_children_count = 0 ;
2703
- trim_entry.m_children_offset = deps_it - deps .begin ();
2704
- while (deps_it != deps .end () && deps_it->first == trim_entry.m_index ) {
2764
+ trim_entry.m_children_offset = deps_it - deps_by_parent .begin ();
2765
+ while (deps_it != deps_by_parent .end () && deps_it->first == trim_entry.m_index ) {
2705
2766
++trim_entry.m_children_count ;
2706
2767
++deps_it;
2707
2768
}
2708
2769
}
2709
- Assume (deps_it == deps .end ());
2770
+ Assume (deps_it == deps_by_parent .end ());
2710
2771
2711
2772
// Build a heap of all transactions with 0 unmet dependencies.
2712
2773
std::make_heap (trim_heap.begin (), trim_heap.end (), cmp_fn);
2713
2774
2714
2775
// Iterate over to-be-included transactions, and convert them to included transactions, or
2715
- // decide to stop if doing so would violate resource limits.
2776
+ // skip them if adding them would violate resource limits of the would-be cluster .
2716
2777
//
2717
2778
// It is possible that the heap empties without ever hitting either cluster limit, in case
2718
2779
// the implied graph (to be added dependencies plus implicit dependency between each
@@ -2723,25 +2784,44 @@ std::vector<TxGraph::Ref*> TxGraphImpl::Trim() noexcept
2723
2784
// as all added dependencies are in the same direction (from old mempool transactions to
2724
2785
// new from-block transactions); cycles require dependencies in both directions to be
2725
2786
// added.
2726
- uint32_t total_count{0 };
2727
- uint64_t total_size{0 };
2728
2787
while (!trim_heap.empty ()) {
2729
2788
// Move the best remaining transaction to the end of trim_heap.
2730
2789
std::pop_heap (trim_heap.begin (), trim_heap.end (), cmp_fn);
2731
2790
// Pop it, and find its TrimTxData.
2732
2791
auto & entry = *trim_heap.back ();
2733
2792
trim_heap.pop_back ();
2734
2793
2794
+ // Initialize it as a singleton partition.
2795
+ entry.m_uf_parent = &entry;
2796
+ entry.m_uf_count = 1 ;
2797
+ entry.m_uf_size = entry.m_tx_size ;
2798
+
2799
+ // Find the distinct transaction partitions this entry depends on.
2800
+ current_deps.clear ();
2801
+ for (auto & [par, chl] : std::span{deps_by_child}.subspan (entry.m_parent_offset , entry.m_parent_count )) {
2802
+ Assume (chl == entry.m_index );
2803
+ current_deps.push_back (find_fn (&*locate_fn (par)));
2804
+ }
2805
+ std::sort (current_deps.begin (), current_deps.end ());
2806
+ current_deps.erase (std::unique (current_deps.begin (), current_deps.end ()), current_deps.end ());
2807
+
2735
2808
// Compute resource counts.
2736
- total_count += 1 ;
2737
- total_size += entry.m_tx_size ;
2738
- // Stop if this would violate any limit.
2739
- if (total_count > m_max_cluster_count || total_size > m_max_cluster_size) break ;
2809
+ uint32_t new_count = 1 ;
2810
+ uint64_t new_size = entry.m_tx_size ;
2811
+ for (TrimTxData* ptr : current_deps) {
2812
+ new_count += ptr->m_uf_count ;
2813
+ new_size += ptr->m_uf_size ;
2814
+ }
2815
+ // Skip the entry if this would violate any limit.
2816
+ if (new_count > m_max_cluster_count || new_size > m_max_cluster_size) continue ;
2740
2817
2818
+ // Union the partitions this transaction and all its dependencies are in together.
2819
+ auto rep = &entry;
2820
+ for (TrimTxData* ptr : current_deps) rep = union_fn (ptr, rep);
2741
2821
// Mark the entry as included (so the loop below will not remove the transaction).
2742
2822
entry.m_deps_left = uint32_t (-1 );
2743
2823
// Mark each to-be-added dependency involving this transaction as parent satisfied.
2744
- for (auto & [par, chl] : std::span{deps }.subspan (entry.m_children_offset , entry.m_children_count )) {
2824
+ for (auto & [par, chl] : std::span{deps_by_parent }.subspan (entry.m_children_offset , entry.m_children_count )) {
2745
2825
Assume (par == entry.m_index );
2746
2826
auto chl_it = locate_fn (chl);
2747
2827
// Reduce the number of unmet dependencies of chl_it, and if that brings the number
0 commit comments