Skip to content

Commit a04e205

Browse files
sipainstagibbs
andcommitted
txgraph: Add ability to trim oversized clusters (feature)
During reorganisations, it is possible that dependencies get add which result in clusters that violate limits (count, size), when linking the new from-block transactions to the old from-mempool transactions. Unlike RBF scenarios, we cannot simply reject these policy violations when they are due to received blocks. To accomodate this, add a Trim() function to TxGraph, which removes transactions (including descendants) in order to make all resulting clusters satisfy the limits. In the initial version of the function added here, the following approach is used: - Lazily compute a naive linearization for the to-be-merged cluster (using an O(n log n) algorithm, optimized for far larger groups of transactions than the normal linearization code). - Initialize a set of accepted transactions to {} - Iterate over the transactions in this cluster one by one: - If adding the transaction to the set makes it exceed the max cluster size or count limit, stop. - Add the transaction to the set. - Remove all transactions from the cluster that were not included in the set (note that this necessarily includes all descendants too, because they appear later in the naive linearization). Co-authored-by: Greg Sanders <[email protected]>
1 parent eabcd0e commit a04e205

File tree

3 files changed

+300
-0
lines changed

3 files changed

+300
-0
lines changed

src/test/fuzz/txgraph.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,31 @@ struct SimTxGraph
247247
}
248248
}
249249
}
250+
251+
252+
/** Verify that set contains transactions from every oversized cluster, and nothing from
253+
* non-oversized ones. */
254+
bool MatchesOversizedClusters(const SetType& set)
255+
{
256+
if (set.Any() && !IsOversized()) return false;
257+
258+
auto todo = graph.Positions();
259+
if (!set.IsSubsetOf(todo)) return false;
260+
261+
// Walk all clusters, and make sure all of set doesn't come from non-oversized clusters
262+
while (todo.Any()) {
263+
auto component = graph.FindConnectedComponent(todo);
264+
// Determine whether component is oversized, due to either the size or count limit.
265+
bool is_oversized = component.Count() > max_cluster_count;
266+
uint64_t component_size{0};
267+
for (auto i : component) component_size += graph.FeeRate(i).size;
268+
is_oversized |= component_size > max_cluster_size;
269+
// Check whether overlap with set matches is_oversized.
270+
if (is_oversized != set.Overlaps(component)) return false;
271+
todo -= component;
272+
}
273+
return true;
274+
}
250275
};
251276

252277
} // namespace
@@ -789,6 +814,30 @@ FUZZ_TARGET(txgraph)
789814
assert(sum == worst_chunk_feerate);
790815
}
791816
break;
817+
} else if ((block_builders.empty() || sims.size() > 1) && command-- == 0) {
818+
// Trim.
819+
bool was_oversized = top_sim.IsOversized();
820+
auto removed = real->Trim();
821+
// Verify that something was removed if and only if there was an oversized cluster.
822+
assert(was_oversized == !removed.empty());
823+
if (!was_oversized) break;
824+
auto removed_set = top_sim.MakeSet(removed);
825+
// The removed set must contain all its own descendants.
826+
for (auto simpos : removed_set) {
827+
assert(top_sim.graph.Descendants(simpos).IsSubsetOf(removed_set));
828+
}
829+
// Something from every oversized cluster should have been removed, and nothing
830+
// else.
831+
assert(top_sim.MatchesOversizedClusters(removed_set));
832+
833+
// Apply all removals to the simulation, and verify the result is no longer
834+
// oversized. Don't query the real graph for oversizedness; it is compared
835+
// against the simulation anyway later.
836+
for (auto simpos : removed_set) {
837+
top_sim.RemoveTransaction(top_sim.GetRef(simpos));
838+
}
839+
assert(!top_sim.IsOversized());
840+
break;
792841
}
793842
}
794843
}

src/txgraph.cpp

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,27 @@ enum class QualityLevel
5353
NONE,
5454
};
5555

56+
/** Information about a transaction inside TxGraphImpl::Trim. */
57+
struct TrimTxData
58+
{
59+
// Fields populated by Cluster::AppendTrimData(). These are immutable after TrimTxData
60+
// construction.
61+
/** Chunk feerate for this transaction. */
62+
FeePerWeight m_chunk_feerate;
63+
/** GraphIndex of the transaction. */
64+
TxGraph::GraphIndex m_index;
65+
/** Size of the transaction. */
66+
uint32_t m_tx_size;
67+
68+
// Fields only used internally by TxGraphImpl::Trim():
69+
/** Number of unmet dependencies this transaction has. -1 if the transaction is included. */
70+
uint32_t m_deps_left;
71+
/** Number of dependencies that apply to this transaction as parent. */
72+
uint32_t m_children_count;
73+
/** Where in deps those dependencies begin. */
74+
uint32_t m_children_offset;
75+
};
76+
5677
/** A grouping of connected transactions inside a TxGraphImpl::ClusterSet. */
5778
class Cluster
5879
{
@@ -152,6 +173,10 @@ class Cluster
152173
void Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept;
153174
/** For every chunk in the cluster, append its FeeFrac to ret. */
154175
void AppendChunkFeerates(std::vector<FeeFrac>& ret) const noexcept;
176+
/** Add a TrimTxData entry (filling m_chunk_feerate, m_index, m_tx_size) for every
177+
* transaction in the Cluster to ret. Implicit dependencies between consecutive transactions
178+
* in the linearization are added to deps. Return the Cluster's total transaction size. */
179+
uint64_t AppendTrimData(std::vector<TrimTxData>& ret, std::vector<std::pair<GraphIndex, GraphIndex>>& deps) const noexcept;
155180

156181
// Functions that implement the Cluster-specific side of public TxGraph functions.
157182

@@ -563,6 +588,7 @@ class TxGraphImpl final : public TxGraph
563588
std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final;
564589
GraphIndex CountDistinctClusters(std::span<const Ref* const> refs, bool main_only = false) noexcept final;
565590
std::pair<std::vector<FeeFrac>, std::vector<FeeFrac>> GetMainStagingDiagrams() noexcept final;
591+
std::vector<Ref*> Trim() noexcept final;
566592

567593
std::unique_ptr<BlockBuilder> GetBlockBuilder() noexcept final;
568594
std::pair<std::vector<Ref*>, FeePerWeight> GetWorstMainChunk() noexcept final;
@@ -875,6 +901,37 @@ void Cluster::AppendChunkFeerates(std::vector<FeeFrac>& ret) const noexcept
875901
ret.insert(ret.end(), chunk_feerates.begin(), chunk_feerates.end());
876902
}
877903

904+
uint64_t Cluster::AppendTrimData(std::vector<TrimTxData>& ret, std::vector<std::pair<GraphIndex, GraphIndex>>& deps) const noexcept
905+
{
906+
const LinearizationChunking linchunking(m_depgraph, m_linearization);
907+
LinearizationIndex pos{0};
908+
uint64_t size{0};
909+
auto prev_index = GraphIndex(-1);
910+
// Iterate over the chunks of this cluster's linearization.
911+
for (unsigned i = 0; i < linchunking.NumChunksLeft(); ++i) {
912+
const auto& [chunk, chunk_feerate] = linchunking.GetChunk(i);
913+
// Iterate over the transactions of that chunk, in linearization order.
914+
auto chunk_tx_count = chunk.Count();
915+
for (unsigned j = 0; j < chunk_tx_count; ++j) {
916+
auto cluster_idx = m_linearization[pos];
917+
// The transaction must appear in the chunk.
918+
Assume(chunk[cluster_idx]);
919+
// Construct a new element in ret.
920+
auto& entry = ret.emplace_back();
921+
entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk_feerate);
922+
entry.m_index = m_mapping[cluster_idx];
923+
// If this is not the first transaction of the cluster linearization, it has an
924+
// implicit dependency on its predecessor.
925+
if (pos != 0) deps.emplace_back(prev_index, entry.m_index);
926+
prev_index = entry.m_index;
927+
entry.m_tx_size = m_depgraph.FeeRate(cluster_idx).size;
928+
size += entry.m_tx_size;
929+
++pos;
930+
}
931+
}
932+
return size;
933+
}
934+
878935
bool Cluster::Split(TxGraphImpl& graph) noexcept
879936
{
880937
// This function can only be called when the Cluster needs splitting.
@@ -2525,6 +2582,195 @@ std::pair<std::vector<TxGraph::Ref*>, FeePerWeight> TxGraphImpl::GetWorstMainChu
25252582
return ret;
25262583
}
25272584

2585+
std::vector<TxGraph::Ref*> TxGraphImpl::Trim() noexcept
2586+
{
2587+
int level = GetTopLevel();
2588+
Assume(m_main_chunkindex_observers == 0 || level != 0);
2589+
std::vector<TxGraph::Ref*> ret;
2590+
2591+
// Compute the groups of to-be-merged Clusters (which also applies all removals, and splits).
2592+
auto& clusterset = GetClusterSet(level);
2593+
if (clusterset.m_oversized == false) return ret;
2594+
GroupClusters(level);
2595+
Assume(clusterset.m_group_data.has_value());
2596+
// Nothing to do if not oversized.
2597+
Assume(clusterset.m_oversized.has_value());
2598+
if (clusterset.m_oversized == false) return ret;
2599+
2600+
// In this function, would-be clusters (as precomputed in m_group_data by GroupClusters) are
2601+
// trimmed by removing transactions in them such that the resulting clusters satisfy the size
2602+
// and count limits.
2603+
//
2604+
// It works by defining for each would-be cluster a rudimentary linearization: at every point
2605+
// the highest-chunk-feerate remaining transaction is picked among those with no unmet
2606+
// dependencies. "Dependency" here means either a to-be-added dependency (m_deps_to_add), or
2607+
// an implicit dependency added between any two consecutive transaction in their current
2608+
// cluster linearization. So it can be seen as a "merge sort" of the chunks of the clusters,
2609+
// but respecting the dependencies being added.
2610+
//
2611+
// This rudimentary linearization is computed lazily, by putting all eligible (no unmet
2612+
// dependencies) transactions in a heap, and popping the highest-feerate one from it. This
2613+
// continues as long as the number or size of all picked transactions together does not exceed
2614+
// the graph's configured cluster limits. All remaining transactions are then marked as
2615+
// removed.
2616+
//
2617+
// A next invocation of GroupClusters (after applying the removals) will compute the new
2618+
// resulting clusters, and none of them will violate the limits.
2619+
2620+
/** All dependencies (both to be added ones, and implicit ones between consecutive transactions
2621+
* in existing cluster linearizations). */
2622+
std::vector<std::pair<GraphIndex, GraphIndex>> deps;
2623+
/** Information about all transactions involved in a Cluster group to be trimmed, sorted by
2624+
* GraphIndex. It contains entries both for transactions that have already been included,
2625+
* and ones that have not yet been. */
2626+
std::vector<TrimTxData> trim_data;
2627+
/** Iterators into trim_data, treated as a max heap according to cmp_fn below. Each entry is
2628+
* a transaction that has not yet been included yet, but all its ancestors have. */
2629+
std::vector<std::vector<TrimTxData>::iterator> trim_heap;
2630+
2631+
/** Function to define the ordering of trim_heap. */
2632+
static constexpr auto cmp_fn = [](auto a, auto b) noexcept {
2633+
// Sort by increasing chunk feerate, and then by decreasing size.
2634+
// We do not need to sort by cluster or within clusters, because due to the implicit
2635+
// dependency between consecutive linearization elements, no two transactions from the
2636+
// same Cluster will ever simultaneously be in the heap.
2637+
return a->m_chunk_feerate < b->m_chunk_feerate;
2638+
};
2639+
2640+
/** Get iterator to TrimTxData entry for a given index. */
2641+
auto locate_fn = [&](GraphIndex index) noexcept {
2642+
auto it = std::lower_bound(trim_data.begin(), trim_data.end(), index, [](TrimTxData& elem, GraphIndex idx) noexcept {
2643+
return elem.m_index < idx;
2644+
});
2645+
Assume(it != trim_data.end() && it->m_index == index);
2646+
return it;
2647+
};
2648+
2649+
// For each group of to-be-merged Clusters.
2650+
for (const auto& group_data : clusterset.m_group_data->m_groups) {
2651+
trim_data.clear();
2652+
trim_heap.clear();
2653+
deps.clear();
2654+
2655+
// Gather trim data and implicit dependency data from all involved Clusters.
2656+
auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters}
2657+
.subspan(group_data.m_cluster_offset, group_data.m_cluster_count);
2658+
uint64_t size{0};
2659+
for (Cluster* cluster : cluster_span) {
2660+
size += cluster->AppendTrimData(trim_data, deps);
2661+
}
2662+
// If this group of Clusters does not violate any limits, continue to the next group.
2663+
if (trim_data.size() <= m_max_cluster_count && size <= m_max_cluster_size) continue;
2664+
// Sort the trim data by GraphIndex. In what follows, we will treat this sorted vector as
2665+
// a map from GraphIndex to TrimTxData via locate_fn, and its ordering will not change
2666+
// anymore.
2667+
std::sort(trim_data.begin(), trim_data.end(), [](auto& a, auto& b) noexcept { return a.m_index < b.m_index; });
2668+
2669+
// Add the explicitly added dependencies to deps.
2670+
deps.insert(deps.end(),
2671+
clusterset.m_deps_to_add.begin() + group_data.m_deps_offset,
2672+
clusterset.m_deps_to_add.begin() + group_data.m_deps_offset + group_data.m_deps_count);
2673+
2674+
// Sort deps by child transaction GraphIndex.
2675+
std::sort(deps.begin(), deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; });
2676+
// Fill m_deps_left in trim_data, and initially populate trim_heap. Because of the sort
2677+
// above, all dependencies involving the same child are grouped together, so a single
2678+
// linear scan suffices.
2679+
auto deps_it = deps.begin();
2680+
for (auto trim_it = trim_data.begin(); trim_it != trim_data.end(); ++trim_it) {
2681+
trim_it->m_deps_left = 0;
2682+
while (deps_it != deps.end() && deps_it->second == trim_it->m_index) {
2683+
++trim_it->m_deps_left;
2684+
++deps_it;
2685+
}
2686+
// If this transaction has no unmet dependencies, and is not oversized, add it to the
2687+
// heap (just append for now, the heapification happens below).
2688+
if (trim_it->m_deps_left == 0 && trim_it->m_tx_size <= m_max_cluster_size) {
2689+
trim_heap.push_back(trim_it);
2690+
}
2691+
}
2692+
Assume(deps_it == deps.end());
2693+
2694+
// Sort deps by parent transaction GraphIndex. The order will not be changed anymore after
2695+
// this.
2696+
std::sort(deps.begin(), deps.end(), [](auto& a, auto& b) noexcept { return a.first < b.first; });
2697+
// Fill m_children_offset and m_children_count in trim_data. Because of the sort above, all
2698+
// dependencies involving the same parent are grouped together, so a single linear scan
2699+
// suffices.
2700+
deps_it = deps.begin();
2701+
for (auto& trim_entry : trim_data) {
2702+
trim_entry.m_children_count = 0;
2703+
trim_entry.m_children_offset = deps_it - deps.begin();
2704+
while (deps_it != deps.end() && deps_it->first == trim_entry.m_index) {
2705+
++trim_entry.m_children_count;
2706+
++deps_it;
2707+
}
2708+
}
2709+
Assume(deps_it == deps.end());
2710+
2711+
// Build a heap of all transactions with 0 unmet dependencies.
2712+
std::make_heap(trim_heap.begin(), trim_heap.end(), cmp_fn);
2713+
2714+
// Iterate over to-be-included transactions, and convert them to included transactions, or
2715+
// decide to stop if doing so would violate resource limits.
2716+
//
2717+
// It is possible that the heap empties without ever hitting either cluster limit, in case
2718+
// the implied graph (to be added dependencies plus implicit dependency between each
2719+
// original transaction and its predecessor in the linearization it came from) contains
2720+
// cycles. Such cycles will be removed entirely, because each of the transactions in the
2721+
// cycle permanently have unmet dependencies. However, this cannot occur in real scenarios
2722+
// where Trim() is called to deal with reorganizations that would violate cluster limits,
2723+
// as all added dependencies are in the same direction (from old mempool transactions to
2724+
// new from-block transactions); cycles require dependencies in both directions to be
2725+
// added.
2726+
uint32_t total_count{0};
2727+
uint64_t total_size{0};
2728+
while (!trim_heap.empty()) {
2729+
// Move the best remaining transaction to the end of trim_heap.
2730+
std::pop_heap(trim_heap.begin(), trim_heap.end(), cmp_fn);
2731+
// Pop it, and find its TrimTxData.
2732+
auto& entry = *trim_heap.back();
2733+
trim_heap.pop_back();
2734+
2735+
// Compute resource counts.
2736+
total_count += 1;
2737+
total_size += entry.m_tx_size;
2738+
// Stop if this would violate any limit.
2739+
if (total_count > m_max_cluster_count || total_size > m_max_cluster_size) break;
2740+
2741+
// Mark the entry as included (so the loop below will not remove the transaction).
2742+
entry.m_deps_left = uint32_t(-1);
2743+
// Mark each to-be-added dependency involving this transaction as parent satisfied.
2744+
for (auto& [par, chl] : std::span{deps}.subspan(entry.m_children_offset, entry.m_children_count)) {
2745+
Assume(par == entry.m_index);
2746+
auto chl_it = locate_fn(chl);
2747+
// Reduce the number of unmet dependencies of chl_it, and if that brings the number
2748+
// to zero, add it to the heap of includable transactions.
2749+
Assume(chl_it->m_deps_left > 0);
2750+
if (--chl_it->m_deps_left == 0) {
2751+
trim_heap.push_back(chl_it);
2752+
std::push_heap(trim_heap.begin(), trim_heap.end(), cmp_fn);
2753+
}
2754+
}
2755+
}
2756+
2757+
// Remove all the transactions that were not processed above. Because nothing gets
2758+
// processed until/unless all its dependencies are met, this automatically guarantees
2759+
// that if a transaction is removed, all its descendants, or would-be descendants, are
2760+
// removed as well.
2761+
for (const auto& trim_entry : trim_data) {
2762+
if (trim_entry.m_deps_left != uint32_t(-1)) {
2763+
ret.push_back(m_entries[trim_entry.m_index].m_ref);
2764+
clusterset.m_to_remove.push_back(trim_entry.m_index);
2765+
}
2766+
}
2767+
}
2768+
clusterset.m_group_data.reset();
2769+
clusterset.m_oversized = false;
2770+
Assume(!ret.empty());
2771+
return ret;
2772+
}
2773+
25282774
} // namespace
25292775

25302776
TxGraph::Ref::~Ref()

src/txgraph.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,11 @@ class TxGraph
169169
* that appear identically in both. Use FeeFrac rather than FeePerWeight so CompareChunks is
170170
* usable without type-conversion. */
171171
virtual std::pair<std::vector<FeeFrac>, std::vector<FeeFrac>> GetMainStagingDiagrams() noexcept = 0;
172+
/** Remove transactions (including their own descendants) according to a fast but best-effort
173+
* strategy such that the TxGraph's cluster and size limits are respected. Applies to staging
174+
* if it exists, and to main otherwise. Returns the list of all removed transactions in
175+
* unspecified order. This has no effect unless the relevant graph is oversized. */
176+
virtual std::vector<Ref*> Trim() noexcept = 0;
172177

173178
/** Interface returned by GetBlockBuilder. */
174179
class BlockBuilder

0 commit comments

Comments
 (0)