Skip to content

Commit a52b539

Browse files
committed
clusterlin: add GetConnectedComponent
This abstracts out the finding of the connected component that includes a given element from FindConnectedComponent (which just finds any connected component) into a new GetConnectedComponent function. Use this in the txgraph fuzz test, which was effectively reimplementing this logic. At the same time, improve that test's performance by replacing the vector of cluster representatives with a set, which removes the need to sort and deduplicate it.
1 parent c7d5dca commit a52b539

File tree

3 files changed

+44
-28
lines changed

3 files changed

+44
-28
lines changed

src/cluster_linearize.h

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -250,10 +250,8 @@ class DepGraph
250250
return ret;
251251
}
252252

253-
/** Find some connected component within the subset "todo" of this graph.
254-
*
255-
* Specifically, this finds the connected component which contains the first transaction of
256-
* todo (if any).
253+
/** Get the connected component within the subset "todo" that contains tx (which must be in
254+
* todo).
257255
*
258256
* Two transactions are considered connected if they are both in `todo`, and one is an ancestor
259257
* of the other in the entire graph (so not just within `todo`), or transitively there is a
@@ -262,10 +260,11 @@ class DepGraph
262260
*
263261
* Complexity: O(ret.Count()).
264262
*/
265-
SetType FindConnectedComponent(const SetType& todo) const noexcept
263+
SetType GetConnectedComponent(const SetType& todo, DepGraphIndex tx) const noexcept
266264
{
267-
if (todo.None()) return todo;
268-
auto to_add = SetType::Singleton(todo.First());
265+
Assume(todo[tx]);
266+
Assume(todo.IsSubsetOf(m_used));
267+
auto to_add = SetType::Singleton(tx);
269268
SetType ret;
270269
do {
271270
SetType old = ret;
@@ -279,6 +278,19 @@ class DepGraph
279278
return ret;
280279
}
281280

281+
/** Find some connected component within the subset "todo" of this graph.
282+
*
283+
* Specifically, this finds the connected component which contains the first transaction of
284+
* todo (if any).
285+
*
286+
* Complexity: O(ret.Count()).
287+
*/
288+
SetType FindConnectedComponent(const SetType& todo) const noexcept
289+
{
290+
if (todo.None()) return todo;
291+
return GetConnectedComponent(todo, todo.First());
292+
}
293+
282294
/** Determine if a subset is connected.
283295
*
284296
* Complexity: O(subset.Count()).

src/test/fuzz/cluster_linearize.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -446,19 +446,36 @@ FUZZ_TARGET(clusterlin_components)
446446
// Construct a depgraph.
447447
SpanReader reader(buffer);
448448
DepGraph<TestBitSet> depgraph;
449+
std::vector<DepGraphIndex> linearization;
449450
try {
450451
reader >> Using<DepGraphFormatter>(depgraph);
451452
} catch (const std::ios_base::failure&) {}
452453

453454
TestBitSet todo = depgraph.Positions();
454455
while (todo.Any()) {
455-
// Find a connected component inside todo.
456-
auto component = depgraph.FindConnectedComponent(todo);
456+
// Pick a transaction in todo, or nothing.
457+
std::optional<DepGraphIndex> picked;
458+
{
459+
uint64_t picked_num{0};
460+
try {
461+
reader >> VARINT(picked_num);
462+
} catch (const std::ios_base::failure&) {}
463+
if (picked_num < todo.Size() && todo[picked_num]) {
464+
picked = picked_num;
465+
}
466+
}
467+
468+
// Find a connected component inside todo, including picked if any.
469+
auto component = picked ? depgraph.GetConnectedComponent(todo, *picked)
470+
: depgraph.FindConnectedComponent(todo);
457471

458472
// The component must be a subset of todo and non-empty.
459473
assert(component.IsSubsetOf(todo));
460474
assert(component.Any());
461475

476+
// If picked was provided, the component must include it.
477+
if (picked) assert(component[*picked]);
478+
462479
// If todo is the entire graph, and the entire graph is connected, then the component must
463480
// be the entire graph.
464481
if (todo == depgraph.Positions()) {

src/test/fuzz/txgraph.cpp

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -561,36 +561,23 @@ FUZZ_TARGET(txgraph)
561561
std::shuffle(refs.begin(), refs.end(), rng);
562562
// Invoke the real function.
563563
auto result = real->CountDistinctClusters(refs, use_main);
564-
// Build a vector with representatives of the clusters the Refs occur in in the
564+
// Build a set with representatives of the clusters the Refs occur in in the
565565
// simulated graph. For each, remember the lowest-index transaction SimPos in the
566566
// cluster.
567-
std::vector<DepGraphIndex> sim_reps;
567+
SimTxGraph::SetType sim_reps;
568568
for (auto ref : refs) {
569569
// Skip Refs that do not occur in the simulated graph.
570570
auto simpos = sel_sim.Find(ref);
571571
if (simpos == SimTxGraph::MISSING) continue;
572-
// Start with component equal to just the Ref's SimPos.
573-
auto component = SimTxGraph::SetType::Singleton(simpos);
574-
// Keep adding ancestors/descendants of all elements in component until it no
575-
// longer changes.
576-
while (true) {
577-
auto old_component = component;
578-
for (auto i : component) {
579-
component |= sel_sim.graph.Ancestors(i);
580-
component |= sel_sim.graph.Descendants(i);
581-
}
582-
if (component == old_component) break;
583-
}
572+
// Find the component that includes ref.
573+
auto component = sel_sim.graph.GetConnectedComponent(sel_sim.graph.Positions(), simpos);
584574
// Remember the lowest-index SimPos in component, as a representative for it.
585575
assert(component.Any());
586-
sim_reps.push_back(component.First());
576+
sim_reps.Set(component.First());
587577
}
588-
// Remove duplicates from sim_reps.
589-
std::sort(sim_reps.begin(), sim_reps.end());
590-
sim_reps.erase(std::unique(sim_reps.begin(), sim_reps.end()), sim_reps.end());
591578
// Compare the number of deduplicated representatives with the value returned by
592579
// the real function.
593-
assert(result == sim_reps.size());
580+
assert(result == sim_reps.Count());
594581
break;
595582
} else if (command-- == 0) {
596583
// DoWork.

0 commit comments

Comments
 (0)