-
-
Notifications
You must be signed in to change notification settings - Fork 36
Added functionality of parallel maximal independent set #145
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 7 commits
e3d65be
562c725
393ff44
6c94be7
375f0b1
dacab14
c592668
0a73d7b
e76f9ef
31770ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,3 +15,4 @@ | |
| from .cluster import * | ||
| from .link_prediction import * | ||
| from .dag import * | ||
| from .mis import * | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,194 @@ | ||||||
| import inspect | ||||||
| from joblib import Parallel, delayed | ||||||
| import nx_parallel as nxp | ||||||
| import networkx as nx | ||||||
|
|
||||||
| __all__ = ["maximal_independent_set"] | ||||||
|
|
||||||
| # Import the actual NetworkX implementation (fully unwrapped, not the dispatcher) | ||||||
| from networkx.algorithms.mis import maximal_independent_set as _nx_mis_dispatcher | ||||||
| _nx_mis = inspect.unwrap(_nx_mis_dispatcher) | ||||||
|
|
||||||
|
|
||||||
| @nxp._configure_if_nx_active(should_run=nxp.should_run_if_large(50000)) | ||||||
| def maximal_independent_set(G, nodes=None, seed=None, get_chunks="chunks"): | ||||||
| """Returns a random maximal independent set guaranteed to contain | ||||||
| a given set of nodes. | ||||||
|
|
||||||
| This parallel implementation processes nodes in chunks across multiple | ||||||
| cores, using a Luby-style randomized parallel algorithm for speedup | ||||||
| on large graphs. | ||||||
|
|
||||||
| An independent set is a set of nodes such that the subgraph | ||||||
| of G induced by these nodes contains no edges. A maximal | ||||||
| independent set is an independent set such that it is not possible | ||||||
| to add a new node and still get an independent set. | ||||||
|
|
||||||
| The parallel computation divides nodes into chunks and processes them | ||||||
| in parallel, iteratively building the independent set faster than | ||||||
| sequential processing on large graphs. | ||||||
|
|
||||||
| networkx.maximal_independent_set: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.mis.maximal_independent_set.html | ||||||
|
|
||||||
| Parameters | ||||||
| ---------- | ||||||
| G : NetworkX graph | ||||||
| An undirected graph. | ||||||
|
|
||||||
| nodes : list or iterable, optional | ||||||
| Nodes that must be part of the independent set. This set of nodes | ||||||
| must be independent. If not provided, a random starting node is chosen. | ||||||
|
|
||||||
| seed : integer, random_state, or None (default) | ||||||
| Indicator of random number generation state. | ||||||
| See :ref:`Randomness<randomness>`. | ||||||
|
|
||||||
| get_chunks : str, function (default = "chunks") | ||||||
| A function that takes in a list of nodes and returns chunks. | ||||||
| The default chunking divides nodes into n_jobs chunks. | ||||||
|
|
||||||
| Returns | ||||||
| ------- | ||||||
| indep_nodes : list | ||||||
| List of nodes that are part of a maximal independent set. | ||||||
|
|
||||||
| Raises | ||||||
| ------ | ||||||
| NetworkXUnfeasible | ||||||
| If the nodes in the provided list are not part of the graph or | ||||||
| do not form an independent set, an exception is raised. | ||||||
|
|
||||||
| NetworkXNotImplemented | ||||||
| If `G` is directed. | ||||||
|
|
||||||
| Examples | ||||||
| -------- | ||||||
| >>> import networkx as nx | ||||||
| >>> import nx_parallel as nxp | ||||||
| >>> G = nx.path_graph(5) | ||||||
| >>> nxp.maximal_independent_set(G) # doctest: +SKIP | ||||||
| [4, 0, 2] | ||||||
| >>> nxp.maximal_independent_set(G, [1]) # doctest: +SKIP | ||||||
| [1, 3] | ||||||
|
|
||||||
| Notes | ||||||
| ----- | ||||||
| This algorithm does not solve the maximum independent set problem. | ||||||
| The parallel version uses a chunk-based parallel algorithm that | ||||||
| provides speedup on large graphs (>= 50000 nodes). For smaller graphs, | ||||||
| the NetworkX sequential version is used automatically. | ||||||
|
|
||||||
| """ | ||||||
| if hasattr(G, "graph_object"): | ||||||
| G = G.graph_object | ||||||
|
|
||||||
| # Validate directed graph | ||||||
| if G.is_directed(): | ||||||
| raise nx.NetworkXNotImplemented("Not implemented for directed graphs.") | ||||||
|
||||||
|
|
||||||
| # Convert seed to Random object if needed (for fallback and parallel execution) | ||||||
| import random | ||||||
| if seed is not None: | ||||||
| if hasattr(seed, 'random'): | ||||||
| # It's already a RandomState/Random object | ||||||
| rng = seed | ||||||
| else: | ||||||
| # It's a seed value | ||||||
| rng = random.Random(seed) | ||||||
| else: | ||||||
| rng = random.Random() | ||||||
|
||||||
|
|
||||||
| # Check if we should run parallel version | ||||||
| # This is needed when backend is explicitly specified | ||||||
| should_run_result = maximal_independent_set.should_run(G, nodes, seed) | ||||||
| if should_run_result is not True: | ||||||
| # Fall back to NetworkX sequential (unwrapped version needs Random object) | ||||||
| return _nx_mis(G, nodes=nodes, seed=rng) | ||||||
|
Comment on lines
+102
to
+107
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This definitely gets called by the backend machinery. This code should never be reached. |
||||||
|
|
||||||
| # Validate nodes parameter | ||||||
| if nodes is not None: | ||||||
| nodes_set = set(nodes) | ||||||
| if not nodes_set.issubset(G): | ||||||
| raise nx.NetworkXUnfeasible(f"{nodes} is not a subset of the nodes of G") | ||||||
| neighbors = set.union(*[set(G.adj[v]) for v in nodes_set]) if nodes_set else set() | ||||||
| if set.intersection(neighbors, nodes_set): | ||||||
| raise nx.NetworkXUnfeasible(f"{nodes} is not an independent set of G") | ||||||
| else: | ||||||
| nodes_set = set() | ||||||
|
|
||||||
| n_jobs = nxp.get_n_jobs() | ||||||
|
|
||||||
| # Parallel strategy: Run complete MIS algorithm on node chunks independently | ||||||
| # Then merge results by resolving conflicts | ||||||
| all_nodes = list(G.nodes()) | ||||||
|
||||||
| all_nodes = list(G.nodes()) | |
| all_nodes = list(G) |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't these nodes already get removed two lines up?
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Didn't we do this already with available?
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,125 @@ | ||
| import networkx as nx | ||
| import nx_parallel as nxp | ||
| import pytest | ||
|
|
||
|
|
||
| def test_maximal_independent_set_basic(): | ||
| G = nx.path_graph(5) | ||
| H = nxp.ParallelGraph(G) | ||
| result = nxp.maximal_independent_set(H) | ||
|
|
||
| result_set = set(result) | ||
| for node in result: | ||
| neighbors = set(G.neighbors(node)) | ||
| assert not result_set.intersection(neighbors) | ||
|
|
||
| for node in G.nodes(): | ||
| if node not in result_set: | ||
| neighbors = set(G.neighbors(node)) | ||
| assert result_set.intersection(neighbors) | ||
|
|
||
|
|
||
| def test_maximal_independent_set_with_required_nodes(): | ||
| G = nx.path_graph(7) | ||
| H = nxp.ParallelGraph(G) | ||
| required_nodes = [1, 3] | ||
| result = nxp.maximal_independent_set(H, nodes=required_nodes) | ||
|
|
||
| assert 1 in result | ||
| assert 3 in result | ||
|
|
||
| result_set = set(result) | ||
| for node in result: | ||
| neighbors = set(G.neighbors(node)) | ||
| assert not result_set.intersection(neighbors) | ||
|
|
||
|
|
||
| def test_maximal_independent_set_invalid_nodes(): | ||
| G = nx.path_graph(5) | ||
| H = nxp.ParallelGraph(G) | ||
|
|
||
| with pytest.raises(nx.NetworkXUnfeasible): | ||
| nxp.maximal_independent_set(H, nodes=[10, 20]) | ||
|
|
||
| with pytest.raises(nx.NetworkXUnfeasible): | ||
| nxp.maximal_independent_set(H, nodes=[0, 1]) | ||
|
|
||
|
|
||
| def test_maximal_independent_set_directed_graph(): | ||
| G = nx.DiGraph([(0, 1), (1, 2)]) | ||
| H = nxp.ParallelGraph(G) | ||
|
|
||
| with pytest.raises(nx.NetworkXNotImplemented): | ||
| nxp.maximal_independent_set(H) | ||
|
|
||
|
|
||
| def test_maximal_independent_set_deterministic_with_seed(): | ||
| G = nx.karate_club_graph() | ||
| H = nxp.ParallelGraph(G) | ||
|
|
||
| result1 = nxp.maximal_independent_set(H, seed=42) | ||
| result2 = nxp.maximal_independent_set(H, seed=42) | ||
|
|
||
| assert result1 == result2 | ||
|
|
||
|
|
||
| def test_maximal_independent_set_different_seeds(): | ||
| G = nx.karate_club_graph() | ||
| H = nxp.ParallelGraph(G) | ||
|
|
||
| result1 = nxp.maximal_independent_set(H, seed=42) | ||
| result2 = nxp.maximal_independent_set(H, seed=100) | ||
|
|
||
| for result in [result1, result2]: | ||
| result_set = set(result) | ||
| for node in result: | ||
| neighbors = set(G.neighbors(node)) | ||
| assert not result_set.intersection(neighbors) | ||
|
|
||
|
|
||
| def test_maximal_independent_set_complete_graph(): | ||
| G = nx.complete_graph(5) | ||
| H = nxp.ParallelGraph(G) | ||
| result = nxp.maximal_independent_set(H) | ||
|
|
||
| assert len(result) == 1 | ||
|
|
||
|
|
||
| def test_maximal_independent_set_empty_graph(): | ||
| G = nx.empty_graph(5) | ||
| H = nxp.ParallelGraph(G) | ||
| result = nxp.maximal_independent_set(H) | ||
|
|
||
| assert len(result) == 5 | ||
|
|
||
|
|
||
| def test_maximal_independent_set_large_graph(): | ||
| G = nx.fast_gnp_random_graph(150, 0.1, seed=42) | ||
| H = nxp.ParallelGraph(G) | ||
| result = nxp.maximal_independent_set(H, seed=42) | ||
|
|
||
| result_set = set(result) | ||
| for node in result: | ||
| neighbors = set(G.neighbors(node)) | ||
| assert not result_set.intersection(neighbors) | ||
|
|
||
| for node in G.nodes(): | ||
| if node not in result_set: | ||
| neighbors = set(G.neighbors(node)) | ||
| assert result_set.intersection(neighbors) | ||
|
|
||
|
|
||
| def test_maximal_independent_set_random_graph(): | ||
| G = nx.fast_gnp_random_graph(50, 0.1, seed=42) | ||
| H = nxp.ParallelGraph(G) | ||
| result = nxp.maximal_independent_set(H, seed=42) | ||
|
|
||
| result_set = set(result) | ||
| for node in result: | ||
| neighbors = set(G.neighbors(node)) | ||
| assert not result_set.intersection(neighbors) | ||
|
|
||
| for node in G.nodes(): | ||
| if node not in result_set: | ||
| neighbors = set(G.neighbors(node)) | ||
| assert result_set.intersection(neighbors) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,20 +9,39 @@ | |
| ] | ||
|
|
||
|
|
||
| def should_skip_parallel(*_): | ||
| def should_skip_parallel(*_, **__): | ||
| return "Fast algorithm; skip parallel execution" | ||
|
|
||
|
|
||
| def should_run_if_large(G, *_): | ||
| if hasattr(G, "graph_object"): | ||
| G = G.graph_object | ||
| def should_run_if_large(nodes_threshold=200, *_, **__): | ||
| # If nodes_threshold is a graph-like object, it's being used as a direct should_run | ||
| # function instead of a factory. Use default threshold. | ||
|
||
| if hasattr(nodes_threshold, '__len__') and hasattr(nodes_threshold, 'nodes'): | ||
| # nodes_threshold is actually a graph, use it as G with default threshold | ||
| G = nodes_threshold | ||
| threshold = 200 | ||
|
|
||
| if len(G) <= 200: | ||
| return "Graph too small for parallel execution" | ||
| return True | ||
| if hasattr(G, "graph_object"): | ||
| G = G.graph_object | ||
|
|
||
| if len(G) < threshold: | ||
| return "Graph too small for parallel execution" | ||
| return True | ||
|
|
||
| # Otherwise, it's being used as a factory, return a wrapper | ||
| threshold = nodes_threshold | ||
| def wrapper(G, *_, **__): | ||
| if hasattr(G, "graph_object"): | ||
| G = G.graph_object | ||
dschult marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if len(G) < threshold: | ||
| return "Graph too small for parallel execution" | ||
| return True | ||
|
|
||
| return wrapper | ||
|
|
||
|
|
||
| def default_should_run(*_): | ||
| def default_should_run(*_, **__): | ||
| n_jobs = nxp.get_n_jobs() | ||
| print(f"{n_jobs=}") | ||
| if n_jobs in (None, 0, 1): | ||
|
|
@@ -31,7 +50,7 @@ def default_should_run(*_): | |
|
|
||
|
|
||
| def should_run_if_sparse(threshold=0.3): | ||
| def wrapper(G, *_): | ||
| def wrapper(G, *_, **__): | ||
| if hasattr(G, "graph_object"): | ||
| G = G.graph_object | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you can use
`nx.maximal_independent_set._orig_func` as the way to call the original networkx function. Can you check if that works? It is possible I haven't fully understood how that works. That way we don't have to use `inspect`.