|
| 1 | +//// Community detection and clustering algorithms. |
| 2 | +//// |
| 3 | +//// Provides types and utility functions for working with community structures |
| 4 | +//// in graphs. Community detection algorithms identify groups of nodes that |
| 5 | +//// are more densely connected internally than with the rest of the graph. |
| 6 | +//// |
| 7 | +//// ## Algorithms |
| 8 | +//// |
| 9 | +//// | Algorithm | Module | Best For | |
| 10 | +//// |-----------|--------|----------| |
| 11 | +//// | [Louvain](https://en.wikipedia.org/wiki/Louvain_method) | `yog/community/louvain` | Large graphs, modularity optimization | |
| 12 | +//// | [Leiden](https://en.wikipedia.org/wiki/Leiden_algorithm) | `yog/community/leiden` | Quality guarantee, well-connected communities | |
| 13 | +//// | [Label Propagation](https://en.wikipedia.org/wiki/Label_propagation_algorithm) | `yog/community/label_propagation` | Speed, near-linear time | |
| 14 | +//// | [Girvan-Newman](https://en.wikipedia.org/wiki/Girvan%E2%80%93Newman_algorithm) | `yog/community/girvan_newman` | Hierarchical structure, edge betweenness | |
| 15 | +//// | [Infomap](https://www.mapequation.org/) | `yog/community/infomap` | Information-theoretic, flow-based | |
| 16 | +//// | [Clique Percolation](https://en.wikipedia.org/wiki/Clique_percolation_method) | `yog/community/clique_percolation` | Overlapping communities | |
| 17 | +//// | [Walktrap](https://doi.org/10.1080/15427951.2007.10129237) | `yog/community/walktrap` | Random walk-based distances | |
| 18 | +//// |
| 19 | +//// ## Core Types |
| 20 | +//// |
| 21 | +//// - **`Communities`** - Community assignment mapping nodes to community IDs |
| 22 | +//// - **`Dendrogram`** - Hierarchical community structure with multiple levels |
| 23 | +//// - **`CommunityId`** - Integer identifier for a community |
| 24 | +//// |
| 25 | +//// ## Example |
| 26 | +//// |
| 27 | +//// ```gleam |
//// import gleam/io
//// import gleam/option.{None, Some}
//// import yog
//// import yog/community
//// import yog/community/louvain
| 31 | +//// |
| 32 | +//// let graph = // ... build your graph |
| 33 | +//// |
| 34 | +//// // Detect communities |
| 35 | +//// let communities = louvain.detect(graph) |
| 36 | +//// io.debug(communities.num_communities) // => 4 |
| 37 | +//// |
| 38 | +//// // Get nodes in each community |
| 39 | +//// let communities_dict = community.communities_to_dict(communities) |
| 40 | +//// // => dict.from_list([#(0, set.from_list([1, 2, 3])), #(1, set.from_list([4, 5]))]) |
| 41 | +//// |
| 42 | +//// // Find largest community |
| 43 | +//// case community.largest_community(communities) { |
| 44 | +//// Some(community_id) -> io.debug(community_id) |
| 45 | +//// None -> io.println("No communities found") |
| 46 | +//// } |
| 47 | +//// ``` |
| 48 | +//// |
| 49 | +//// ## Choosing an Algorithm |
| 50 | +//// |
| 51 | +//// - **Louvain**: Fast and widely used, good for most cases |
| 52 | +//// - **Leiden**: Better quality than Louvain, guarantees well-connected communities |
| 53 | +//// - **Label Propagation**: Fastest option for very large graphs |
| 54 | +//// - **Girvan-Newman**: When you need hierarchical structure |
| 55 | +//// - **Infomap**: When flow/random walk structure matters |
| 56 | +//// - **Clique Percolation**: When nodes may belong to multiple communities |
| 57 | +//// - **Walktrap**: Good for capturing local structure via random walks |
| 58 | + |
| 59 | +import gleam/dict.{type Dict} |
| 60 | +import gleam/int |
| 61 | +import gleam/list |
| 62 | +import gleam/option.{type Option} |
| 63 | +import gleam/set.{type Set} |
| 64 | +import yog/model.{type NodeId} |
| 65 | + |
/// Integer identifier for a single community.
///
/// This is the value type stored in `Communities.assignments` and the key
/// type of the dictionaries returned by `communities_to_dict` and
/// `community_sizes`.
pub type CommunityId =
  Int
| 69 | + |
/// A partition of a graph's nodes into communities.
///
/// ## Fields
///
/// - `assignments`: maps every node ID to the ID of the community it belongs to
/// - `num_communities`: how many distinct communities the partition contains
///
/// ## Example
///
/// ```gleam
/// Communities(
///   assignments: dict.from_list([#(1, 0), #(2, 0), #(3, 1)]),
///   num_communities: 2,
/// )
/// // Nodes 1 and 2 belong to community 0; node 3 belongs to community 1
/// ```
pub type Communities {
  Communities(
    assignments: Dict(NodeId, CommunityId),
    num_communities: Int,
  )
}
| 89 | + |
/// A hierarchy of community partitions at several levels of granularity.
///
/// ## Fields
///
/// - `levels`: the partitions of the hierarchy, ordered from finest to coarsest
/// - `merge_order`: the pairs of community IDs that were merged, in the order
///   the merges happened (used to reconstruct the dendrogram)
///
/// ## Example
///
/// A three-level dendrogram could look like this:
/// - Level 0: every node is its own community (finest)
/// - Level 1: similar communities have been merged
/// - Level 2: a single community holds every node (coarsest)
pub type Dendrogram {
  Dendrogram(
    levels: List(Communities),
    merge_order: List(#(CommunityId, CommunityId)),
  )
}
| 109 | + |
/// Groups nodes by community: inverts the node -> community assignments into
/// a dictionary from each community ID to the set of node IDs assigned to it.
///
/// Handy when you want to walk over the members of each community instead of
/// looking up the community of one node at a time. Only communities that have
/// at least one assigned node appear in the result.
///
/// ## Example
///
/// ```gleam
/// let communities = Communities(
///   assignments: dict.from_list([#(1, 0), #(2, 0), #(3, 1)]),
///   num_communities: 2,
/// )
///
/// community.communities_to_dict(communities)
/// // => dict.from_list([
/// //   #(0, set.from_list([1, 2])),
/// //   #(1, set.from_list([3]))
/// // ])
/// ```
pub fn communities_to_dict(
  communities: Communities,
) -> Dict(CommunityId, Set(NodeId)) {
  use grouped, node_id, community_id <- dict.fold(
    communities.assignments,
    dict.new(),
  )

  // Start from an empty member set the first time a community is seen.
  let members = case dict.get(grouped, community_id) {
    Ok(existing) -> existing
    Error(_) -> set.new()
  }

  dict.insert(grouped, community_id, set.insert(members, node_id))
}
| 144 | + |
/// Returns the ID of the community that contains the most nodes.
///
/// Returns `None` when there are no assignments (for example, an empty graph).
///
/// When several communities share the largest size, the one with the smallest
/// community ID is returned, so the result does not depend on the iteration
/// order of the underlying dictionary.
///
/// ## Example
///
/// ```gleam
/// let communities = Communities(
///   assignments: dict.from_list([#(1, 0), #(2, 0), #(3, 0), #(4, 1)]),
///   num_communities: 2,
/// )
///
/// community.largest_community(communities)
/// // => Some(0)  // Community 0 has 3 nodes vs 1 for community 1
///
/// let tied = Communities(
///   assignments: dict.from_list([#(1, 7), #(2, 3)]),
///   num_communities: 2,
/// )
///
/// community.largest_community(tied)
/// // => Some(3)  // Both have 1 node; the smaller ID wins
/// ```
pub fn largest_community(communities: Communities) -> Option(CommunityId) {
  community_sizes(communities)
  |> dict.to_list
  |> list.sort(fn(a, b) {
    case a.1 == b.1 {
      // Equal sizes: ascending community ID, so ties resolve deterministically
      // instead of following the unspecified order of `dict.to_list`.
      True -> int.compare(a.0, b.0)
      // Different sizes: descending size, so the largest sorts first.
      False -> int.compare(b.1, a.1)
    }
  })
  |> list.first
  |> option.from_result
  |> option.map(fn(pair) { pair.0 })
}
| 168 | + |
/// Counts the nodes assigned to each community.
///
/// The result maps every community ID that appears in the assignments to the
/// number of nodes assigned to it.
///
/// ## Example
///
/// ```gleam
/// let communities = Communities(
///   assignments: dict.from_list([#(1, 0), #(2, 0), #(3, 1), #(4, 1), #(5, 1)]),
///   num_communities: 2,
/// )
///
/// community.community_sizes(communities)
/// // => dict.from_list([#(0, 2), #(1, 3)])
/// ```
pub fn community_sizes(communities: Communities) -> Dict(CommunityId, Int) {
  communities.assignments
  |> dict.fold(dict.new(), fn(counts, _node_id, community_id) {
    // A community not seen yet starts at one node; otherwise bump its tally.
    let updated_count = case dict.get(counts, community_id) {
      Ok(count) -> count + 1
      Error(_) -> 1
    }
    dict.insert(counts, community_id, updated_count)
  })
}
| 195 | + |
/// Merges two communities into one.
///
/// Every node currently assigned to `source` is reassigned to `target`; all
/// other assignments are left untouched. After the call no node is assigned
/// to `source` any more (unless `source == target`).
///
/// `num_communities` is decremented only when a real merge happens, i.e. when
/// `source` and `target` are different and both have at least one node
/// assigned. It is left unchanged when:
///
/// - `source == target` (nothing to do),
/// - no node belongs to `source` (nothing is reassigned), or
/// - no node belongs to `target` (the source community is merely relabelled,
///   so the number of distinct communities stays the same).
///
/// ## Parameters
///
/// - `communities`: The current community partition
/// - `source`: The community ID to merge from (will be removed)
/// - `target`: The community ID to merge into (will be kept)
///
/// ## Example
///
/// ```gleam
/// let communities = Communities(
///   assignments: dict.from_list([#(1, 0), #(2, 0), #(3, 1), #(4, 1)]),
///   num_communities: 2,
/// )
///
/// // Merge community 1 into community 0
/// let merged = community.merge_communities(communities, source: 1, target: 0)
/// // merged.assignments => dict.from_list([#(1, 0), #(2, 0), #(3, 0), #(4, 0)])
/// // merged.num_communities => 1
///
/// // Merging a community that has no nodes changes nothing
/// let same = community.merge_communities(communities, source: 9, target: 0)
/// // same.num_communities => 2
/// ```
pub fn merge_communities(
  communities: Communities,
  source: CommunityId,
  target: CommunityId,
) -> Communities {
  // Check membership against the ORIGINAL assignments, before relabelling.
  let assigned_ids = dict.values(communities.assignments)
  let source_has_nodes = list.contains(assigned_ids, source)
  let target_has_nodes = list.contains(assigned_ids, target)

  let new_assignments =
    dict.map_values(communities.assignments, fn(_node, community) {
      case community == source {
        True -> target
        False -> community
      }
    })

  // The number of distinct communities only drops when two non-empty,
  // different communities are actually combined.
  let num_communities = case
    source != target && source_has_nodes && target_has_nodes
  {
    True -> communities.num_communities - 1
    False -> communities.num_communities
  }

  Communities(assignments: new_assignments, num_communities: num_communities)
}
0 commit comments