""" Moody and White algorithm for k-components """ from collections import defaultdict from itertools import combinations from operator import itemgetter import networkx as nx # Define the default maximum flow function. from networkx.algorithms.flow import edmonds_karp from networkx.utils import not_implemented_for default_flow_func = edmonds_karp __all__ = ["k_components"] @not_implemented_for("directed") def k_components(G, flow_func=None): r"""Returns the k-component structure of a graph G. A `k`-component is a maximal subgraph of a graph G that has, at least, node connectivity `k`: we need to remove at least `k` nodes to break it into more components. `k`-components have an inherent hierarchical structure because they are nested in terms of connectivity: a connected graph can contain several 2-components, each of which can contain one or more 3-components, and so forth. Parameters ---------- G : NetworkX graph flow_func : function Function to perform the underlying flow computations. Default value :meth:`edmonds_karp`. This function performs better in sparse graphs with right tailed degree distributions. :meth:`shortest_augmenting_path` will perform better in denser graphs. Returns ------- k_components : dict Dictionary with all connectivity levels `k` in the input Graph as keys and a list of sets of nodes that form a k-component of level `k` as values. Raises ------ NetworkXNotImplemented If the input graph is directed. Examples -------- >>> # Petersen graph has 10 nodes and it is triconnected, thus all >>> # nodes are in a single component on all three connectivity levels >>> G = nx.petersen_graph() >>> k_components = nx.k_components(G) Notes ----- Moody and White [1]_ (appendix A) provide an algorithm for identifying k-components in a graph, which is based on Kanevsky's algorithm [2]_ for finding all minimum-size node cut-sets of a graph (implemented in :meth:`all_node_cuts` function): 1. Compute node connectivity, k, of the input graph G. 2. Identify all k-cutsets at the current level of connectivity using Kanevsky's algorithm. 3. Generate new graph components based on the removal of these cutsets. Nodes in a cutset belong to both sides of the induced cut. 4. If the graph is neither complete nor trivial, return to 1; else end. This implementation also uses some heuristics (see [3]_ for details) to speed up the computation. See also -------- node_connectivity all_node_cuts biconnected_components : special case of this function when k=2 k_edge_components : similar to this function, but uses edge-connectivity instead of node-connectivity References ---------- .. [1] Moody, J. and D. White (2003). Social cohesion and embeddedness: A hierarchical conception of social groups. American Sociological Review 68(1), 103--28. http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf .. [2] Kanevsky, A. (1993). Finding all minimum-size separating vertex sets in a graph. Networks 23(6), 533--541. http://onlinelibrary.wiley.com/doi/10.1002/net.3230230604/abstract .. [3] Torrents, J. and F. Ferraro (2015). Structural Cohesion: Visualization and Heuristics for Fast Computation. https://arxiv.org/pdf/1503.04476v1 """ # Dictionary with connectivity level (k) as keys and a list of # sets of nodes that form a k-component as values. Note that # k-compoents can overlap (but only k - 1 nodes). k_components = defaultdict(list) # Define default flow function if flow_func is None: flow_func = default_flow_func # Bicomponents as a base to check for higher order k-components for component in nx.connected_components(G): # isolated nodes have connectivity 0 comp = set(component) if len(comp) > 1: k_components[1].append(comp) bicomponents = [G.subgraph(c) for c in nx.biconnected_components(G)] for bicomponent in bicomponents: bicomp = set(bicomponent) # avoid considering dyads as bicomponents if len(bicomp) > 2: k_components[2].append(bicomp) for B in bicomponents: if len(B) <= 2: continue k = nx.node_connectivity(B, flow_func=flow_func) if k > 2: k_components[k].append(set(B)) # Perform cuts in a DFS like order. cuts = list(nx.all_node_cuts(B, k=k, flow_func=flow_func)) stack = [(k, _generate_partition(B, cuts, k))] while stack: (parent_k, partition) = stack[-1] try: nodes = next(partition) C = B.subgraph(nodes) this_k = nx.node_connectivity(C, flow_func=flow_func) if this_k > parent_k and this_k > 2: k_components[this_k].append(set(C)) cuts = list(nx.all_node_cuts(C, k=this_k, flow_func=flow_func)) if cuts: stack.append((this_k, _generate_partition(C, cuts, this_k))) except StopIteration: stack.pop() # This is necessary because k-components may only be reported at their # maximum k level. But we want to return a dictionary in which keys are # connectivity levels and values list of sets of components, without # skipping any connectivity level. Also, it's possible that subsets of # an already detected k-component appear at a level k. Checking for this # in the while loop above penalizes the common case. Thus we also have to # _consolidate all connectivity levels in _reconstruct_k_components. return _reconstruct_k_components(k_components) def _consolidate(sets, k): """Merge sets that share k or more elements. See: http://rosettacode.org/wiki/Set_consolidation The iterative python implementation posted there is faster than this because of the overhead of building a Graph and calling nx.connected_components, but it's not clear for us if we can use it in NetworkX because there is no licence for the code. """ G = nx.Graph() nodes = {i: s for i, s in enumerate(sets)} G.add_nodes_from(nodes) G.add_edges_from( (u, v) for u, v in combinations(nodes, 2) if len(nodes[u] & nodes[v]) >= k ) for component in nx.connected_components(G): yield set.union(*[nodes[n] for n in component]) def _generate_partition(G, cuts, k): def has_nbrs_in_partition(G, node, partition): for n in G[node]: if n in partition: return True return False components = [] nodes = {n for n, d in G.degree() if d > k} - {n for cut in cuts for n in cut} H = G.subgraph(nodes) for cc in nx.connected_components(H): component = set(cc) for cut in cuts: for node in cut: if has_nbrs_in_partition(G, node, cc): component.add(node) if len(component) < G.order(): components.append(component) yield from _consolidate(components, k + 1) def _reconstruct_k_components(k_comps): result = dict() max_k = max(k_comps) for k in reversed(range(1, max_k + 1)): if k == max_k: result[k] = list(_consolidate(k_comps[k], k)) elif k not in k_comps: result[k] = list(_consolidate(result[k + 1], k)) else: nodes_at_k = set.union(*k_comps[k]) to_add = [c for c in result[k + 1] if any(n not in nodes_at_k for n in c)] if to_add: result[k] = list(_consolidate(k_comps[k] + to_add, k)) else: result[k] = list(_consolidate(k_comps[k], k)) return result def build_k_number_dict(kcomps): result = {} for k, comps in sorted(kcomps.items(), key=itemgetter(0)): for comp in comps: for node in comp: result[node] = k return result