352 lines
13 KiB
Python
352 lines
13 KiB
Python
"""Function for detecting communities based on Louvain Community Detection
|
||
Algorithm"""
|
||
|
||
from collections import defaultdict, deque
|
||
|
||
import networkx as nx
|
||
from networkx.algorithms.community import modularity
|
||
from networkx.utils import py_random_state
|
||
|
||
__all__ = ["louvain_communities", "louvain_partitions"]
|
||
|
||
|
||
@py_random_state("seed")
|
||
def louvain_communities(
|
||
G, weight="weight", resolution=1, threshold=0.0000001, seed=None
|
||
):
|
||
r"""Find the best partition of a graph using the Louvain Community Detection
|
||
Algorithm.
|
||
|
||
Louvain Community Detection Algorithm is a simple method to extract the community
|
||
structure of a network. This is a heuristic method based on modularity optimization. [1]_
|
||
|
||
The algorithm works in 2 steps. On the first step it assigns every node to be
|
||
in its own community and then for each node it tries to find the maximum positive
|
||
modularity gain by moving each node to all of its neighbor communities. If no positive
|
||
gain is achieved the node remains in its original community.
|
||
|
||
The modularity gain obtained by moving an isolated node $i$ into a community $C$ can
|
||
easily be calculated by the following formula (combining [1]_ [2]_ and some algebra):
|
||
|
||
.. math::
|
||
\Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2}
|
||
|
||
where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links
|
||
from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$,
|
||
$\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$
|
||
is the resolution parameter.
|
||
|
||
For the directed case the modularity gain can be computed using this formula according to [3]_
|
||
|
||
.. math::
|
||
\Delta Q = \frac{k_{i,in}}{m}
|
||
- \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2}
|
||
|
||
where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and
|
||
$\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident
|
||
to nodes in $C$.
|
||
|
||
The first phase continues until no individual move can improve the modularity.
|
||
|
||
The second phase consists in building a new network whose nodes are now the communities
|
||
found in the first phase. To do so, the weights of the links between the new nodes are given by
|
||
the sum of the weight of the links between nodes in the corresponding two communities. Once this
|
||
phase is complete it is possible to reapply the first phase creating bigger communities with
|
||
increased modularity.
|
||
|
||
The above two phases are executed until no modularity gain is achieved (or is less than
|
||
the `threshold`).
|
||
|
||
Parameters
|
||
----------
|
||
G : NetworkX graph
|
||
weight : string or None, optional (default="weight")
|
||
The name of an edge attribute that holds the numerical value
|
||
used as a weight. If None then each edge has weight 1.
|
||
resolution : float, optional (default=1)
|
||
If resolution is less than 1, the algorithm favors larger communities.
|
||
Greater than 1 favors smaller communities
|
||
threshold : float, optional (default=0.0000001)
|
||
Modularity gain threshold for each level. If the gain of modularity
|
||
between 2 levels of the algorithm is less than the given threshold
|
||
then the algorithm stops and returns the resulting communities.
|
||
seed : integer, random_state, or None (default)
|
||
Indicator of random number generation state.
|
||
See :ref:`Randomness<randomness>`.
|
||
|
||
Returns
|
||
-------
|
||
list
|
||
A list of sets (partition of `G`). Each set represents one community and contains
|
||
all the nodes that constitute it.
|
||
|
||
Examples
|
||
--------
|
||
>>> import networkx as nx
|
||
>>> import networkx.algorithms.community as nx_comm
|
||
>>> G = nx.petersen_graph()
|
||
>>> nx_comm.louvain_communities(G, seed=123)
|
||
[{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}]
|
||
|
||
Notes
|
||
-----
|
||
The order in which the nodes are considered can affect the final output. In the algorithm
|
||
the ordering happens using a random shuffle.
|
||
|
||
References
|
||
----------
|
||
.. [1] Blondel, V.D. et al. Fast unfolding of communities in
|
||
large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008
|
||
.. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing
|
||
well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z
|
||
.. [3] Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks.
|
||
[Research Report] Université d’Orléans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784
|
||
|
||
See Also
|
||
--------
|
||
louvain_partitions
|
||
"""
|
||
|
||
d = louvain_partitions(G, weight, resolution, threshold, seed)
|
||
q = deque(d, maxlen=1)
|
||
return q.pop()
|
||
|
||
|
||
@py_random_state("seed")
|
||
def louvain_partitions(
|
||
G, weight="weight", resolution=1, threshold=0.0000001, seed=None
|
||
):
|
||
"""Yields partitions for each level of the Louvain Community Detection Algorithm
|
||
|
||
Louvain Community Detection Algorithm is a simple method to extract the community
|
||
structure of a network. This is a heuristic method based on modularity optimization. [1]_
|
||
|
||
The partitions at each level (step of the algorithm) form a dendogram of communities.
|
||
A dendrogram is a diagram representing a tree and each level represents
|
||
a partition of the G graph. The top level contains the smallest communities
|
||
and as you traverse to the bottom of the tree the communities get bigger
|
||
and the overal modularity increases making the partition better.
|
||
|
||
Each level is generated by executing the two phases of the Louvain Community
|
||
Detection Algorithm.
|
||
|
||
Parameters
|
||
----------
|
||
G : NetworkX graph
|
||
weight : string or None, optional (default="weight")
|
||
The name of an edge attribute that holds the numerical value
|
||
used as a weight. If None then each edge has weight 1.
|
||
resolution : float, optional (default=1)
|
||
If resolution is less than 1, the algorithm favors larger communities.
|
||
Greater than 1 favors smaller communities
|
||
threshold : float, optional (default=0.0000001)
|
||
Modularity gain threshold for each level. If the gain of modularity
|
||
between 2 levels of the algorithm is less than the given threshold
|
||
then the algorithm stops and returns the resulting communities.
|
||
seed : integer, random_state, or None (default)
|
||
Indicator of random number generation state.
|
||
See :ref:`Randomness<randomness>`.
|
||
|
||
Yields
|
||
------
|
||
list
|
||
A list of sets (partition of `G`). Each set represents one community and contains
|
||
all the nodes that constitute it.
|
||
|
||
References
|
||
----------
|
||
.. [1] Blondel, V.D. et al. Fast unfolding of communities in
|
||
large networks. J. Stat. Mech 10008, 1-12(2008)
|
||
|
||
See Also
|
||
--------
|
||
louvain_communities
|
||
"""
|
||
|
||
partition = [{u} for u in G.nodes()]
|
||
mod = modularity(G, partition, resolution=resolution, weight=weight)
|
||
is_directed = G.is_directed()
|
||
if G.is_multigraph():
|
||
graph = _convert_multigraph(G, weight, is_directed)
|
||
else:
|
||
graph = G.__class__()
|
||
graph.add_nodes_from(G)
|
||
graph.add_weighted_edges_from(G.edges(data=weight, default=1))
|
||
|
||
m = graph.size(weight="weight")
|
||
partition, inner_partition, improvement = _one_level(
|
||
graph, m, partition, resolution, is_directed, seed
|
||
)
|
||
improvement = True
|
||
while improvement:
|
||
# gh-5901 protect the sets in the yielded list from further manipulation here
|
||
yield [s.copy() for s in partition]
|
||
new_mod = modularity(
|
||
graph, inner_partition, resolution=resolution, weight="weight"
|
||
)
|
||
if new_mod - mod <= threshold:
|
||
return
|
||
mod = new_mod
|
||
graph = _gen_graph(graph, inner_partition)
|
||
partition, inner_partition, improvement = _one_level(
|
||
graph, m, partition, resolution, is_directed, seed
|
||
)
|
||
|
||
|
||
def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None):
|
||
"""Calculate one level of the Louvain partitions tree
|
||
|
||
Parameters
|
||
----------
|
||
G : NetworkX Graph/DiGraph
|
||
The graph from which to detect communities
|
||
m : number
|
||
The size of the graph `G`.
|
||
partition : list of sets of nodes
|
||
A valid partition of the graph `G`
|
||
resolution : positive number
|
||
The resolution parameter for computing the modularity of a partition
|
||
is_directed : bool
|
||
True if `G` is a directed graph.
|
||
seed : integer, random_state, or None (default)
|
||
Indicator of random number generation state.
|
||
See :ref:`Randomness<randomness>`.
|
||
|
||
"""
|
||
node2com = {u: i for i, u in enumerate(G.nodes())}
|
||
inner_partition = [{u} for u in G.nodes()]
|
||
if is_directed:
|
||
in_degrees = dict(G.in_degree(weight="weight"))
|
||
out_degrees = dict(G.out_degree(weight="weight"))
|
||
Stot_in = [deg for deg in in_degrees.values()]
|
||
Stot_out = [deg for deg in out_degrees.values()]
|
||
# Calculate weights for both in and out neighbours
|
||
nbrs = {}
|
||
for u in G:
|
||
nbrs[u] = defaultdict(float)
|
||
for _, n, wt in G.out_edges(u, data="weight"):
|
||
nbrs[u][n] += wt
|
||
for n, _, wt in G.in_edges(u, data="weight"):
|
||
nbrs[u][n] += wt
|
||
else:
|
||
degrees = dict(G.degree(weight="weight"))
|
||
Stot = [deg for deg in degrees.values()]
|
||
nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G}
|
||
rand_nodes = list(G.nodes)
|
||
seed.shuffle(rand_nodes)
|
||
nb_moves = 1
|
||
improvement = False
|
||
while nb_moves > 0:
|
||
nb_moves = 0
|
||
for u in rand_nodes:
|
||
best_mod = 0
|
||
best_com = node2com[u]
|
||
weights2com = _neighbor_weights(nbrs[u], node2com)
|
||
if is_directed:
|
||
in_degree = in_degrees[u]
|
||
out_degree = out_degrees[u]
|
||
Stot_in[best_com] -= in_degree
|
||
Stot_out[best_com] -= out_degree
|
||
remove_cost = (
|
||
-weights2com[best_com] / m
|
||
+ resolution
|
||
* (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com])
|
||
/ m**2
|
||
)
|
||
else:
|
||
degree = degrees[u]
|
||
Stot[best_com] -= degree
|
||
remove_cost = -weights2com[best_com] / m + resolution * (
|
||
Stot[best_com] * degree
|
||
) / (2 * m**2)
|
||
for nbr_com, wt in weights2com.items():
|
||
if is_directed:
|
||
gain = (
|
||
remove_cost
|
||
+ wt / m
|
||
- resolution
|
||
* (
|
||
out_degree * Stot_in[nbr_com]
|
||
+ in_degree * Stot_out[nbr_com]
|
||
)
|
||
/ m**2
|
||
)
|
||
else:
|
||
gain = (
|
||
remove_cost
|
||
+ wt / m
|
||
- resolution * (Stot[nbr_com] * degree) / (2 * m**2)
|
||
)
|
||
if gain > best_mod:
|
||
best_mod = gain
|
||
best_com = nbr_com
|
||
if is_directed:
|
||
Stot_in[best_com] += in_degree
|
||
Stot_out[best_com] += out_degree
|
||
else:
|
||
Stot[best_com] += degree
|
||
if best_com != node2com[u]:
|
||
com = G.nodes[u].get("nodes", {u})
|
||
partition[node2com[u]].difference_update(com)
|
||
inner_partition[node2com[u]].remove(u)
|
||
partition[best_com].update(com)
|
||
inner_partition[best_com].add(u)
|
||
improvement = True
|
||
nb_moves += 1
|
||
node2com[u] = best_com
|
||
partition = list(filter(len, partition))
|
||
inner_partition = list(filter(len, inner_partition))
|
||
return partition, inner_partition, improvement
|
||
|
||
|
||
def _neighbor_weights(nbrs, node2com):
|
||
"""Calculate weights between node and its neighbor communities.
|
||
|
||
Parameters
|
||
----------
|
||
nbrs : dictionary
|
||
Dictionary with nodes' neighbours as keys and their edge weight as value.
|
||
node2com : dictionary
|
||
Dictionary with all graph's nodes as keys and their community index as value.
|
||
|
||
"""
|
||
weights = defaultdict(float)
|
||
for nbr, wt in nbrs.items():
|
||
weights[node2com[nbr]] += wt
|
||
return weights
|
||
|
||
|
||
def _gen_graph(G, partition):
|
||
"""Generate a new graph based on the partitions of a given graph"""
|
||
H = G.__class__()
|
||
node2com = {}
|
||
for i, part in enumerate(partition):
|
||
nodes = set()
|
||
for node in part:
|
||
node2com[node] = i
|
||
nodes.update(G.nodes[node].get("nodes", {node}))
|
||
H.add_node(i, nodes=nodes)
|
||
|
||
for node1, node2, wt in G.edges(data=True):
|
||
wt = wt["weight"]
|
||
com1 = node2com[node1]
|
||
com2 = node2com[node2]
|
||
temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"]
|
||
H.add_edge(com1, com2, **{"weight": wt + temp})
|
||
return H
|
||
|
||
|
||
def _convert_multigraph(G, weight, is_directed):
|
||
"""Convert a Multigraph to normal Graph"""
|
||
if is_directed:
|
||
H = nx.DiGraph()
|
||
else:
|
||
H = nx.Graph()
|
||
H.add_nodes_from(G)
|
||
for u, v, wt in G.edges(data=weight, default=1):
|
||
if H.has_edge(u, v):
|
||
H[u][v]["weight"] += wt
|
||
else:
|
||
H.add_edge(u, v, weight=wt)
|
||
return H
|