352 lines
13 KiB
Python
352 lines
13 KiB
Python
|
"""Function for detecting communities based on Louvain Community Detection
|
|||
|
Algorithm"""
|
|||
|
|
|||
|
from collections import defaultdict, deque
|
|||
|
|
|||
|
import networkx as nx
|
|||
|
from networkx.algorithms.community import modularity
|
|||
|
from networkx.utils import py_random_state
|
|||
|
|
|||
|
__all__ = ["louvain_communities", "louvain_partitions"]
|
|||
|
|
|||
|
|
|||
|
@py_random_state("seed")
|
|||
|
def louvain_communities(
|
|||
|
G, weight="weight", resolution=1, threshold=0.0000001, seed=None
|
|||
|
):
|
|||
|
r"""Find the best partition of a graph using the Louvain Community Detection
|
|||
|
Algorithm.
|
|||
|
|
|||
|
Louvain Community Detection Algorithm is a simple method to extract the community
|
|||
|
structure of a network. This is a heuristic method based on modularity optimization. [1]_
|
|||
|
|
|||
|
The algorithm works in 2 steps. On the first step it assigns every node to be
|
|||
|
in its own community and then for each node it tries to find the maximum positive
|
|||
|
modularity gain by moving each node to all of its neighbor communities. If no positive
|
|||
|
gain is achieved the node remains in its original community.
|
|||
|
|
|||
|
The modularity gain obtained by moving an isolated node $i$ into a community $C$ can
|
|||
|
easily be calculated by the following formula (combining [1]_ [2]_ and some algebra):
|
|||
|
|
|||
|
.. math::
|
|||
|
\Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2}
|
|||
|
|
|||
|
where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links
|
|||
|
from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$,
|
|||
|
$\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$
|
|||
|
is the resolution parameter.
|
|||
|
|
|||
|
For the directed case the modularity gain can be computed using this formula according to [3]_
|
|||
|
|
|||
|
.. math::
|
|||
|
\Delta Q = \frac{k_{i,in}}{m}
|
|||
|
- \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2}
|
|||
|
|
|||
|
where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and
|
|||
|
$\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident
|
|||
|
to nodes in $C$.
|
|||
|
|
|||
|
The first phase continues until no individual move can improve the modularity.
|
|||
|
|
|||
|
The second phase consists in building a new network whose nodes are now the communities
|
|||
|
found in the first phase. To do so, the weights of the links between the new nodes are given by
|
|||
|
the sum of the weight of the links between nodes in the corresponding two communities. Once this
|
|||
|
phase is complete it is possible to reapply the first phase creating bigger communities with
|
|||
|
increased modularity.
|
|||
|
|
|||
|
The above two phases are executed until no modularity gain is achieved (or is less than
|
|||
|
the `threshold`).
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
G : NetworkX graph
|
|||
|
weight : string or None, optional (default="weight")
|
|||
|
The name of an edge attribute that holds the numerical value
|
|||
|
used as a weight. If None then each edge has weight 1.
|
|||
|
resolution : float, optional (default=1)
|
|||
|
If resolution is less than 1, the algorithm favors larger communities.
|
|||
|
Greater than 1 favors smaller communities
|
|||
|
threshold : float, optional (default=0.0000001)
|
|||
|
Modularity gain threshold for each level. If the gain of modularity
|
|||
|
between 2 levels of the algorithm is less than the given threshold
|
|||
|
then the algorithm stops and returns the resulting communities.
|
|||
|
seed : integer, random_state, or None (default)
|
|||
|
Indicator of random number generation state.
|
|||
|
See :ref:`Randomness<randomness>`.
|
|||
|
|
|||
|
Returns
|
|||
|
-------
|
|||
|
list
|
|||
|
A list of sets (partition of `G`). Each set represents one community and contains
|
|||
|
all the nodes that constitute it.
|
|||
|
|
|||
|
Examples
|
|||
|
--------
|
|||
|
>>> import networkx as nx
|
|||
|
>>> import networkx.algorithms.community as nx_comm
|
|||
|
>>> G = nx.petersen_graph()
|
|||
|
>>> nx_comm.louvain_communities(G, seed=123)
|
|||
|
[{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}]
|
|||
|
|
|||
|
Notes
|
|||
|
-----
|
|||
|
The order in which the nodes are considered can affect the final output. In the algorithm
|
|||
|
the ordering happens using a random shuffle.
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Blondel, V.D. et al. Fast unfolding of communities in
|
|||
|
large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008
|
|||
|
.. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing
|
|||
|
well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z
|
|||
|
.. [3] Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks.
|
|||
|
[Research Report] Université d’Orléans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784
|
|||
|
|
|||
|
See Also
|
|||
|
--------
|
|||
|
louvain_partitions
|
|||
|
"""
|
|||
|
|
|||
|
d = louvain_partitions(G, weight, resolution, threshold, seed)
|
|||
|
q = deque(d, maxlen=1)
|
|||
|
return q.pop()
|
|||
|
|
|||
|
|
|||
|
@py_random_state("seed")
|
|||
|
def louvain_partitions(
|
|||
|
G, weight="weight", resolution=1, threshold=0.0000001, seed=None
|
|||
|
):
|
|||
|
"""Yields partitions for each level of the Louvain Community Detection Algorithm
|
|||
|
|
|||
|
Louvain Community Detection Algorithm is a simple method to extract the community
|
|||
|
structure of a network. This is a heuristic method based on modularity optimization. [1]_
|
|||
|
|
|||
|
The partitions at each level (step of the algorithm) form a dendogram of communities.
|
|||
|
A dendrogram is a diagram representing a tree and each level represents
|
|||
|
a partition of the G graph. The top level contains the smallest communities
|
|||
|
and as you traverse to the bottom of the tree the communities get bigger
|
|||
|
and the overal modularity increases making the partition better.
|
|||
|
|
|||
|
Each level is generated by executing the two phases of the Louvain Community
|
|||
|
Detection Algorithm.
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
G : NetworkX graph
|
|||
|
weight : string or None, optional (default="weight")
|
|||
|
The name of an edge attribute that holds the numerical value
|
|||
|
used as a weight. If None then each edge has weight 1.
|
|||
|
resolution : float, optional (default=1)
|
|||
|
If resolution is less than 1, the algorithm favors larger communities.
|
|||
|
Greater than 1 favors smaller communities
|
|||
|
threshold : float, optional (default=0.0000001)
|
|||
|
Modularity gain threshold for each level. If the gain of modularity
|
|||
|
between 2 levels of the algorithm is less than the given threshold
|
|||
|
then the algorithm stops and returns the resulting communities.
|
|||
|
seed : integer, random_state, or None (default)
|
|||
|
Indicator of random number generation state.
|
|||
|
See :ref:`Randomness<randomness>`.
|
|||
|
|
|||
|
Yields
|
|||
|
------
|
|||
|
list
|
|||
|
A list of sets (partition of `G`). Each set represents one community and contains
|
|||
|
all the nodes that constitute it.
|
|||
|
|
|||
|
References
|
|||
|
----------
|
|||
|
.. [1] Blondel, V.D. et al. Fast unfolding of communities in
|
|||
|
large networks. J. Stat. Mech 10008, 1-12(2008)
|
|||
|
|
|||
|
See Also
|
|||
|
--------
|
|||
|
louvain_communities
|
|||
|
"""
|
|||
|
|
|||
|
partition = [{u} for u in G.nodes()]
|
|||
|
mod = modularity(G, partition, resolution=resolution, weight=weight)
|
|||
|
is_directed = G.is_directed()
|
|||
|
if G.is_multigraph():
|
|||
|
graph = _convert_multigraph(G, weight, is_directed)
|
|||
|
else:
|
|||
|
graph = G.__class__()
|
|||
|
graph.add_nodes_from(G)
|
|||
|
graph.add_weighted_edges_from(G.edges(data=weight, default=1))
|
|||
|
|
|||
|
m = graph.size(weight="weight")
|
|||
|
partition, inner_partition, improvement = _one_level(
|
|||
|
graph, m, partition, resolution, is_directed, seed
|
|||
|
)
|
|||
|
improvement = True
|
|||
|
while improvement:
|
|||
|
# gh-5901 protect the sets in the yielded list from further manipulation here
|
|||
|
yield [s.copy() for s in partition]
|
|||
|
new_mod = modularity(
|
|||
|
graph, inner_partition, resolution=resolution, weight="weight"
|
|||
|
)
|
|||
|
if new_mod - mod <= threshold:
|
|||
|
return
|
|||
|
mod = new_mod
|
|||
|
graph = _gen_graph(graph, inner_partition)
|
|||
|
partition, inner_partition, improvement = _one_level(
|
|||
|
graph, m, partition, resolution, is_directed, seed
|
|||
|
)
|
|||
|
|
|||
|
|
|||
|
def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None):
|
|||
|
"""Calculate one level of the Louvain partitions tree
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
G : NetworkX Graph/DiGraph
|
|||
|
The graph from which to detect communities
|
|||
|
m : number
|
|||
|
The size of the graph `G`.
|
|||
|
partition : list of sets of nodes
|
|||
|
A valid partition of the graph `G`
|
|||
|
resolution : positive number
|
|||
|
The resolution parameter for computing the modularity of a partition
|
|||
|
is_directed : bool
|
|||
|
True if `G` is a directed graph.
|
|||
|
seed : integer, random_state, or None (default)
|
|||
|
Indicator of random number generation state.
|
|||
|
See :ref:`Randomness<randomness>`.
|
|||
|
|
|||
|
"""
|
|||
|
node2com = {u: i for i, u in enumerate(G.nodes())}
|
|||
|
inner_partition = [{u} for u in G.nodes()]
|
|||
|
if is_directed:
|
|||
|
in_degrees = dict(G.in_degree(weight="weight"))
|
|||
|
out_degrees = dict(G.out_degree(weight="weight"))
|
|||
|
Stot_in = [deg for deg in in_degrees.values()]
|
|||
|
Stot_out = [deg for deg in out_degrees.values()]
|
|||
|
# Calculate weights for both in and out neighbours
|
|||
|
nbrs = {}
|
|||
|
for u in G:
|
|||
|
nbrs[u] = defaultdict(float)
|
|||
|
for _, n, wt in G.out_edges(u, data="weight"):
|
|||
|
nbrs[u][n] += wt
|
|||
|
for n, _, wt in G.in_edges(u, data="weight"):
|
|||
|
nbrs[u][n] += wt
|
|||
|
else:
|
|||
|
degrees = dict(G.degree(weight="weight"))
|
|||
|
Stot = [deg for deg in degrees.values()]
|
|||
|
nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G}
|
|||
|
rand_nodes = list(G.nodes)
|
|||
|
seed.shuffle(rand_nodes)
|
|||
|
nb_moves = 1
|
|||
|
improvement = False
|
|||
|
while nb_moves > 0:
|
|||
|
nb_moves = 0
|
|||
|
for u in rand_nodes:
|
|||
|
best_mod = 0
|
|||
|
best_com = node2com[u]
|
|||
|
weights2com = _neighbor_weights(nbrs[u], node2com)
|
|||
|
if is_directed:
|
|||
|
in_degree = in_degrees[u]
|
|||
|
out_degree = out_degrees[u]
|
|||
|
Stot_in[best_com] -= in_degree
|
|||
|
Stot_out[best_com] -= out_degree
|
|||
|
remove_cost = (
|
|||
|
-weights2com[best_com] / m
|
|||
|
+ resolution
|
|||
|
* (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com])
|
|||
|
/ m**2
|
|||
|
)
|
|||
|
else:
|
|||
|
degree = degrees[u]
|
|||
|
Stot[best_com] -= degree
|
|||
|
remove_cost = -weights2com[best_com] / m + resolution * (
|
|||
|
Stot[best_com] * degree
|
|||
|
) / (2 * m**2)
|
|||
|
for nbr_com, wt in weights2com.items():
|
|||
|
if is_directed:
|
|||
|
gain = (
|
|||
|
remove_cost
|
|||
|
+ wt / m
|
|||
|
- resolution
|
|||
|
* (
|
|||
|
out_degree * Stot_in[nbr_com]
|
|||
|
+ in_degree * Stot_out[nbr_com]
|
|||
|
)
|
|||
|
/ m**2
|
|||
|
)
|
|||
|
else:
|
|||
|
gain = (
|
|||
|
remove_cost
|
|||
|
+ wt / m
|
|||
|
- resolution * (Stot[nbr_com] * degree) / (2 * m**2)
|
|||
|
)
|
|||
|
if gain > best_mod:
|
|||
|
best_mod = gain
|
|||
|
best_com = nbr_com
|
|||
|
if is_directed:
|
|||
|
Stot_in[best_com] += in_degree
|
|||
|
Stot_out[best_com] += out_degree
|
|||
|
else:
|
|||
|
Stot[best_com] += degree
|
|||
|
if best_com != node2com[u]:
|
|||
|
com = G.nodes[u].get("nodes", {u})
|
|||
|
partition[node2com[u]].difference_update(com)
|
|||
|
inner_partition[node2com[u]].remove(u)
|
|||
|
partition[best_com].update(com)
|
|||
|
inner_partition[best_com].add(u)
|
|||
|
improvement = True
|
|||
|
nb_moves += 1
|
|||
|
node2com[u] = best_com
|
|||
|
partition = list(filter(len, partition))
|
|||
|
inner_partition = list(filter(len, inner_partition))
|
|||
|
return partition, inner_partition, improvement
|
|||
|
|
|||
|
|
|||
|
def _neighbor_weights(nbrs, node2com):
|
|||
|
"""Calculate weights between node and its neighbor communities.
|
|||
|
|
|||
|
Parameters
|
|||
|
----------
|
|||
|
nbrs : dictionary
|
|||
|
Dictionary with nodes' neighbours as keys and their edge weight as value.
|
|||
|
node2com : dictionary
|
|||
|
Dictionary with all graph's nodes as keys and their community index as value.
|
|||
|
|
|||
|
"""
|
|||
|
weights = defaultdict(float)
|
|||
|
for nbr, wt in nbrs.items():
|
|||
|
weights[node2com[nbr]] += wt
|
|||
|
return weights
|
|||
|
|
|||
|
|
|||
|
def _gen_graph(G, partition):
|
|||
|
"""Generate a new graph based on the partitions of a given graph"""
|
|||
|
H = G.__class__()
|
|||
|
node2com = {}
|
|||
|
for i, part in enumerate(partition):
|
|||
|
nodes = set()
|
|||
|
for node in part:
|
|||
|
node2com[node] = i
|
|||
|
nodes.update(G.nodes[node].get("nodes", {node}))
|
|||
|
H.add_node(i, nodes=nodes)
|
|||
|
|
|||
|
for node1, node2, wt in G.edges(data=True):
|
|||
|
wt = wt["weight"]
|
|||
|
com1 = node2com[node1]
|
|||
|
com2 = node2com[node2]
|
|||
|
temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"]
|
|||
|
H.add_edge(com1, com2, **{"weight": wt + temp})
|
|||
|
return H
|
|||
|
|
|||
|
|
|||
|
def _convert_multigraph(G, weight, is_directed):
|
|||
|
"""Convert a Multigraph to normal Graph"""
|
|||
|
if is_directed:
|
|||
|
H = nx.DiGraph()
|
|||
|
else:
|
|||
|
H = nx.Graph()
|
|||
|
H.add_nodes_from(G)
|
|||
|
for u, v, wt in G.edges(data=weight, default=1):
|
|||
|
if H.has_edge(u, v):
|
|||
|
H[u][v]["weight"] += wt
|
|||
|
else:
|
|||
|
H.add_edge(u, v, weight=wt)
|
|||
|
return H
|