ai-content-maker/.venv/Lib/site-packages/networkx/algorithms/community/louvain.py

352 lines
13 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
"""Function for detecting communities based on Louvain Community Detection
Algorithm"""
from collections import defaultdict, deque
import networkx as nx
from networkx.algorithms.community import modularity
from networkx.utils import py_random_state
__all__ = ["louvain_communities", "louvain_partitions"]
@py_random_state("seed")
def louvain_communities(
G, weight="weight", resolution=1, threshold=0.0000001, seed=None
):
r"""Find the best partition of a graph using the Louvain Community Detection
Algorithm.
Louvain Community Detection Algorithm is a simple method to extract the community
structure of a network. This is a heuristic method based on modularity optimization. [1]_
The algorithm works in 2 steps. On the first step it assigns every node to be
in its own community and then for each node it tries to find the maximum positive
modularity gain by moving each node to all of its neighbor communities. If no positive
gain is achieved the node remains in its original community.
The modularity gain obtained by moving an isolated node $i$ into a community $C$ can
easily be calculated by the following formula (combining [1]_ [2]_ and some algebra):
.. math::
\Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2}
where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links
from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$,
$\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$
is the resolution parameter.
For the directed case the modularity gain can be computed using this formula according to [3]_
.. math::
\Delta Q = \frac{k_{i,in}}{m}
- \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2}
where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and
$\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident
to nodes in $C$.
The first phase continues until no individual move can improve the modularity.
The second phase consists in building a new network whose nodes are now the communities
found in the first phase. To do so, the weights of the links between the new nodes are given by
the sum of the weight of the links between nodes in the corresponding two communities. Once this
phase is complete it is possible to reapply the first phase creating bigger communities with
increased modularity.
The above two phases are executed until no modularity gain is achieved (or is less than
the `threshold`).
Parameters
----------
G : NetworkX graph
weight : string or None, optional (default="weight")
The name of an edge attribute that holds the numerical value
used as a weight. If None then each edge has weight 1.
resolution : float, optional (default=1)
If resolution is less than 1, the algorithm favors larger communities.
Greater than 1 favors smaller communities
threshold : float, optional (default=0.0000001)
Modularity gain threshold for each level. If the gain of modularity
between 2 levels of the algorithm is less than the given threshold
then the algorithm stops and returns the resulting communities.
seed : integer, random_state, or None (default)
Indicator of random number generation state.
See :ref:`Randomness<randomness>`.
Returns
-------
list
A list of sets (partition of `G`). Each set represents one community and contains
all the nodes that constitute it.
Examples
--------
>>> import networkx as nx
>>> import networkx.algorithms.community as nx_comm
>>> G = nx.petersen_graph()
>>> nx_comm.louvain_communities(G, seed=123)
[{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}]
Notes
-----
The order in which the nodes are considered can affect the final output. In the algorithm
the ordering happens using a random shuffle.
References
----------
.. [1] Blondel, V.D. et al. Fast unfolding of communities in
large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008
.. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing
well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z
.. [3] Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks.
[Research Report] Université dOrléans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784
See Also
--------
louvain_partitions
"""
d = louvain_partitions(G, weight, resolution, threshold, seed)
q = deque(d, maxlen=1)
return q.pop()
@py_random_state("seed")
def louvain_partitions(
G, weight="weight", resolution=1, threshold=0.0000001, seed=None
):
"""Yields partitions for each level of the Louvain Community Detection Algorithm
Louvain Community Detection Algorithm is a simple method to extract the community
structure of a network. This is a heuristic method based on modularity optimization. [1]_
The partitions at each level (step of the algorithm) form a dendogram of communities.
A dendrogram is a diagram representing a tree and each level represents
a partition of the G graph. The top level contains the smallest communities
and as you traverse to the bottom of the tree the communities get bigger
and the overal modularity increases making the partition better.
Each level is generated by executing the two phases of the Louvain Community
Detection Algorithm.
Parameters
----------
G : NetworkX graph
weight : string or None, optional (default="weight")
The name of an edge attribute that holds the numerical value
used as a weight. If None then each edge has weight 1.
resolution : float, optional (default=1)
If resolution is less than 1, the algorithm favors larger communities.
Greater than 1 favors smaller communities
threshold : float, optional (default=0.0000001)
Modularity gain threshold for each level. If the gain of modularity
between 2 levels of the algorithm is less than the given threshold
then the algorithm stops and returns the resulting communities.
seed : integer, random_state, or None (default)
Indicator of random number generation state.
See :ref:`Randomness<randomness>`.
Yields
------
list
A list of sets (partition of `G`). Each set represents one community and contains
all the nodes that constitute it.
References
----------
.. [1] Blondel, V.D. et al. Fast unfolding of communities in
large networks. J. Stat. Mech 10008, 1-12(2008)
See Also
--------
louvain_communities
"""
partition = [{u} for u in G.nodes()]
mod = modularity(G, partition, resolution=resolution, weight=weight)
is_directed = G.is_directed()
if G.is_multigraph():
graph = _convert_multigraph(G, weight, is_directed)
else:
graph = G.__class__()
graph.add_nodes_from(G)
graph.add_weighted_edges_from(G.edges(data=weight, default=1))
m = graph.size(weight="weight")
partition, inner_partition, improvement = _one_level(
graph, m, partition, resolution, is_directed, seed
)
improvement = True
while improvement:
# gh-5901 protect the sets in the yielded list from further manipulation here
yield [s.copy() for s in partition]
new_mod = modularity(
graph, inner_partition, resolution=resolution, weight="weight"
)
if new_mod - mod <= threshold:
return
mod = new_mod
graph = _gen_graph(graph, inner_partition)
partition, inner_partition, improvement = _one_level(
graph, m, partition, resolution, is_directed, seed
)
def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None):
"""Calculate one level of the Louvain partitions tree
Parameters
----------
G : NetworkX Graph/DiGraph
The graph from which to detect communities
m : number
The size of the graph `G`.
partition : list of sets of nodes
A valid partition of the graph `G`
resolution : positive number
The resolution parameter for computing the modularity of a partition
is_directed : bool
True if `G` is a directed graph.
seed : integer, random_state, or None (default)
Indicator of random number generation state.
See :ref:`Randomness<randomness>`.
"""
node2com = {u: i for i, u in enumerate(G.nodes())}
inner_partition = [{u} for u in G.nodes()]
if is_directed:
in_degrees = dict(G.in_degree(weight="weight"))
out_degrees = dict(G.out_degree(weight="weight"))
Stot_in = [deg for deg in in_degrees.values()]
Stot_out = [deg for deg in out_degrees.values()]
# Calculate weights for both in and out neighbours
nbrs = {}
for u in G:
nbrs[u] = defaultdict(float)
for _, n, wt in G.out_edges(u, data="weight"):
nbrs[u][n] += wt
for n, _, wt in G.in_edges(u, data="weight"):
nbrs[u][n] += wt
else:
degrees = dict(G.degree(weight="weight"))
Stot = [deg for deg in degrees.values()]
nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G}
rand_nodes = list(G.nodes)
seed.shuffle(rand_nodes)
nb_moves = 1
improvement = False
while nb_moves > 0:
nb_moves = 0
for u in rand_nodes:
best_mod = 0
best_com = node2com[u]
weights2com = _neighbor_weights(nbrs[u], node2com)
if is_directed:
in_degree = in_degrees[u]
out_degree = out_degrees[u]
Stot_in[best_com] -= in_degree
Stot_out[best_com] -= out_degree
remove_cost = (
-weights2com[best_com] / m
+ resolution
* (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com])
/ m**2
)
else:
degree = degrees[u]
Stot[best_com] -= degree
remove_cost = -weights2com[best_com] / m + resolution * (
Stot[best_com] * degree
) / (2 * m**2)
for nbr_com, wt in weights2com.items():
if is_directed:
gain = (
remove_cost
+ wt / m
- resolution
* (
out_degree * Stot_in[nbr_com]
+ in_degree * Stot_out[nbr_com]
)
/ m**2
)
else:
gain = (
remove_cost
+ wt / m
- resolution * (Stot[nbr_com] * degree) / (2 * m**2)
)
if gain > best_mod:
best_mod = gain
best_com = nbr_com
if is_directed:
Stot_in[best_com] += in_degree
Stot_out[best_com] += out_degree
else:
Stot[best_com] += degree
if best_com != node2com[u]:
com = G.nodes[u].get("nodes", {u})
partition[node2com[u]].difference_update(com)
inner_partition[node2com[u]].remove(u)
partition[best_com].update(com)
inner_partition[best_com].add(u)
improvement = True
nb_moves += 1
node2com[u] = best_com
partition = list(filter(len, partition))
inner_partition = list(filter(len, inner_partition))
return partition, inner_partition, improvement
def _neighbor_weights(nbrs, node2com):
"""Calculate weights between node and its neighbor communities.
Parameters
----------
nbrs : dictionary
Dictionary with nodes' neighbours as keys and their edge weight as value.
node2com : dictionary
Dictionary with all graph's nodes as keys and their community index as value.
"""
weights = defaultdict(float)
for nbr, wt in nbrs.items():
weights[node2com[nbr]] += wt
return weights
def _gen_graph(G, partition):
"""Generate a new graph based on the partitions of a given graph"""
H = G.__class__()
node2com = {}
for i, part in enumerate(partition):
nodes = set()
for node in part:
node2com[node] = i
nodes.update(G.nodes[node].get("nodes", {node}))
H.add_node(i, nodes=nodes)
for node1, node2, wt in G.edges(data=True):
wt = wt["weight"]
com1 = node2com[node1]
com2 = node2com[node2]
temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"]
H.add_edge(com1, com2, **{"weight": wt + temp})
return H
def _convert_multigraph(G, weight, is_directed):
"""Convert a Multigraph to normal Graph"""
if is_directed:
H = nx.DiGraph()
else:
H = nx.Graph()
H.add_nodes_from(G)
for u, v, wt in G.edges(data=weight, default=1):
if H.has_edge(u, v):
H[u][v]["weight"] += wt
else:
H.add_edge(u, v, weight=wt)
return H