ai-content-maker/.venv/Lib/site-packages/nltk/draw/dispersion.py

64 lines
1.7 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
# Natural Language Toolkit: Dispersion Plots
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
A utility for displaying lexical dispersion.
"""
def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
    """
    Generate a lexical dispersion plot.

    Every token of ``text`` that matches one of ``words`` is drawn as a
    ``"|"`` marker whose x coordinate is the token's offset in ``text`` and
    whose y coordinate identifies the matched word. The first entry of
    ``words`` receives the highest y position and the last entry receives
    y = 0; each y tick is labelled with the word plotted on that row.

    :param text: The source text
    :type text: list(str) or iter(str)
    :param words: The target words
    :type words: list of str
    :param ignore_case: flag to set if case should be ignored when searching text
    :type ignore_case: bool
    :param title: the title set on the returned Axes
    :type title: str
    :return: a matplotlib Axes object that may still be modified before plotting
    :rtype: Axes
    :raises ImportError: if matplotlib cannot be imported
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as e:
        raise ImportError(
            "The plot function requires matplotlib to be installed. "
            "See https://matplotlib.org/"
        ) from e

    # Row labels ordered from y = 0 upwards. The same list drives both the
    # word -> y lookup and the tick labels, so a row's label always names the
    # word whose occurrences are drawn on it. Copying via list() also lets
    # ``words`` be any iterable, not only a reversible sequence.
    labels = list(words)[::-1]
    word2y = {
        word.casefold() if ignore_case else word: y
        for y, word in enumerate(labels)
    }

    xs, ys = [], []
    for x, token in enumerate(text):
        token = token.casefold() if ignore_case else token
        y = word2y.get(token)
        # Compare against None explicitly: y == 0 is a valid row.
        if y is not None:
            xs.append(x)
            ys.append(y)

    _, ax = plt.subplots()
    ax.plot(xs, ys, "|")
    ax.set_yticks(list(range(len(labels))), labels, color="C0")
    # Leave one unit of padding below the first row and above the last one.
    ax.set_ylim(-1, len(labels))
    ax.set_title(title)
    ax.set_xlabel("Word Offset")
    return ax
if __name__ == "__main__":
    # Demo: show where four character names occur across the tokens of the
    # Gutenberg corpus file "austen-sense.txt".
    import matplotlib.pyplot as plt
    from nltk.corpus import gutenberg

    demo_tokens = gutenberg.words("austen-sense.txt")
    dispersion_plot(
        demo_tokens,
        ["Elinor", "Marianne", "Edward", "Willoughby"],
    )
    plt.show()