50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
# Natural Language Toolkit: WordNet stemmer interface
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
#         Edward Loper <edloper@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
|
|
|
|
from nltk.corpus import wordnet as wn
|
|
|
|
|
|
class WordNetLemmatizer:
    """
    WordNet Lemmatizer

    Lemmatize using WordNet's built-in morphy function.
    Returns the input word unchanged if it cannot be found in WordNet.

        >>> from nltk.stem import WordNetLemmatizer
        >>> wnl = WordNetLemmatizer()
        >>> print(wnl.lemmatize('dogs'))
        dog
        >>> print(wnl.lemmatize('churches'))
        church
        >>> print(wnl.lemmatize('aardwolves'))
        aardwolf
        >>> print(wnl.lemmatize('abaci'))
        abacus
        >>> print(wnl.lemmatize('hardrock'))
        hardrock
    """

    def lemmatize(self, word: str, pos: str = "n") -> str:
        """Lemmatize `word` using WordNet's built-in morphy function.

        Returns the input word unchanged if it cannot be found in WordNet.
        When several candidate lemmas are produced, the shortest one is
        returned.

        :param word: The input word to lemmatize.
        :type word: str
        :param pos: The Part Of Speech tag. Valid options are `"n"` for nouns,
            `"v"` for verbs, `"a"` for adjectives, `"r"` for adverbs and `"s"`
            for satellite adjectives.
        :type pos: str
        :return: The lemma of `word`, for the given `pos`.
        """
        # NOTE(review): `_morphy` is a private helper of the WordNet corpus
        # reader; it is presumed to return a (possibly empty) sequence of
        # candidate lemma strings -- confirm against the installed NLTK.
        lemmas = wn._morphy(word, pos)
        # An empty result means no candidate was found: fall back to the
        # caller's word. Otherwise prefer the shortest candidate.
        return min(lemmas, key=len) if lemmas else word

    def __repr__(self) -> str:
        """Return the fixed display string for this lemmatizer."""
        return "<WordNetLemmatizer>"