ai-content-maker/.venv/Lib/site-packages/dateparser/custom_language_detection/langdetect.py

38 lines
1.2 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
import langdetect
# The _Factory class below holds a private detector factory so that the
# library's global state is not modified while results stay consistent.
# Refer to: https://github.com/Mimino666/langdetect
class _Factory:
    """Module-private holder for the lazily created detector factory."""

    # Starts as None; _init_factory() replaces it with a configured factory.
    data: "langdetect.detector_factory.DetectorFactory | None" = None
def _init_factory():
    """Lazily build the shared detector factory stored on ``_Factory.data``.

    On the first call a ``DetectorFactory`` is created, the language profiles
    found in ``PROFILES_DIRECTORY`` are loaded into it and its ``seed`` is set
    to 0. Once ``_Factory.data`` is populated, later calls do nothing.

    The factory is published to ``_Factory.data`` only after it has been
    fully configured. If loading the profiles raises, the exception
    propagates and ``_Factory.data`` stays ``None``, so the next call retries
    instead of reusing a factory that has no profiles loaded.
    """
    if _Factory.data is not None:
        return

    # Configure a local instance first so a failure cannot leave a
    # half-initialised factory cached on _Factory.
    factory = langdetect.detector_factory.DetectorFactory()
    factory.load_profile(langdetect.detector_factory.PROFILES_DIRECTORY)
    # Seed is set on this instance only, not on the library's class.
    factory.seed = 0
    _Factory.data = factory
def _get_language_probablities(text):
    """Return the language probabilities langdetect computes for *text*.

    Ensures the shared factory exists, creates a new detector from it,
    appends *text* and returns the result of ``get_probabilities()``.
    """
    _init_factory()
    language_detector = _Factory.data.create()
    language_detector.append(text)
    probabilities = language_detector.get_probabilities()
    return probabilities
def detect_languages(text, confidence_threshold):
    """Return the codes of the languages detected in *text*.

    Only candidates whose probability is strictly greater than
    *confidence_threshold* are kept, in the order the detector reports them.
    An empty list is returned when detection raises ``LangDetectException``.
    """
    try:
        candidates = _get_language_probablities(text)
    except langdetect.lang_detect_exception.LangDetectException:
        # Empty strings and inputs without letters (e.g. `10-10-2021`) can
        # produce this exception. Such inputs may be very common, so they
        # are deliberately ignored.
        return []

    return [
        candidate.lang
        for candidate in candidates
        if candidate.prob > confidence_threshold
    ]