38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
|
import langdetect
|
||
|
|
||
|
|
||
|
# The below _Factory is set to prevent setting global state of the library
|
||
|
# but still get consistent results.
|
||
|
# Refer : https://github.com/Mimino666/langdetect
|
||
|
|
||
|
class _Factory:
|
||
|
data = None
|
||
|
|
||
|
|
||
|
def _init_factory():
|
||
|
if _Factory.data is None:
|
||
|
_Factory.data = langdetect.detector_factory.DetectorFactory()
|
||
|
_Factory.data.load_profile(langdetect.detector_factory.PROFILES_DIRECTORY)
|
||
|
_Factory.data.seed = 0
|
||
|
|
||
|
|
||
|
def _get_language_probablities(text):
|
||
|
_init_factory()
|
||
|
detector = _Factory.data.create()
|
||
|
detector.append(text)
|
||
|
return detector.get_probabilities()
|
||
|
|
||
|
|
||
|
def detect_languages(text, confidence_threshold):
|
||
|
language_codes = []
|
||
|
try:
|
||
|
parser_data = _get_language_probablities(text)
|
||
|
for language_candidate in parser_data:
|
||
|
if language_candidate.prob > confidence_threshold:
|
||
|
language_codes.append(language_candidate.lang)
|
||
|
except langdetect.lang_detect_exception.LangDetectException:
|
||
|
# This exception can be produced with empty strings or inputs without letters like `10-10-2021`.
|
||
|
# As this could be really common, we ignore them.
|
||
|
pass
|
||
|
return language_codes
|