ai-content-maker/.venv/Lib/site-packages/pysbd/lang/persian.py

31 lines
897 B
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
# -*- coding: utf-8 -*-
import re
from pysbd.abbreviation_replacer import AbbreviationReplacer
from pysbd.lang.common import Common, Standard
from pysbd.utils import Rule
class Persian(Common, Standard):
iso_code = 'fa'
Punctuations = ['?', '!', ':', '.', '؟']
SENTENCE_BOUNDARY_REGEX = r'.*?[:\.!\?؟]|.*?\Z|.*?$'
# Rubular: http://rubular.com/r/RX5HpdDIyv
ReplaceColonBetweenNumbersRule = Rule(r'(?<=\d):(?=\d)', '')
# Rubular: http://rubular.com/r/kPRgApNHUg
ReplaceNonSentenceBoundaryCommaRule = Rule(r'،(?=\s\S+،)', '')
class AbbreviationReplacer(AbbreviationReplacer):
SENTENCE_STARTERS = []
def __init__(self, text, lang):
super().__init__(text, lang)
def scan_for_replacements(self, txt, am, index, character_array):
txt = re.sub('(?<={0})\.'.format(am), '', txt)
return txt