ai-content-maker/.venv/Lib/site-packages/pypinyin/contrib/tone_sandhi.py

164 lines
6.9 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from pypinyin.contrib.tone_convert import tone_to_tone2, tone2_to_tone
# Matches one decimal digit; used below to overwrite the tone number of a
# syllable written in digit-marked pinyin (e.g. the "4" in "bu4").
_re_num = re.compile(r'\d')
class ToneSandhiMixin(object):
    """Mixin that applies Standard Mandarin tone sandhi to converted pinyin.

    The mixin overrides ``post_pinyin`` and therefore expects a later class
    in the MRO to provide that method. The parent's result is post-processed
    with, in order, the third-tone rule, the rule for 不 and the rule for 一.

    References:

      * https://en.wikipedia.org/wiki/Standard_Chinese_phonology#Tone_sandhi
      * https://studycli.org/zh-CN/learn-chinese/tone-changes-in-mandarin/

    """  # noqa

    def post_pinyin(self, han, heteronym, pinyin_list, **kwargs):
        """Run the parent ``post_pinyin`` hook, then apply tone sandhi.

        :param han: the Chinese text that was converted; the sandhi helpers
                    index it in step with ``pinyin_list`` (one character per
                    entry).
        :param heteronym: forwarded unchanged to the parent hook.
        :param pinyin_list: list of per-character candidate lists; only the
                            first candidate of each entry is considered.
        :param kwargs: forwarded unchanged to the parent hook.
        :return: ``pinyin_list`` itself when no rule applied, otherwise a
                 new list of single-item lists produced by ``tone2_to_tone``.
        """
        ret = super(ToneSandhiMixin, self).post_pinyin(
            han, heteronym, pinyin_list, **kwargs)
        # A parent that returns None means "keep the list I was given".
        if ret is not None:
            pinyin_list = ret

        pinyin_list = self._third_tone(han, pinyin_list)
        pinyin_list = self._bu(han, pinyin_list)
        pinyin_list = self._yi(han, pinyin_list)

        return pinyin_list

    def _third_tone(self, han, pinyin_list):
        """Apply third-tone sandhi to the third-tone run ending the phrase.

        Principal rule: when two third-tone syllables are adjacent, the
        first one is pronounced with second tone, e.g. 老鼠 lǎoshǔ is
        pronounced láoshǔ.

        For three or more third tones in a row the real pronunciation
        depends on word boundaries (保管好 -> báoguán hǎo, but
        老保管 -> lǎo báoguǎn). Word boundaries are not modelled here:
        every syllable of the run except the last becomes second tone.

        Only the run of third-tone syllables that *ends* ``pinyin_list`` is
        considered; a run followed by another syllable is left untouched.

        :param han: unused; kept for a uniform helper signature.
        :param pinyin_list: list of per-character candidate lists.
        :return: ``pinyin_list`` unchanged when fewer than two third-tone
                 syllables end the phrase, otherwise a new list of
                 single-item lists.
        """
        tone2_pinyin_list = [tone_to_tone2(x[0]) for x in pinyin_list]

        # Length of the run of third-tone syllables at the end of the phrase.
        third_num = 0
        for pinyin in reversed(tone2_pinyin_list):
            if '3' not in pinyin:
                break
            third_num += 1

        if third_num < 2:
            return pinyin_list

        # Rewrite only the syllables that belong to that run (all but its
        # last one). Scanning from the start of the phrase instead would hit
        # an unrelated, earlier third-tone syllable.
        total = len(tone2_pinyin_list)
        for i in range(total - third_num, total - 1):
            tone2_pinyin_list[i] = tone2_pinyin_list[i].replace('3', '2')

        return [[tone2_to_tone(x)] for x in tone2_pinyin_list]

    def _bu(self, han, pinyin_list):
        """Apply the tone sandhi rule for 不 (bù).

        * Followed by a fourth-tone syllable, 不 takes second tone:
          不是 bù + shì -> búshì.
        * In every other position (including phrase-final) it is written
          with fourth tone.

        The optional neutral tone in A-not-A questions (是不是 shìbushì) is
        not modelled.

        :param han: the text whose characters line up with ``pinyin_list``.
        :param pinyin_list: list of per-character candidate lists.
        :return: ``pinyin_list`` unchanged when ``han`` contains no 不,
                 otherwise a new list of single-item lists.
        """
        # Written as an escape so the character cannot be lost by tools that
        # strip or mangle non-ASCII text: U+4E0D is 不.
        bu = '\u4e0d'
        if bu not in han:
            return pinyin_list

        tone2_pinyin_list = [tone_to_tone2(x[0]) for x in pinyin_list]
        last = len(han) - 1
        changed = False

        for i, h in enumerate(han):
            if h != bu:
                continue
            current_pinyin = tone2_pinyin_list[i]
            if i < last and '4' in tone2_pinyin_list[i + 1]:
                # bu4 + fourth tone -> bu2
                tone2_pinyin_list[i] = current_pinyin.replace('4', '2')
            else:
                # Any other context: force the citation (fourth) tone.
                tone2_pinyin_list[i] = _re_num.sub('4', current_pinyin)
            changed = True

        if changed:
            return [[tone2_to_tone(x)] for x in tone2_pinyin_list]

        return pinyin_list

    def _yi(self, han, pinyin_list):
        """Apply the tone sandhi rule for 一 (yī).

        * Followed by a fourth-tone syllable, 一 takes second tone:
          一定 yī + dìng -> yídìng.
        * Followed by any other syllable it takes fourth tone:
          一天 -> yìtiān, 一年 -> yìnián, 一起 -> yìqǐ.
        * As the last character of ``han`` it keeps first tone.

        Not modelled: ordinal use, 一 followed by a digit (e.g. 一一 yīyī)
        and the optional neutral tone between reduplicated words
        (看一看 kànyikàn).

        :param han: the text whose characters line up with ``pinyin_list``.
        :param pinyin_list: list of per-character candidate lists.
        :return: ``pinyin_list`` unchanged when ``han`` contains no 一,
                 otherwise a new list of single-item lists.
        """
        # Written as an escape so the character cannot be lost by tools that
        # strip or mangle non-ASCII text: U+4E00 is 一.
        yi = '\u4e00'
        if yi not in han:
            return pinyin_list

        tone2_pinyin_list = [tone_to_tone2(x[0]) for x in pinyin_list]
        last = len(han) - 1
        changed = False

        for i, h in enumerate(han):
            if h != yi:
                continue
            current_pinyin = tone2_pinyin_list[i]
            if i == last:
                # Phrase-final 一 keeps its citation (first) tone.
                new_tone = '1'
            elif '4' in tone2_pinyin_list[i + 1]:
                new_tone = '2'
            else:
                new_tone = '4'
            # Overwrite whatever tone digit is present: the incoming reading
            # is normally yi1, so replacing only a literal "4" would leave
            # it unchanged before a fourth tone.
            tone2_pinyin_list[i] = _re_num.sub(new_tone, current_pinyin)
            changed = True

        if changed:
            return [[tone2_to_tone(x)] for x in tone2_pinyin_list]

        return pinyin_list