ai-content-maker/.venv/Lib/site-packages/pypinyin/style/_tone_rule.py

59 lines
1.7 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
"""
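标调位置
ɑ 不放过,
  没 ɑ 找 o、e
  ɑ、o、e、i、u、ü
  标调就按这顺序;
  i、u 若是连在一起,
  谁在后面就标谁。

Tone mark placement (English rendering of the rhyme above):
  never pass over "a";
  without "a", look for "o" or "e";
  a, o, e, i, u, ü --
  the mark is placed following that order;
  when "i" and "u" appear together,
  the mark goes on whichever comes second.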
http://www.hwjyw.com/resource/content/2010/06/04/8183.shtml
https://www.zhihu.com/question/23655297
https://github.com/mozillazg/python-pinyin/issues/160
http://www.pinyin.info/rules/where.html
"""
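# TODO: add test cases: use the data in pinyin_dict to collect every
#       tone-marked pinyin and verify that this rule is correct
# 1. Collect every tone-marked pinyin
# 2. Convert it to numeric-tone pinyin (tone3), then back to tone-marked pinyin
# 3. Compare the converted pinyin with the original and make sure they match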
def right_mark_index(pinyin_no_tone):
    """Return the index of the letter that should carry the tone mark.

    The rules are tried in order and the first one that matches wins:

    1. The unabbreviated finals ``iou``, ``uei`` and ``uen`` are marked on
       the ``u`` of ``iou``, the ``i`` of ``uei`` and the ``u`` of ``uen``,
       i.e. on the same letter the rules below pick for the abbreviated
       spellings ``iu``, ``ui`` and ``un``.
    2. ``a`` if present, otherwise ``o``, otherwise ``e``.
    3. ``iu`` / ``ui``: whichever letter comes second.
    4. ``i``, ``u``, ``v``, ``ü`` (checked in that order).
    5. ``n``, ``m``, ``ê`` (checked in that order).

    :param pinyin_no_tone: pinyin string; expected to carry no tone
                           information (per the parameter name).
    :return: zero-based index into ``pinyin_no_tone``, or ``None`` when no
             rule matches (for example an empty string).
    """
    # Rule 1: (final, offset of the marked letter inside that final).
    # The position is anchored to the matched final instead of searching the
    # whole string for a bare 'u' / 'i', so an earlier occurrence of that
    # letter cannot shift the result.
    for final, offset in (('iou', 2), ('uei', 2), ('uen', 0)):
        start = pinyin_no_tone.find(final)
        if start != -1:
            return start + offset

    # Rule 2: never skip "a"; without "a" look for "o", then "e".
    for letter in ('a', 'o', 'e'):
        position = pinyin_no_tone.find(letter)
        if position != -1:
            return position

    # Rule 3: when "i" and "u" are adjacent, mark the one that comes last.
    for pair in ('iu', 'ui'):
        start = pinyin_no_tone.find(pair)
        if start != -1:
            return start + len(pair) - 1

    # Rule 4: remaining vowels, in priority order.
    for letter in ('i', 'u', 'v', 'ü'):
        position = pinyin_no_tone.find(letter)
        if position != -1:
            return position

    # Rule 5: syllables without an ordinary vowel: n, m, ê.
    for letter in ('n', 'm', 'ê'):
        position = pinyin_no_tone.find(letter)
        if position != -1:
            return position

    # Nothing matched: tell the caller explicitly that there is no position.
    return None