59 lines
1.7 KiB
Python
59 lines
1.7 KiB
Python
|
# -*- coding: utf-8 -*-
|
|||
|
from __future__ import unicode_literals
|
|||
|
|
|||
|
"""
|
|||
|
标调位置
|
|||
|
|
|||
|
有 ɑ 不放过,
|
|||
|
|
|||
|
没 ɑ 找 o、e;
|
|||
|
|
|||
|
ɑ、o、e、i、u、ü
|
|||
|
|
|||
|
标调就按这顺序;
|
|||
|
|
|||
|
i、u 若是连在一起,
|
|||
|
|
|||
|
谁在后面就标谁。
|
|||
|
|
|||
|
http://www.hwjyw.com/resource/content/2010/06/04/8183.shtml
|
|||
|
https://www.zhihu.com/question/23655297
|
|||
|
https://github.com/mozillazg/python-pinyin/issues/160
|
|||
|
http://www.pinyin.info/rules/where.html
|
|||
|
"""
|
|||
|
|
|||
|
# TODO: 增加测试用例:使用 pinyin_dict 中的数据收集所有带声调拼音测试这个规则的正确性
|
|||
|
# 1. 收集所有带声调拼音
|
|||
|
# 2. 转换为数字声调拼音(tone3),然后再转换为声调拼音
|
|||
|
# 3. 比对转换后的声调拼音跟原始拼音,确保结果一致
|
|||
|
|
|||
|
|
|||
|
def right_mark_index(pinyin_no_tone):
    """Return the index of the letter that should carry the tone mark.

    :param pinyin_no_tone: a pinyin syllable written without any tone
        (for example ``'zhong'`` or ``'liu'``).
    :return: zero-based index into ``pinyin_no_tone`` of the letter to
        mark, or ``None`` when none of the known letters is present.

    Candidates are tried in a fixed priority order and the first one found
    in the syllable decides the result.
    """
    # Restored finals 'iou', 'uei', 'uen': even after the original final has
    # been restored, the mark still goes where it sat before restoration.
    # Each pair is (restored final, letter that receives the mark); the
    # position is the first occurrence of that letter in the syllable.
    restored_finals = (
        ('iou', 'u'),
        ('uei', 'i'),
        ('uen', 'u'),
    )
    for final, marked_letter in restored_finals:
        if final in pinyin_no_tone:
            return pinyin_no_tone.index(marked_letter)

    # Remaining candidates, highest priority first.
    priority = (
        # Never pass over 'a'; without 'a' look for 'o', then 'e'.
        'a', 'o', 'e',
        # When 'i' and 'u' are adjacent, whichever comes second is marked.
        'iu', 'ui',
        # Then 'i', 'u' and 'ü' ('v' is listed alongside 'ü').
        'i', 'u', 'v', 'ü',
        # Finally 'n', 'm' and 'ê'.
        'n', 'm', 'ê',
    )
    for candidate in priority:
        if candidate in pinyin_no_tone:
            start = pinyin_no_tone.index(candidate)
            # The mark goes on the last letter of the matched candidate.
            return start + len(candidate) - 1

    # Nothing matched: the caller receives None.
    return None
|