ai-content-maker/.venv/Lib/site-packages/pypinyin/style/_tone_convert.py

578 lines
19 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from pypinyin import phonetic_symbol
from pypinyin.style._tone_rule import right_mark_index
from pypinyin.style._constants import RE_TONE3, RE_TONE2
from pypinyin.style.tone import converter
from pypinyin.style._utils import (
get_initials, replace_symbol_to_no_symbol,
get_finals, replace_symbol_to_number
)
_re_number = re.compile(r'\d')
def to_normal(pinyin, v_to_u=False):
"""将 :py:attr:`~pypinyin.Style.TONE`、
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为
:py:attr:`~pypinyin.Style.NORMAL` 风格的拼音
:param pinyin: :py:attr:`~pypinyin.Style.TONE`
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:return: :py:attr:`~pypinyin.Style.NORMAL` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import to_normal
>>> to_normal('zhōng')
'zhong'
>>> to_normal('zho1ng')
'zhong'
>>> to_normal('zhong1')
'zhong'
>>> to_normal('lüè')
'lve'
>>> to_normal('lüè', v_to_u=True)
'lüe'
"""
s = tone_to_tone2(pinyin, v_to_u=True)
s = tone2_to_normal(s)
return _fix_v_u(pinyin, s, v_to_u=v_to_u)
def to_tone(pinyin):
"""将 :py:attr:`~pypinyin.Style.TONE2` 或
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为
:py:attr:`~pypinyin.Style.TONE` 风格的拼音
:param pinyin: :py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:return: :py:attr:`~pypinyin.Style.TONE` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import to_tone
>>> to_tone('zho1ng')
'zhōng'
>>> to_tone('zhong1')
'zhōng'
"""
pinyin = pinyin.replace('v', 'ü')
if not _re_number.search(pinyin):
return pinyin
s = tone_to_tone2(pinyin)
s = tone2_to_tone(s)
return s
def to_tone2(pinyin, v_to_u=False, neutral_tone_with_five=False, **kwargs):
"""将 :py:attr:`~pypinyin.Style.TONE` 或
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为
:py:attr:`~pypinyin.Style.TONE2` 风格的拼音
:param pinyin: :py:attr:`~pypinyin.Style.TONE`
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:param neutral_tone_with_five: 是否使用 ``5`` 标识轻声
:param kwargs: 用于兼容老版本的 ``neutral_tone_with_5`` 参数当传入
``neutral_tone_with_5`` 参数时
将覆盖 ``neutral_tone_with_five`` 的值
:return: :py:attr:`~pypinyin.Style.TONE2` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import to_tone2
>>> to_tone2('zhōng')
'zho1ng'
>>> to_tone2('zhong1')
'zho1ng'
>>> to_tone2('shang')
'shang'
>>> to_tone2('shang', neutral_tone_with_five=True)
'sha5ng'
>>> to_tone2('lüè')
'lve4'
>>> to_tone2('lüè', v_to_u=True)
'lüe4'
"""
if kwargs.get('neutral_tone_with_5', None) is not None:
neutral_tone_with_five = kwargs['neutral_tone_with_5']
pinyin = pinyin.replace('5', '')
s = tone_to_tone3(
pinyin, v_to_u=True, neutral_tone_with_five=neutral_tone_with_five)
s = tone3_to_tone2(s)
return _fix_v_u(pinyin, s, v_to_u)
def to_tone3(pinyin, v_to_u=False, neutral_tone_with_five=False, **kwargs):
"""将 :py:attr:`~pypinyin.Style.TONE` 或
:py:attr:`~pypinyin.Style.TONE2` 风格的拼音转换为
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:param pinyin: :py:attr:`~pypinyin.Style.TONE`
:py:attr:`~pypinyin.Style.TONE2` 风格的拼音
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:param neutral_tone_with_five: 是否使用 ``5`` 标识轻声
:param kwargs: 用于兼容老版本的 ``neutral_tone_with_5`` 参数当传入
``neutral_tone_with_5`` 参数时
将覆盖 ``neutral_tone_with_five`` 的值
:return: :py:attr:`~pypinyin.Style.TONE2` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import to_tone3
>>> to_tone3('zhōng')
'zhong1'
>>> to_tone3('zho1ng')
'zhong1'
>>> to_tone3('shang')
'shang'
>>> to_tone3('shang', neutral_tone_with_five=True)
'shang5'
>>> to_tone3('lüè')
'lve4'
>>> to_tone3('lüè', v_to_u=True)
'lüe4'
"""
if kwargs.get('neutral_tone_with_5', None) is not None:
neutral_tone_with_five = kwargs['neutral_tone_with_5']
pinyin = pinyin.replace('5', '')
s = tone_to_tone2(
pinyin, v_to_u=True, neutral_tone_with_five=neutral_tone_with_five)
s = tone2_to_tone3(s)
return _fix_v_u(pinyin, s, v_to_u)
def to_initials(pinyin, strict=True):
"""将 :py:attr:`~pypinyin.Style.TONE`、
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3`
:py:attr:`~pypinyin.Style.NORMAL` 风格的拼音转换为
:py:attr:`~pypinyin.Style.INITIALS` 风格的拼音
:param pinyin: :py:attr:`~pypinyin.Style.TONE`
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3`
:py:attr:`~pypinyin.Style.NORMAL` 风格的拼音
:param strict: 返回结果是否严格遵照汉语拼音方案来处理声母和韵母
详见 :ref:`strict`
:return: :py:attr:`~pypinyin.Style.INITIALS` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import to_initials
>>> to_initials('zhōng')
'zh'
"""
return get_initials(pinyin, strict=strict)
def to_finals(pinyin, strict=True, v_to_u=False):
"""将 :py:attr:`~pypinyin.Style.TONE`、
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3`
:py:attr:`~pypinyin.Style.NORMAL` 风格的拼音转换为
:py:attr:`~pypinyin.Style.FINALS` 风格的拼音
:param pinyin: :py:attr:`~pypinyin.Style.TONE`
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3`
:py:attr:`~pypinyin.Style.NORMAL` 风格的拼音
:param strict: 返回结果是否严格遵照汉语拼音方案来处理声母和韵母
详见 :ref:`strict`
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:return: :py:attr:`~pypinyin.Style.FINALS` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import to_finals
>>> to_finals('zhōng')
'ong'
"""
new_pinyin = replace_symbol_to_no_symbol(pinyin).replace('v', 'ü')
finals = get_finals(new_pinyin, strict=strict)
finals = _fix_v_u(finals, finals, v_to_u)
return finals
def to_finals_tone(pinyin, strict=True):
"""将 :py:attr:`~pypinyin.Style.TONE`、
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为
:py:attr:`~pypinyin.Style.FINALS_TONE` 风格的拼音
:param pinyin: :py:attr:`~pypinyin.Style.TONE`
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:param strict: 返回结果是否严格遵照汉语拼音方案来处理声母和韵母
详见 :ref:`strict`
:return: :py:attr:`~pypinyin.Style.FINALS_TONE` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import to_finals_tone
>>> to_finals_tone('zhōng')
'ōng'
"""
finals = to_finals_tone2(pinyin, strict=strict)
finals = tone2_to_tone(finals)
return finals
def to_finals_tone2(pinyin, strict=True, v_to_u=False,
neutral_tone_with_five=False):
"""将 :py:attr:`~pypinyin.Style.TONE`、
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为
:py:attr:`~pypinyin.Style.FINALS_TONE2` 风格的拼音
:param pinyin: :py:attr:`~pypinyin.Style.TONE`
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:param strict: 返回结果是否严格遵照汉语拼音方案来处理声母和韵母
详见 :ref:`strict`
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:param neutral_tone_with_five: 是否使用 ``5`` 标识轻声
:return: :py:attr:`~pypinyin.Style.FINALS_TONE2` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import to_finals_tone2
>>> to_finals_tone2('zhōng')
'o1ng'
"""
pinyin = pinyin.replace('5', '')
finals = to_finals_tone3(pinyin, strict=strict, v_to_u=v_to_u,
neutral_tone_with_five=neutral_tone_with_five)
finals = tone3_to_tone2(finals, v_to_u=v_to_u)
return finals
def to_finals_tone3(pinyin, strict=True, v_to_u=False,
neutral_tone_with_five=False):
"""将 :py:attr:`~pypinyin.Style.TONE`、
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为
:py:attr:`~pypinyin.Style.FINALS_TONE3` 风格的拼音
:param pinyin: :py:attr:`~pypinyin.Style.TONE`
:py:attr:`~pypinyin.Style.TONE2`
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:param strict: 返回结果是否严格遵照汉语拼音方案来处理声母和韵母
详见 :ref:`strict`
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:param neutral_tone_with_five: 是否使用 ``5`` 标识轻声
:return: :py:attr:`~pypinyin.Style.FINALS_TONE3` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import to_finals_tone3
>>> to_finals_tone3('zhōng')
'ong1'
"""
pinyin = pinyin.replace('5', '')
finals = to_finals(pinyin, strict=strict, v_to_u=v_to_u)
if not finals:
return finals
numbers = _re_number.findall(replace_symbol_to_number(pinyin))
if not numbers:
if neutral_tone_with_five:
numbers = ['5']
else:
return finals
number = numbers[0]
finals = finals + number
return finals
def tone_to_normal(tone, v_to_u=False):
"""将 :py:attr:`~pypinyin.Style.TONE` 风格的拼音转换为
:py:attr:`~pypinyin.Style.NORMAL` 风格的拼音
:param tone: :py:attr:`~pypinyin.Style.TONE` 风格的拼音
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:return: :py:attr:`~pypinyin.Style.NORMAL` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import tone_to_normal
>>> tone_to_normal('zhōng')
'zhong'
>>> tone_to_normal('lüè')
'lve'
>>> tone_to_normal('lüè', v_to_u=True)
'lüe'
"""
s = tone_to_tone2(tone, v_to_u=v_to_u)
s = _re_number.sub('', s)
return _v_to_u(s, v_to_u)
def tone_to_tone2(tone, v_to_u=False, neutral_tone_with_five=False, **kwargs):
"""将 :py:attr:`~pypinyin.Style.TONE` 风格的拼音转换为
:py:attr:`~pypinyin.Style.TONE2` 风格的拼音
:param tone: :py:attr:`~pypinyin.Style.TONE` 风格的拼音
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:param neutral_tone_with_five: 是否使用 ``5`` 标识轻声
:param kwargs: 用于兼容老版本的 ``neutral_tone_with_5`` 参数当传入
``neutral_tone_with_5`` 参数时
将覆盖 ``neutral_tone_with_five`` 的值
:return: :py:attr:`~pypinyin.Style.TONE2` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import tone_to_tone2
>>> tone_to_tone2('zhōng')
'zho1ng'
>>> tone_to_tone2('shang')
'shang'
>>> tone_to_tone2('shang', neutral_tone_with_5=True)
'sha5ng'
>>> tone_to_tone2('lüè')
'lve4'
>>> tone_to_tone2('lüè', v_to_u=True)
'lüe4'
"""
if kwargs.get('neutral_tone_with_5', None) is not None:
neutral_tone_with_five = kwargs['neutral_tone_with_5']
tone3 = tone_to_tone3(
tone, v_to_u=v_to_u, neutral_tone_with_five=neutral_tone_with_five)
s = tone3_to_tone2(tone3)
return _v_to_u(s, v_to_u)
def tone_to_tone3(tone, v_to_u=False, neutral_tone_with_five=False, **kwargs):
"""将 :py:attr:`~pypinyin.Style.TONE` 风格的拼音转换为
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:param tone: :py:attr:`~pypinyin.Style.TONE` 风格的拼音
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:param neutral_tone_with_five: 是否使用 ``5`` 标识轻声
:param kwargs: 用于兼容老版本的 ``neutral_tone_with_5`` 参数当传入
``neutral_tone_with_5`` 参数时
将覆盖 ``neutral_tone_with_five`` 的值
:return: :py:attr:`~pypinyin.Style.TONE3` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import tone_to_tone3
>>> tone_to_tone3('zhōng')
'zhong1'
>>> tone_to_tone3('shang')
'shang'
>>> tone_to_tone3('shang', neutral_tone_with_five=True)
'shang5'
>>> tone_to_tone3('lüè')
'lve4'
>>> tone_to_tone3('lüè', v_to_u=True)
'lüe4'
"""
if kwargs.get('neutral_tone_with_5', None) is not None:
neutral_tone_with_five = kwargs['neutral_tone_with_5']
tone3 = converter.to_tone3(tone)
s = _improve_tone3(tone3, neutral_tone_with_five=neutral_tone_with_five)
return _v_to_u(s, v_to_u)
def tone2_to_normal(tone2, v_to_u=False):
"""将 :py:attr:`~pypinyin.Style.TONE2` 风格的拼音转换为
:py:attr:`~pypinyin.Style.NORMAL` 风格的拼音
:param tone2: :py:attr:`~pypinyin.Style.TONE2` 风格的拼音
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:return: Style.NORMAL 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import tone2_to_normal
>>> tone2_to_normal('zho1ng')
'zhong'
>>> tone2_to_normal('lüe4')
'lve'
>>> tone2_to_normal('lüe4', v_to_u=True)
'lüe'
"""
s = _re_number.sub('', tone2)
s = _v_to_u(s, v_to_u)
return _fix_v_u(tone2, s, v_to_u=v_to_u)
def tone2_to_tone(tone2):
"""将 :py:attr:`~pypinyin.Style.TONE2` 风格的拼音转换为
:py:attr:`~pypinyin.Style.TONE` 风格的拼音
:param tone2: :py:attr:`~pypinyin.Style.TONE2` 风格的拼音
:return: Style.TONE 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import tone2_to_tone
>>> tone2_to_tone('zho1ng')
'zhōng'
"""
regex = re.compile(RE_TONE2.pattern.replace('$', ''))
d = phonetic_symbol.phonetic_symbol_reverse
string = tone2.replace('ü', 'v').replace('5', '').replace('0', '')
def _replace(m):
s = m.group(0)
return d.get(s) or s
return regex.sub(_replace, string).replace('v', 'ü')
def tone2_to_tone3(tone2, v_to_u=False):
"""将 :py:attr:`~pypinyin.Style.TONE2` 风格的拼音转换为
:py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:param tone2: :py:attr:`~pypinyin.Style.TONE2` 风格的拼音
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:return: :py:attr:`~pypinyin.Style.TONE3` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import tone2_to_tone3
>>> tone2_to_tone3('zho1ng')
'zhong1'
>>> tone2_to_tone3('lüe4')
'lve4'
>>> tone2_to_tone3('lüe4', v_to_u=True)
'lüe4'
"""
tone3 = RE_TONE3.sub(r'\1\3\2', tone2)
return _fix_v_u(tone2, tone3, v_to_u=v_to_u)
def tone3_to_normal(tone3, v_to_u=False):
"""将 :py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为
:py:attr:`~pypinyin.Style.NORMAL` 风格的拼音
:param tone3: :py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:return: :py:attr:`~pypinyin.Style.NORMAL` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import tone3_to_normal
>>> tone3_to_normal('zhong1')
'zhong'
>>> tone3_to_normal('lüe4')
'lve'
>>> tone3_to_normal('lüe4', v_to_u=True)
'lüe'
"""
s = _re_number.sub('', tone3)
s = _v_to_u(s, v_to_u)
return _fix_v_u(tone3, s, v_to_u=v_to_u)
def tone3_to_tone(tone3):
"""将 :py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为
:py:attr:`~pypinyin.Style.TONE` 风格的拼音
:param tone3: :py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:return: :py:attr:`~pypinyin.Style.TONE` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import tone3_to_tone
>>> tone3_to_tone('zhong1')
'zhōng'
"""
tone2 = tone3_to_tone2(tone3, v_to_u=True)
return tone2_to_tone(tone2)
def tone3_to_tone2(tone3, v_to_u=False):
"""将 :py:attr:`~pypinyin.Style.TONE3` 风格的拼音转换为
:py:attr:`~pypinyin.Style.TONE2` 风格的拼音
:param tone3: :py:attr:`~pypinyin.Style.TONE3` 风格的拼音
:param v_to_u: 是否使用 ``ü`` 代替原来的 ``v``
当为 False 时结果中将使用 ``v`` 表示 ``ü``
:return: :py:attr:`~pypinyin.Style.TONE2` 风格的拼音
Usage::
>>> from pypinyin.contrib.tone_convert import tone3_to_tone2
>>> tone3_to_tone2('zhong1')
'zho1ng'
>>> tone3_to_tone2('lüe4')
'lve4'
>>> tone3_to_tone2('lüe4', v_to_u=True)
'lüe4'
"""
no_number_tone3 = tone3_to_normal(tone3)
mark_index = right_mark_index(no_number_tone3)
if mark_index is None:
mark_index = len(no_number_tone3) - 1
before = no_number_tone3[:mark_index + 1]
after = no_number_tone3[mark_index + 1:]
number = _get_number_from_pinyin(tone3)
if number is None:
return tone3
s = '{}{}{}'.format(before, number, after)
return _fix_v_u(tone3, s, v_to_u=v_to_u)
def _improve_tone3(tone3, neutral_tone_with_five=False):
number = _get_number_from_pinyin(tone3)
if number is None and neutral_tone_with_five and tone3 != '':
tone3 = '{}5'.format(tone3)
return tone3
def _get_number_from_pinyin(pinyin):
numbers = _re_number.findall(pinyin)
if numbers:
number = numbers[0]
else:
number = None
return number
def _v_to_u(pinyin, replace=False):
if not replace:
return pinyin
return pinyin.replace('v', 'ü')
def _fix_v_u(origin_py, new_py, v_to_u):
if not v_to_u:
return new_py.replace('ü', 'v')
return _v_to_u(new_py, replace=True)