145 lines
4.5 KiB
Python
145 lines
4.5 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
from __future__ import unicode_literals
|
|
from argparse import ArgumentParser
|
|
import logging
|
|
import sys
|
|
|
|
import pypinyin
|
|
from pypinyin.compat import PY2
|
|
|
|
style_map = {
|
|
'NORMAL': pypinyin.Style.NORMAL,
|
|
'zhao': pypinyin.Style.NORMAL,
|
|
'TONE': pypinyin.Style.TONE,
|
|
'zh4ao': pypinyin.Style.TONE,
|
|
'TONE2': pypinyin.Style.TONE2,
|
|
'zha4o': pypinyin.Style.TONE2,
|
|
'TONE3': pypinyin.Style.TONE3,
|
|
'zhao4': pypinyin.Style.TONE3,
|
|
'INITIALS': pypinyin.Style.INITIALS,
|
|
'zh': pypinyin.Style.INITIALS,
|
|
'FIRST_LETTER': pypinyin.Style.FIRST_LETTER,
|
|
'z': pypinyin.Style.FIRST_LETTER,
|
|
'FINALS': pypinyin.Style.FINALS,
|
|
'ao': pypinyin.Style.FINALS,
|
|
'FINALS_TONE': pypinyin.Style.FINALS_TONE,
|
|
'4ao': pypinyin.Style.FINALS_TONE,
|
|
'FINALS_TONE2': pypinyin.Style.FINALS_TONE2,
|
|
'a4o': pypinyin.Style.FINALS_TONE2,
|
|
'FINALS_TONE3': pypinyin.Style.FINALS_TONE3,
|
|
'ao4': pypinyin.Style.FINALS_TONE3,
|
|
'BOPOMOFO': pypinyin.Style.BOPOMOFO,
|
|
'BOPOMOFO_FIRST': pypinyin.Style.BOPOMOFO_FIRST,
|
|
'CYRILLIC': pypinyin.Style.CYRILLIC,
|
|
'CYRILLIC_FIRST': pypinyin.Style.CYRILLIC_FIRST,
|
|
}
|
|
func_map = {
|
|
'pinyin': pypinyin.pinyin,
|
|
'slug': pypinyin.slug,
|
|
}
|
|
default_style = 'zh4ao'
|
|
|
|
|
|
class NullWriter(object):
|
|
"""数据流黑洞,类似 linux/unix 下 /dev/null 的效果。"""
|
|
def write(self, string):
|
|
pass
|
|
|
|
|
|
def get_parser():
|
|
parser = ArgumentParser(description='convert chinese to pinyin.')
|
|
parser.add_argument('-V', '--version', action='version',
|
|
version='{0} {1}'.format(
|
|
pypinyin.__title__, pypinyin.__version__
|
|
))
|
|
# 要执行的函数名称
|
|
parser.add_argument('-f', '--func',
|
|
help='function name (default: "pinyin")',
|
|
choices=['pinyin', 'slug'],
|
|
default='pinyin')
|
|
# 拼音风格
|
|
parser.add_argument(
|
|
'-s', '--style',
|
|
help='pinyin style (default: "{0}")'.format(default_style),
|
|
choices=style_map.keys(), default=default_style
|
|
)
|
|
parser.add_argument('-p', '--separator',
|
|
help='slug separator (default: "-")',
|
|
default='-')
|
|
parser.add_argument('-e', '--errors',
|
|
help=('how to handle none-pinyin string'
|
|
' (default: "default")'),
|
|
choices=['default', 'ignore', 'replace'],
|
|
default='default')
|
|
# 输出多音字
|
|
parser.add_argument('-m', '--heteronym', help='enable heteronym',
|
|
action='store_true')
|
|
# 要查询的汉字
|
|
parser.add_argument('hans', nargs='+', help='chinese string')
|
|
return parser
|
|
|
|
|
|
def main():
|
|
# 禁用除 CRITICAL 外的日志消息
|
|
logging.disable(logging.CRITICAL)
|
|
|
|
# read hans from stdin
|
|
if not sys.stdin.isatty():
|
|
pipe_data = sys.stdin.read().strip()
|
|
else:
|
|
pipe_data = ''
|
|
args = sys.argv[1:]
|
|
if pipe_data:
|
|
args.append(pipe_data)
|
|
|
|
# 获取命令行选项和参数
|
|
parser = get_parser()
|
|
options = parser.parse_args(args)
|
|
if PY2:
|
|
hans = [
|
|
han.decode(sys.stdin.encoding or 'utf-8') for han in options.hans
|
|
]
|
|
else:
|
|
hans = options.hans
|
|
func = getattr(pypinyin, options.func)
|
|
style = style_map[options.style]
|
|
heteronym = options.heteronym
|
|
separator = options.separator
|
|
errors = options.errors
|
|
|
|
func_kwargs = {
|
|
'pinyin': {'heteronym': heteronym, 'errors': errors},
|
|
'slug': {'heteronym': heteronym, 'separator': separator,
|
|
'errors': errors},
|
|
}
|
|
if PY2:
|
|
kwargs = func_kwargs[func.func_name]
|
|
else:
|
|
kwargs = func_kwargs[func.__name__]
|
|
|
|
# 重设标准输出流和标准错误流
|
|
# 不输出任何字符,防止污染命令行命令的输出结果
|
|
# 其实主要是为了干掉 jieba 内的 print 语句 ;)
|
|
sys.stdout = sys.stderr = NullWriter()
|
|
results = [func(han, style=style, **kwargs) for han in hans]
|
|
# 恢复默认
|
|
sys.stdout = sys.__stdout__
|
|
sys.stderr = sys.__stderr__
|
|
|
|
for result in results:
|
|
if not result:
|
|
print('')
|
|
elif result and isinstance(result, (list, tuple)):
|
|
if isinstance(result[0], (list, tuple)):
|
|
print(' '.join([','.join(s) for s in result]))
|
|
else:
|
|
print(result)
|
|
else:
|
|
print(result)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|