实际上是字典穷举,把汉字码表和拼音对应起来了。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
#!/usr/bin/env python
# encoding: utf-8
"""
Created by Eric Lo on 2010-05-20.
Copyright (c) 2010 __lxneng@gmail.com__. http://lxneng.com
All rights reserved.
"""


class Pinyin():
    """Convert Chinese characters to pinyin through a code-point lookup table.

    The table is loaded from a text file in which every line has the form
    ``<KEY>\\t<READINGS>``. ``KEY`` is the character's code point formatted
    with ``"%X"`` (upper-case hexadecimal) and ``READINGS`` is a
    whitespace-separated list of readings; only the first reading is used.
    The last character of a reading is dropped when producing pinyin
    (presumably a tone digit -- confirm against the data file).

    Attributes:
        dict: mapping from hexadecimal code-point string to the raw readings
            text exactly as read from the data file.
        splitter: string placed between syllables by ``get_pinyin``
            (empty by default).
    """

    def __init__(self, data_path='./Mandarin.dat'):
        """Load the lookup table from *data_path*.

        Lines that contain no tab separator (for example blank lines) are
        skipped instead of aborting the load.

        Raises:
            IOError/OSError: if *data_path* cannot be opened.
        """
        self.dict = {}
        # The context manager closes the file even if reading fails part-way.
        with open(data_path) as data_file:
            for line in data_file:
                key, separator, readings = line.partition('\t')
                if not separator:
                    # No tab on this line: nothing to map.
                    continue
                self.dict[key] = readings
        self.splitter = ''

    def get_pinyin(self, chars=u"你好吗"):
        """Return the lower-case pinyin of *chars*, joined by ``self.splitter``.

        Characters that are missing from the table (or whose entry has no
        reading) are passed through unchanged.
        """
        result = []
        for char in chars:
            key = "%X" % ord(char)
            try:
                # split() without arguments also discards the trailing
                # newline kept from the data file.
                first_reading = self.dict[key].split()[0]
            except (KeyError, IndexError):
                result.append(char)
            else:
                # Drop the final character of the reading, then lower-case.
                result.append(first_reading[:-1].lower())
        return self.splitter.join(result)

    def get_initials(self, char=u'你'):
        """Return the first letter of the first reading of *char*.

        The letter is returned in the case stored in the table. If *char* is
        not a single character, is missing from the table, or has an empty
        entry, *char* itself is returned unchanged.
        """
        try:
            return self.dict["%X" % ord(char)].split()[0][0]
        except (KeyError, IndexError, TypeError):
            # TypeError: ord() rejects empty or multi-character input.
            return char
1 2 3 4 5 6 |
# Usage example: requires the xpinyin module and its Mandarin.dat data file
# (looked up as ./Mandarin.dat by default).
from xpinyin import Pinyin

p = Pinyin()
p.get_pinyin(u"上海")    # output: 'shanghai'
p.get_initials(u"上")    # output: 'S'
下载数据库:http://github.com/lxneng/xpinyin/raw/master/Ma[......]