# Tone-mark lookup table.
#
# Row 0 lists the plain vowels that can carry a tone mark; "v" is accepted as
# an ASCII stand-in for "ü", which is why the last two columns of rows 1-4
# hold the same character.  Rows 1-4 hold the same vowels carrying the
# diacritic of that tone, position for position, so
# ``PinyinToneMark[tone][i]`` is the marked form of ``PinyinToneMark[0][i]``.
PinyinToneMark = {
    0: "aoeiuv\u00fc",                                   # a o e i u v ü
    1: "\u0101\u014d\u0113\u012b\u016b\u01d6\u01d6",     # ā ō ē ī ū ǖ ǖ
    2: "\u00e1\u00f3\u00e9\u00ed\u00fa\u01d8\u01d8",     # á ó é í ú ǘ ǘ
    3: "\u01ce\u01d2\u011b\u01d0\u01d4\u01da\u01da",     # ǎ ǒ ě ǐ ǔ ǚ ǚ
    4: "\u00e0\u00f2\u00e8\u00ec\u00f9\u01dc\u01dc",     # à ò è ì ù ǜ ǜ
}


def _apply_tone(syllable, tone):
    """Return *syllable* with the diacritic for *tone* (1-4) applied.

    Placement rules, in order:

    * no vowel at all: the tone digit is appended unchanged;
    * the first run of vowels is a single vowel: that vowel is marked;
    * otherwise "a", then "o", then "e" takes the mark if present;
    * otherwise a syllable ending in "ui" marks the "i" and one ending in
      "iu" marks the "u";
    * anything else is flagged by appending "!".
    """
    plain = PinyinToneMark[0]
    marked = PinyinToneMark[tone]

    # Find the first maximal run of consecutive vowels: [start, end).
    start = next(
        (pos for pos, ch in enumerate(syllable) if ch in plain), None
    )
    if start is None:
        # Nothing can carry the mark, so keep the digit visible instead.
        return syllable + str(tone)
    end = start + 1
    while end < len(syllable) and syllable[end] in plain:
        end += 1

    if end - start == 1:
        vowel = syllable[start]
        return syllable[:start] + marked[plain.index(vowel)] + syllable[end:]

    # Vowel cluster: "a", "o", "e" win in that order (columns 0, 1, 2).
    for column, vowel in enumerate("aoe"):
        if vowel in syllable:
            return syllable.replace(vowel, marked[column])
    # In "ui" / "iu" the mark goes on the second vowel.
    if syllable.endswith("ui"):
        return syllable.replace("i", marked[3])
    if syllable.endswith("iu"):
        return syllable.replace("u", marked[4])
    # Unrecognised vowel cluster: flag it rather than guess.
    return syllable + "!"


def decode_pinyin(s):
    """Convert numbered pinyin to pinyin with tone marks.

    The input is lower-cased and scanned character by character.  Letters
    ``a``-``z`` and a literal "ü" accumulate into the current syllable, and
    ``u:`` is rewritten to "ü".  Any other character ends the current
    syllable and is itself dropped from the output, so ``"ni3 hao3"``
    becomes ``"nǐhǎo"``.  When that terminating character is a digit
    ``1``-``4`` the matching tone mark is placed on the syllable first;
    ``0`` and ``5`` (neutral tone) leave the syllable unmarked.

    A colon that does not directly follow "u" is treated like any other
    separator instead of raising.

    Args:
        s: Text containing numbered pinyin, e.g. ``"zhong1 guo2"``.

    Returns:
        The lower-cased text with tone digits replaced by diacritics and
        all separator characters removed.
    """
    umlaut_u = "\u00fc"
    finished = []   # completed syllables, joined once at the end
    syllable = ""   # letters collected since the last separator

    for ch in s.lower():
        if "a" <= ch <= "z" or ch == umlaut_u:
            syllable += ch
            continue
        if ch == ":" and syllable.endswith("u"):
            # "u:" is the ASCII spelling of "ü".
            syllable = syllable[:-1] + umlaut_u
            continue

        # Every other character terminates the syllable.
        if "0" <= ch <= "5":
            tone = int(ch) % 5  # 5 is folded onto 0: neutral, no mark
            if tone:
                syllable = _apply_tone(syllable, tone)
        finished.append(syllable)
        syllable = ""

    finished.append(syllable)  # trailing syllable without a terminator
    return "".join(finished)