Python源码示例:opencc.OpenCC()

示例1
def traditional2simple(input_path, output_path):
    """Convert a Traditional Chinese text file to Simplified Chinese.

    The input file is streamed line by line, each line is converted with
    the OpenCC ``t2s`` configuration and written to ``output_path``.
    Progress is logged every 10000 lines.

    :param input_path: path of the existing UTF-8 text file to convert.
    :param output_path: path of the UTF-8 file to write; missing parent
        directories are created.
    :return: None
    :raises AssertionError: if ``input_path`` does not exist.
    """
    # input check
    assert os.path.exists(input_path), input_path
    # output check: create the whole missing directory chain, not only the
    # last level, so nested output locations work too.
    Path(output_path).resolve().parent.mkdir(parents=True, exist_ok=True)
    # convert
    cc = opencc.OpenCC('t2s')
    # ``with`` guarantees both handles are closed even when conversion or
    # writing raises part-way through.
    with open(input_path, 'r', encoding='utf8') as f_in, \
            open(output_path, 'w', encoding='utf8') as f_out:
        # Iterate the file object directly instead of readlines() so large
        # corpora are not loaded into memory at once.
        for i, line in enumerate(f_in):
            f_out.write(cc.convert(line))
            if i % 10000 == 0:
                logger.info("t2s %d lines complete", i)
    logger.info("Finished Text T2S")
示例2
def segment_lyric_convert_pinyin_mir1k():
    """Write segmented and pinyin versions of each MIR-1K lyric file.

    For every distinct name returned by ``get_filenames_in_folder`` for the
    ``Lyrics`` folder under ``mir1k_root``, the first line returned by
    ``read_mir1k_lyrics`` is converted with the OpenCC ``tw2s``
    configuration.  Two one-line files are then written to the
    ``annotation`` folder: ``<name>_phrase_char.txt`` (the first element of
    ``fool.cut`` joined with spaces) and ``<name>_phrase_pinyin.txt`` (the
    space-delimited result of ``pinyin.get``).

    Files that raise ``UnicodeDecodeError`` are skipped and their name is
    printed.
    """
    converter = OpenCC('tw2s')
    lyrics_dir = os.path.join(mir1k_root, 'Lyrics')
    annotation_dir = os.path.join(mir1k_root, 'annotation')

    # set() drops duplicate names reported by the folder listing.
    for name in list(set(get_filenames_in_folder(lyrics_dir))):
        lyric_path = os.path.join(lyrics_dir, name + '.txt')
        try:
            simplified = converter.convert(read_mir1k_lyrics(lyric_path)[0])
            as_pinyin = pinyin.get(simplified, format='strip', delimiter=' ')
            as_words = ' '.join(fool.cut(simplified)[0])
            write_lyrics_one_line(
                filename=os.path.join(annotation_dir, name + '_phrase_char.txt'),
                line=as_words)
            write_lyrics_one_line(
                filename=os.path.join(annotation_dir, name + '_phrase_pinyin.txt'),
                line=as_pinyin)
        except UnicodeDecodeError:
            # Report the offending file name and carry on with the rest.
            print(name)
示例3
def __init__(self, infile, outfile):
        """Run the whole conversion at construction time.

        Stores both paths, builds an OpenCC ``t2s`` converter, creates two
        empty corpus lists, then calls ``self.read``, ``self.t2s`` and
        ``self.write`` in that order.

        :param infile: value handed to ``self.read``.
        :param outfile: value handed to ``self.write`` together with
            ``self.s_corpus``.
        """
        self.infile, self.outfile = infile, outfile
        self.cc = opencc.OpenCC('t2s')
        # Presumably filled by read() and t2s() respectively -- TODO confirm.
        self.t_corpus, self.s_corpus = [], []

        # Pipeline: load, convert, save.
        self.read(self.infile)
        self.t2s()
        self.write(self.s_corpus, self.outfile)
示例4
def test_class_convert():
    """Check that a default-constructed OpenCC converts the sample phrases."""
    converter = OpenCC()
    # (input text, expected conversion) pairs, checked in order.
    samples = (
        ('乾坤一擲', '乾坤一掷'),
        ('開放中文轉換', '开放中文转换'),
    )
    for source_text, expected in samples:
        assert converter.convert(source_text) == expected
示例5
def main():
    """Command-line entry point: convert text with an OpenCC configuration.

    Reads the whole input (``--input`` file, or standard input when
    omitted), converts it with the configuration named by ``--config`` and
    writes the result (``--output`` file, or standard output when omitted).

    :return: 0 on success, 1 when no conversion was specified.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-i', '--input', metavar='<file>',
                        help='Read original text from <file>.')
    parser.add_argument('-o', '--output', metavar='<file>',
                        help='Write converted text to <file>.')
    parser.add_argument('-c', '--config', metavar='<conversion>',
                        help='Conversion')
    parser.add_argument('--in-enc', metavar='<encoding>', default='UTF-8',
                        help='Encoding for input')
    parser.add_argument('--out-enc', metavar='<encoding>', default='UTF-8',
                        help='Encoding for output')
    args = parser.parse_args()

    if args.config is None:
        print("Please specify a conversion.", file=sys.stderr)
        return 1

    cc = OpenCC(args.config)

    # Without a path we borrow the process's stdin (fd 0) / stdout (fd 1).
    # closefd=False keeps those descriptors open when the ``with`` block
    # exits, so later writes to sys.stdout still work; for real paths
    # closefd has to stay True.
    with io.open(args.input if args.input else 0, encoding=args.in_enc,
                 closefd=bool(args.input)) as f:
        input_str = f.read()
    output_str = cc.convert(input_str)
    with io.open(args.output if args.output else 1, 'w',
                 encoding=args.out_enc,
                 closefd=bool(args.output)) as f:
        f.write(output_str)

    return 0
示例6
def setUp(self):
        # Unitialized convertor object
        self.openCC = OpenCC()
        # Constructor intitialized convertor object
        self.openCC2 = OpenCC('hk2s')