Python source code examples: stanfordcorenlp.StanfordCoreNLP()
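
The following examples show how stanfordcorenlp.StanfordCoreNLP() is used in real projects. As a quick orientation, here is a minimal sketch of the wrapper's basic API, assuming a CoreNLP distribution unpacked at ./stanford-corenlp-full-2018-02-27 (the path is illustrative):

from stanfordcorenlp import StanfordCoreNLP

nlp = StanfordCoreNLP(r'./stanford-corenlp-full-2018-02-27')  # starts a local Java server
try:
    sentence = 'Stanford CoreNLP is a Java natural language analysis library.'
    print(nlp.word_tokenize(sentence))     # list of tokens
    print(nlp.pos_tag(sentence))           # list of (token, POS) pairs
    print(nlp.ner(sentence))               # list of (token, NER tag) pairs
    print(nlp.dependency_parse(sentence))  # list of (relation, head, dependent) triples
finally:
    nlp.close()  # shut down the background Java server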

Example 1
import codecs
import pickle

def load_simpq(data_dir):
    """Load SimpleQuestions and cache the parsed list as a pickle file.

    Assumes module-level names from the surrounding project: LogInfo
    (logging helper), CORENLP_PATH and _remove_simpq_header().
    """
    LogInfo.logs('SimpQ initializing ... ')
    qa_list = []
    corenlp = StanfordCoreNLP(CORENLP_PATH)
    for Tvt in ('train', 'valid', 'test'):
        fp = '%s/annotated_fb_data_%s.txt' % (data_dir, Tvt)
        with codecs.open(fp, 'r', 'utf-8') as br:
            for line in br:
                qa = {}
                s, p, o, q = line.strip().split('\t')
                s = _remove_simpq_header(s)
                p = _remove_simpq_header(p)
                o = _remove_simpq_header(o)
                qa['utterance'] = q
                qa['targetValue'] = (s, p, o)  # different from other datasets
                qa['tokens'] = corenlp.word_tokenize(qa['utterance'])
                qa['parse'] = corenlp.dependency_parse(qa['utterance'])
                qa_list.append(qa)
                if len(qa_list) % 1000 == 0:
                    LogInfo.logs('%d scanned.', len(qa_list))
    pickle_fp = '%s/simpQ.data.pkl' % data_dir
    with open(pickle_fp, 'wb') as bw:
        pickle.dump(qa_list, bw)
    LogInfo.logs('%d SimpleQuestions loaded.' % len(qa_list))
    return qa_list
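
A call would look like the following, assuming data_dir contains the SimpleQuestions files annotated_fb_data_{train,valid,test}.txt (the directory name is illustrative):

qa_list = load_simpq('./data/SimpleQuestions_v2')
print(qa_list[0]['utterance'])    # the raw question text
print(qa_list[0]['targetValue'])  # the (subject, predicate, object) triple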
Example 2
import json
import pickle

def load_reddit(data_dir, mode='train'):
    """Load one split of the Reddit dialogue data and cache it as a pickle file.

    Assumes module-level LogInfo and CORENLP_PATH from the surrounding project.
    """
    LogInfo.logs('Reddit initializing ... ')
    dg_list = []
    corenlp = StanfordCoreNLP(CORENLP_PATH)
    fp = '%s/%s_v3.txt' % (data_dir, mode)
    with open(fp, 'r') as br:
        for line in br:
            dg_line = json.loads(line)
            dialog = {'utterance': dg_line['post'].strip(),
                      'tokens': dg_line['post'].split(),
                      'parse': corenlp.dependency_parse(dg_line['post']),
                      'response': dg_line['response'].strip(),
                      'corr_responses': dg_line['corr_responses'],
                      'all_triples': dg_line['all_triples'],
                      'all_entities': dg_line['all_entities']
                      }
            dg_list.append(dialog)
            if len(dg_list) % 10000 == 0:
                LogInfo.logs('%d scanned.', len(dg_list))
    pickle_fp = '%s/Reddit.%s.pkl' % (data_dir, mode)
    with open(pickle_fp, 'wb') as bw:
        pickle.dump(dg_list, bw)
    LogInfo.logs('%d Reddit saved in [%s].' % (len(dg_list), pickle_fp))
    return dg_list
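
Usage follows the same pattern as Example 1; mode selects which split file {mode}_v3.txt is read (the path is illustrative):

train_dialogs = load_reddit('./data/reddit', mode='train')
print(train_dialogs[0]['utterance'], '->', train_dialogs[0]['response'])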
Example 3
def load_stanford_core_nlp(path):
    """Load Stanford CoreNLP toolkit objects.

    Args:
        path: String, path to the CoreNLP distribution directory.
    Returns:
        A (Chinese, English) pair of StanfordCoreNLP objects.
    """
    from stanfordcorenlp import StanfordCoreNLP

    zh_nlp = StanfordCoreNLP(path, lang='zh')
    en_nlp = StanfordCoreNLP(path, lang='en')
    return zh_nlp, en_nlp
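
Note that lang='zh' requires the Chinese models jar to be present in the CoreNLP directory. A usage sketch (the path is illustrative):

zh_nlp, en_nlp = load_stanford_core_nlp('./stanford-corenlp-full-2018-02-27')
print(zh_nlp.word_tokenize('清华大学位于北京。'))
print(en_nlp.word_tokenize('Tsinghua University is located in Beijing.'))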
Example 4
def test_args(self):
        self.assertRaises(IOError, StanfordCoreNLP, '/abc')
        self.assertRaises(ValueError, StanfordCoreNLP, r'G:/JavaLibraries/stanford-corenlp-full-2016-10-31/',
                          lang='abc')
        self.assertRaises(ValueError, StanfordCoreNLP, r'G:/JavaLibraries/stanford-corenlp-full-2016-10-31/',
                          memory='4m') 
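
This test documents the wrapper's constructor validation: a nonexistent path raises IOError, an unsupported lang code raises ValueError, and a memory string that is not of the form '<n>g' (e.g. '4m') also raises ValueError.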
Example 5
def __init__(self, url_or_path, port=9000):
        """Initialize the Stanford CoreNLP tokenizer.

        Args:
          url_or_path: URL string or path string of the Stanford CoreNLP library.
            Provide a URL if you have already started a Stanford CoreNLP server.
            Otherwise, provide the path to the library directory, e.g.
            JavaLibraries/stanford-corenlp-full-2017-06-09/; in that case a
            Stanford CoreNLP server is started independently of the Python process.
        """
        self.tokenizer = StanfordCoreNLP(url_or_path, port=port)
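
Both construction modes map directly onto the underlying wrapper. A sketch, assuming this __init__ belongs to a tokenizer class (the class name CoreNLPTokenizer is hypothetical):

# Attach to a CoreNLP server already running on localhost:9000.
tokenizer = CoreNLPTokenizer('http://localhost', port=9000)

# Or let the wrapper start its own server from a local distribution.
tokenizer = CoreNLPTokenizer(r'./JavaLibraries/stanford-corenlp-full-2017-06-09/')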
Example 6
def __init__(self, params):
        """
        A simple NLP helper class.

        Args:
            params(dict): A dict containing some parameters.
        """
        self.params = params
        self.corenlp = StanfordCoreNLP(self.params['corenlp_path'], quiet=False)

        # Pre-fetching the required models.
        props = {'annotators': 'coref', 'pipelineLanguage': 'en', 'ner.useSUTime': False}
        self.corenlp.annotate('', properties=props)
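
Because the first annotate() call pays the model-loading cost up front, later calls return quickly. annotate() returns the server response as a string; with outputFormat set to json it can be parsed directly. A sketch, assuming helper is an instance of the class above:

import json

props = {'annotators': 'coref', 'pipelineLanguage': 'en', 'outputFormat': 'json'}
result = json.loads(helper.corenlp.annotate('Alice said she lost her keys.',
                                            properties=props))
print(result['corefs'])  # coreference chains keyed by chain id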