Java source code examples: org.apache.lucene.analysis.custom.CustomAnalyzer
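
CustomAnalyzer assembles a Lucene Analyzer from char filter, tokenizer, and token filter factories through a fluent builder: each component is given either by its factory class or by its SPI name (plus optional key/value parameters), and build() wires them into a single analysis chain. The examples below come from real projects and tests. As a minimal, self-contained sketch of the basic pattern (not taken from any of those projects, and assuming the Lucene core and analyzers-common modules are on the classpath):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CustomAnalyzerSketch {
    public static void main(String[] args) throws Exception {
        // Build a simple chain: StandardTokenizer followed by LowerCaseFilter.
        Analyzer analyzer = CustomAnalyzer.builder()
                .withTokenizer(StandardTokenizerFactory.class)
                .addTokenFilter(LowerCaseFilterFactory.class)
                .build();

        // Run some text through the chain; TokenStream is Closeable, so
        // try-with-resources takes care of close().
        try (TokenStream ts = analyzer.tokenStream("field", "Apache Lucene")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term.toString()); // prints "apache", then "lucene"
            }
            ts.end();
        }
    }
}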

Example 1
/**
 * Create a new empty search index.
 */
public SongSearchIndex() {
    songs = new HashMap<>();
    try {
        analyzer = CustomAnalyzer.builder()
                .withTokenizer(StandardTokenizerFactory.class)
                .addTokenFilter(LowerCaseFilterFactory.class)
                .addTokenFilter(ASCIIFoldingFilterFactory.class)
                .build();
        index = new MMapDirectory(Files.createTempDirectory("quelea-mmap-song").toAbsolutePath());
    }
    catch(IOException ex) {
        LOGGER.log(Level.SEVERE, "Couldn't create song search index");
        throw new RuntimeException("Couldn't create song search index", ex);
    }
}
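
The rest of the class is not shown here. As a hypothetical follow-up (standard Lucene API, but not code from the project; the field name is made up), the analyzer and MMapDirectory created above would typically be wired into an IndexWriter like this:

// Hypothetical sketch: index a document with the analyzer and directory built above.
IndexWriterConfig config = new IndexWriterConfig(analyzer);
try (IndexWriter writer = new IndexWriter(index, config)) {
    Document doc = new Document();
    doc.add(new TextField("lyrics", "searchable song text", Field.Store.YES));
    writer.addDocument(doc);
    writer.commit();
}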
 
Example 2
@Test
public void testAnalyze_custom() {
  AnalysisImpl analysis = new AnalysisImpl();
  Map<String, String> tkParams = new HashMap<>();
  tkParams.put("maxTokenLen", "128");
  CustomAnalyzerConfig.Builder builder = new CustomAnalyzerConfig.Builder(
      "keyword", tkParams)
      .addTokenFilterConfig("lowercase", Collections.emptyMap());
  CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
  assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
  assertEquals("org.apache.lucene.analysis.core.KeywordTokenizerFactory", analyzer.getTokenizerFactory().getClass().getName());
  assertEquals("org.apache.lucene.analysis.core.LowerCaseFilterFactory", analyzer.getTokenFilterFactories().get(0).getClass().getName());

  String text = "Apache Lucene";
  List<Analysis.Token> tokens = analysis.analyze(text);
  assertNotNull(tokens);
}
 
Example 3
public void testPOS() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      SENTENCES_posTags, null, null, true);

  analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_posTags));
}
 
Example 4
@Override
public void setArgs(IndexSchema schema, Map<String, String> args) {
  args.putIfAbsent("stored", "false");
  args.putIfAbsent("omitTermFreqAndPositions", "true");
  args.putIfAbsent("omitNorms", "true");
  args.putIfAbsent("maxCharsForDocValues", "-1");
  super.setArgs(schema, args);

  // CustomAnalyzer is easy to use
  CustomAnalyzer customAnalyzer;
  try {
    customAnalyzer = CustomAnalyzer.builder(schema.getResourceLoader())
        .withDefaultMatchVersion(schema.getDefaultLuceneMatchVersion())
        .withTokenizer(KeywordTokenizerFactory.class)
        .addTokenFilter(PatternReplaceFilterFactory.class,
            "pattern", "#\\d*",
            "replace", "all")
        .build();
  } catch (IOException e) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);//impossible?
  }
  // Solr HTTP Schema APIs don't know about CustomAnalyzer so use TokenizerChain instead
  setIndexAnalyzer(new TokenizerChain(customAnalyzer));
  // leave queryAnalyzer as literal
}
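
As a rough illustration (not part of the original class), the chain above keeps each field value as a single token, because KeywordTokenizer emits the whole input unchanged, and PatternReplaceFilter then deletes every "#<digits>" fragment (no replacement is configured, so matches are simply removed). Feeding a sample value through the customAnalyzer built above shows the effect:

// Illustrative sketch only: print the single token produced for a sample value.
try (TokenStream ts = customAnalyzer.tokenStream("f", "DOC#123 and DOC#456")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        System.out.println(term.toString()); // prints: DOC and DOC
    }
    ts.end();
}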
 
Example 5
public static void main(String[] args) throws IOException {
    Analyzer az = CustomAnalyzer.builder()
        //.withTokenizer("Standard")
        .withTokenizer("Name")
        .addTokenFilter("EdgeNGram", "minGramSize", "1", "maxGramSize", "20")
        //.addTokenFilter("ICUTransform", "id", "Han-Latin;NFD;[[:NonspacingMark:][:Space:]] Remove")
        //.addTokenFilter("EdgeNGram", "minGramSize", "1", "maxGramSize", "20")
        .build();

    StringReader      sr = new StringReader(args[0]);
    TokenStream       ts = az.tokenStream  ("" , sr);
    OffsetAttribute   oa = ts.addAttribute (OffsetAttribute.class);
    CharTermAttribute ta = ts.addAttribute (CharTermAttribute.class);

    try {
        ts.reset(); // Resets this stream to the beginning. (Required)
        while (ts.incrementToken()) {
            System.out.println(ta.toString() + "|" + ta.length()
                    + "[" + oa.startOffset() + "," + oa.endOffset() + "]");
        }
        ts.end(  ); // Perform end-of-stream operations, e.g. set the final offset.
    } finally {
        ts.close(); // Release resources associated with this stream.
    }

}
 
Example 6
private Builder createDefaultAnalyzerBuilder() throws IOException {
    Builder builder = CustomAnalyzer.builder()
            .withTokenizer(StandardTokenizerFactory.class)
            .addTokenFilter(CJKWidthFilterFactory.class)
            .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false")
            .addTokenFilter(LowerCaseFilterFactory.class)
            .addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS)
            .addTokenFilter(EnglishPossessiveFilterFactory.class);
    addTokenFilterForUnderscoreRemovalAroundToken(builder);
    return builder;
}
 
Example 7
private Builder createArtistAnalyzerBuilder() throws IOException {
    Builder builder = CustomAnalyzer.builder()
            .withTokenizer(StandardTokenizerFactory.class)
            .addTokenFilter(CJKWidthFilterFactory.class)
            .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false")
            .addTokenFilter(LowerCaseFilterFactory.class)
            .addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS_ARTIST)
            .addTokenFilter(EnglishPossessiveFilterFactory.class);
    addTokenFilterForUnderscoreRemovalAroundToken(builder);
    return builder;
}
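
The helper addTokenFilterForUnderscoreRemovalAroundToken called by the two builders above is not included in these snippets. Purely as a hypothetical sketch of what such a helper could look like (the project's actual pattern and parameters are not shown here and may differ), it might append one more PatternReplaceFilterFactory to the builder:

// Hypothetical sketch only; the real implementation is not part of these examples.
private void addTokenFilterForUnderscoreRemovalAroundToken(Builder builder) throws IOException {
    builder.addTokenFilter(PatternReplaceFilterFactory.class,
            "pattern", "^_+|_+$",
            "replacement", "",
            "replace", "all");
}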
 
Example 8
/**
 * Create a new empty search index.
 */
public BibleSearchIndex() {
    chapters = new HashMap<>();
    try {
        analyzer = CustomAnalyzer.builder()
                .withTokenizer(StandardTokenizerFactory.class)
                .addTokenFilter(LowerCaseFilterFactory.class)
                .addTokenFilter(ASCIIFoldingFilterFactory.class)
                .build();
        index = new MMapDirectory(Files.createTempDirectory("quelea-mmap-bible").toAbsolutePath());
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, "Couldn't create bible search index");
        throw new RuntimeException("Couldn't create bible search index", ex);
    }
}
 
Example 9
@Override
public void setAnalyzer(Analyzer analyzer) {
  analyzerNameLbl.setText(analyzer.getClass().getName());

  if (analyzer instanceof CustomAnalyzer) {
    CustomAnalyzer customAnalyzer = (CustomAnalyzer) analyzer;

    DefaultListModel<String> charFilterListModel = new DefaultListModel<>();
    customAnalyzer.getCharFilterFactories().stream()
        .map(f -> f.getClass().getSimpleName())
        .forEach(charFilterListModel::addElement);
    charFilterList.setModel(charFilterListModel);

    tokenizerTF.setText(customAnalyzer.getTokenizerFactory().getClass().getSimpleName());

    DefaultListModel<String> tokenFilterListModel = new DefaultListModel<>();
    customAnalyzer.getTokenFilterFactories().stream()
        .map(f -> f.getClass().getSimpleName())
        .forEach(tokenFilterListModel::addElement);
    tokenFilterList.setModel(tokenFilterListModel);

    charFilterList.setBackground(Color.white);
    tokenizerTF.setBackground(Color.white);
    tokenFilterList.setBackground(Color.white);
  } else {
    charFilterList.setModel(new DefaultListModel<>());
    tokenizerTF.setText("");
    tokenFilterList.setModel(new DefaultListModel<>());

    charFilterList.setBackground(Color.lightGray);
    tokenizerTF.setBackground(Color.lightGray);
    tokenFilterList.setBackground(Color.lightGray);
  }
}
 
Example 10
void showAnalysisChainDialog() {
  if (getCurrentAnalyzer() instanceof CustomAnalyzer) {
    CustomAnalyzer analyzer = (CustomAnalyzer) getCurrentAnalyzer();
    new DialogOpener<>(analysisChainDialogFactory).open("Analysis chain", 600, 320,
        (factory) -> {
          factory.setAnalyzer(analyzer);
        });
  }
}
 
Example 11
@Override
public Analyzer buildCustomAnalyzer(CustomAnalyzerConfig config) {
  Objects.requireNonNull(config);
  try {
    // create builder
    CustomAnalyzer.Builder builder = config.getConfigDir()
        .map(path -> CustomAnalyzer.builder(FileSystems.getDefault().getPath(path)))
        .orElse(CustomAnalyzer.builder());

    // set tokenizer
    builder.withTokenizer(config.getTokenizerConfig().getName(), config.getTokenizerConfig().getParams());

    // add char filters
    for (CustomAnalyzerConfig.ComponentConfig cfConf : config.getCharFilterConfigs()) {
      builder.addCharFilter(cfConf.getName(), cfConf.getParams());
    }

    // add token filters
    for (CustomAnalyzerConfig.ComponentConfig tfConf : config.getTokenFilterConfigs()) {
      builder.addTokenFilter(tfConf.getName(), tfConf.getParams());
    }

    // build analyzer
    this.analyzer = builder.build();
    return analyzer;
  } catch (Exception e) {
    throw new LukeException("Failed to build custom analyzer.", e);
  }
}
 
Example 12
@Test
public void testAnalyzer_custom_with_confdir() throws Exception {
  Path confDir = createTempDir("conf");
  Path stopFile = Files.createFile(Paths.get(confDir.toString(), "stop.txt"));
  Files.write(stopFile, "of\nthe\nby\nfor\n".getBytes(StandardCharsets.UTF_8));

  AnalysisImpl analysis = new AnalysisImpl();
  Map<String, String> tkParams = new HashMap<>();
  tkParams.put("maxTokenLen", "128");
  Map<String, String> tfParams = new HashMap<>();
  tfParams.put("ignoreCase", "true");
  tfParams.put("words", "stop.txt");
  tfParams.put("format", "wordset");
  CustomAnalyzerConfig.Builder builder = new CustomAnalyzerConfig.Builder(
      "whitespace", tkParams)
      .configDir(confDir.toString())
      .addTokenFilterConfig("lowercase", Collections.emptyMap())
      .addTokenFilterConfig("stop", tfParams);
  CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
  assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
  assertEquals("org.apache.lucene.analysis.core.WhitespaceTokenizerFactory", analyzer.getTokenizerFactory().getClass().getName());
  assertEquals("org.apache.lucene.analysis.core.LowerCaseFilterFactory", analyzer.getTokenFilterFactories().get(0).getClass().getName());
  assertEquals("org.apache.lucene.analysis.core.StopFilterFactory", analyzer.getTokenFilterFactories().get(1).getClass().getName());

  String text = "Government of the People, by the People, for the People";
  List<Analysis.Token> tokens = analysis.analyze(text);
  assertNotNull(tokens);
}
 
Example 13
@Test
public void testAnalyzeStepByStep_custom() {
  AnalysisImpl analysis = new AnalysisImpl();
  Map<String, String> tkParams = new HashMap<>();
  tkParams.put("maxTokenLen", "128");
  CustomAnalyzerConfig.Builder builder = new CustomAnalyzerConfig.Builder("keyword", tkParams)
      .addTokenFilterConfig("lowercase", Collections.emptyMap())
      .addCharFilterConfig("htmlstrip", Collections.emptyMap());
  CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
  assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
  assertEquals("org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory",
      analyzer.getCharFilterFactories().get(0).getClass().getName());
  assertEquals("org.apache.lucene.analysis.core.KeywordTokenizerFactory",
      analyzer.getTokenizerFactory().getClass().getName());
  assertEquals("org.apache.lucene.analysis.core.LowerCaseFilterFactory",
      analyzer.getTokenFilterFactories().get(0).getClass().getName());

  String text = "Apache Lucene";
  Analysis.StepByStepResult result = analysis.analyzeStepByStep(text);
  assertNotNull(result);
  assertNotNull(result.getCharfilteredTexts());
  assertEquals(1,result.getCharfilteredTexts().size());
  assertEquals("htmlStrip", result.getCharfilteredTexts().get(0).getName());

  assertNotNull(result.getNamedTokens());
  assertEquals(2, result.getNamedTokens().size());
  //FIXME check each namedTokensList
  assertEquals("keyword", result.getNamedTokens().get(0).getName());
  assertEquals("lowercase", result.getNamedTokens().get(1).getName());
}
 
Example 14
public void testBasic() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      SENTENCES_chunks, null, null, true);
}
 
Example 15
public void testPayloads() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
      .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_chunks));
}
 
Example 16
public void testBasic() throws IOException {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
}
 
Example 17
public void testNoBreak() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();
  assertAnalyzesTo(analyzer, NO_BREAK, NO_BREAK_terms, NO_BREAK_startOffsets, NO_BREAK_endOffsets,
      null, null, null, true);
}
 
Example 18
@Test
public void testTokenizer() throws IOException {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin", "tokenizerModel", "en-test-tokenizer.bin")
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
  assertAnalyzesTo(analyzer, SENTENCE1, SENTENCE1_punc);
}
 
Example 19
@Test
public void testTokenizerNoSentenceDetector() throws IOException {
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
        .withTokenizer("opennlp", "tokenizerModel", "en-test-tokenizer.bin")
        .build();
  });
  assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'sentenceModel'"));
}
 
Example 20
@Test
public void testTokenizerNoTokenizer() throws IOException {
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
        .withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin")
        .build();
  });
  assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'tokenizerModel'"));
}
 
Example 21
public void test1SentenceDictionaryOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", "en-test-pos-maxent.bin")
      .addTokenFilter("opennlplemmatizer", "dictionary", "en-test-lemmas.dict")
      .build();
  assertAnalyzesTo(analyzer, SENTENCE, SENTENCE_dict_punc, null, null,
      SENTENCE_posTags, null, null, true);
}
 
Example 22
public void test2SentencesDictionaryOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_dict_punc, null, null,
      SENTENCES_posTags, null, null, true);
}
 
Example 23
public void test1SentenceMaxEntOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCE, SENTENCE_maxent_punc, null, null,
      SENTENCE_posTags, null, null, true);
}
 
Example 24
public void test2SentencesMaxEntOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("OpenNLPLemmatizer", "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_maxent_punc, null, null,
      SENTENCES_posTags, null, null, true);
}
 
Example 25
public void test1SentenceDictionaryAndMaxEnt() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", "en-test-pos-maxent.bin")
      .addTokenFilter("opennlplemmatizer", "dictionary", "en-test-lemmas.dict", "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCE_both, SENTENCE_both_punc, null, null,
      SENTENCE_both_posTags, null, null, true);
}
 
Example 26
public void test2SentencesDictionaryAndMaxEnt() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile, "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES_both, SENTENCES_both_punc, null, null,
      SENTENCES_both_posTags, null, null, true);
}
 
Example 27
public void testKeywordAttributeAwarenessDictionaryOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(KeywordRepeatFilterFactory.class)
      .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
      .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_dict_keep_orig_punc, null, null,
      SENTENCES_keep_orig_posTags, null, null, true);
}
 
Example 28
public void testKeywordAttributeAwarenessMaxEntOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(KeywordRepeatFilterFactory.class)
      .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
      .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_max_ent_keep_orig_punc, null, null,
      SENTENCES_keep_orig_posTags, null, null, true);
}