Java源码示例:org.elasticsearch.analysis.common.CommonAnalysisPlugin
示例1
/**
 * Assembles the node settings from the given arguments and instantiates the
 * embedded node with the plugin classes listed below.
 *
 * @param clusterName value stored under {@code cluster.name}
 * @param homePath    value stored under {@code path.home}
 * @param dataPath    value stored under {@code path.data}
 * @param httpPort    value stored under {@code http.port}
 */
public EsEmbeddedServer(String clusterName, String homePath, String dataPath, String httpPort) {
    Settings.Builder nodeSettings = Settings.builder();
    // Transport and HTTP layers are both set to "netty4".
    nodeSettings.put("transport.type", "netty4");
    nodeSettings.put("http.type", "netty4");
    nodeSettings.put("path.home", homePath);
    nodeSettings.put("path.data", dataPath);
    nodeSettings.put("http.port", httpPort);
    nodeSettings.put("cluster.name", clusterName);

    node = new PluginConfigurableNode(
        nodeSettings.build(),
        asList(
            Netty4Plugin.class,
            ParentJoinPlugin.class,
            CommonAnalysisPlugin.class,
            PainlessPlugin.class,
            ReindexPlugin.class));
}
示例2
/**
 * Checks that the {@code with_subwords_only} analyzer defined in
 * {@code keywords_analysis.json} exists and emits the expected token sequence
 * for a German sample sentence.
 */
public void testWithSubwordsOnly() throws Exception {
    // Load the analysis configuration from the classpath resource.
    String settingsFile = "keywords_analysis.json";
    Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer subwordAnalyzer = testAnalysis.indexAnalyzers.get("with_subwords_only");
    assertNotNull(subwordAnalyzer);

    String text = "Das ist ein Schlüsselwort, ein Bindestrichwort";
    String[] expectedTokens = {
        "Da", "ist", "ein",
        "Schlüssel", "wort",
        "ein",
        "Bindestrich", "wort"
    };
    assertTokenStreamContents(subwordAnalyzer.tokenStream("test-field", text), expectedTokens);
}
示例3
/**
 * Runs a German sentence containing hyphenated and apostrophized words through
 * the {@code default} analyzer from {@code unstemmed.json} and compares the
 * emitted tokens with the expected list.
 */
public void testTwo() throws Exception {
    String settingsFile = "unstemmed.json";
    Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String text = "So wird's was: das Elasticsearch-Buch erscheint beim O'Reilly-Verlag.";
    String[] expectedTokens = {
        "wird's",
        "elasticsearch-buch", "elasticsearchbuch",
        "erscheint",
        "o'reilly-verlag", "o'reillyverlag"
    };
    assertTokenStreamContents(
        defaultAnalyzer.tokenStream(null, new StringReader(text)), expectedTokens);
}
示例4
/**
 * Feeds a hyphenated number string to the {@code default} analyzer from
 * {@code unstemmed.json} and expects both the hyphenated and the
 * hyphen-free form as tokens.
 */
public void testThree() throws Exception {
    String settingsFile = "unstemmed.json";
    Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String text = "978-1-4493-5854-9";
    String[] expectedTokens = {"978-1-4493-5854-9", "9781449358549"};
    assertTokenStreamContents(
        defaultAnalyzer.tokenStream(null, new StringReader(text)), expectedTokens);
}
示例5
/**
 * Analyzes a "surname, given name" string with the {@code unstemmed} analyzer
 * from {@code unstemmed.json} and compares the result with the expected
 * lower-case tokens.
 */
public void testFour() throws Exception {
    String settingsFile = "unstemmed.json";
    Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer unstemmedAnalyzer = testAnalysis.indexAnalyzers.get("unstemmed");

    String text = "Prante, Jörg";
    String[] expectedTokens = {"prante", "jorg"};
    assertTokenStreamContents(
        unstemmedAnalyzer.tokenStream("test", new StringReader(text)), expectedTokens);
}
示例6
/**
 * Analyzes a German book title containing "C++" with the {@code default}
 * analyzer from {@code unstemmed.json} and compares the emitted tokens with
 * the expected list.
 */
public void testSix() throws Exception {
    String settingsFile = "unstemmed.json";
    Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String text = "Programmieren in C++ für Einsteiger";
    String[] expectedTokens = {
        "programmieren", "programmi",
        "c++",
        "einsteiger", "einsteig"
    };
    assertTokenStreamContents(
        defaultAnalyzer.tokenStream(null, new StringReader(text)), expectedTokens);
}
示例7
/**
 * Returns the fixed, immutable list of plugin classes used by this class.
 *
 * @return the plugin classes, in declaration order
 */
private static List<Class<? extends Plugin>> plugins() {
    List<Class<? extends Plugin>> required = List.of(
        ReindexPlugin.class,
        Netty4Plugin.class,
        // for scaled_float type
        MapperExtrasPlugin.class,
        PainlessPlugin.class,
        // for stemmer analysis
        CommonAnalysisPlugin.class);
    return required;
}
示例8
/**
 * Configures a custom analyzer (standard tokenizer followed by the
 * {@code fst_decompound} and {@code unique} filters) and checks the tokens it
 * produces for a German sentence containing compound words.
 */
public void testDecompound() throws Exception {
    Settings analysisSettings = Settings.builder()
        .put("index.analysis.analyzer.myanalyzer.type", "custom")
        .put("index.analysis.analyzer.myanalyzer.tokenizer", "standard")
        .put("index.analysis.analyzer.myanalyzer.filter.0", "fst_decompound")
        .put("index.analysis.analyzer.myanalyzer.filter.1", "unique")
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer decompounding = testAnalysis.indexAnalyzers.get("myanalyzer");

    String text =
        "Die Jahresfeier der Rechtsanwaltskanzleien auf dem Donaudampfschiff hat viel Ökosteuer gekostet";
    // Each compound is followed by the parts expected from the decompounder.
    String[] expectedTokens = {
        "Die",
        "Jahresfeier", "jahres", "feier",
        "der",
        "Rechtsanwaltskanzleien", "rechts", "anwalts", "kanzleien",
        "auf",
        "dem",
        "Donaudampfschiff", "donau", "dampf", "schiff",
        "hat",
        "viel",
        "Ökosteuer", "ökos", "teuer",
        "gekostet"
    };
    assertAnalyzesTo(decompounding, text, expectedTokens);
}
示例9
/**
 * Analyzes a German sentence containing umlauts, a hyphenated word and an
 * ISBN with the {@code default} analyzer from {@code unstemmed.json} and
 * compares the emitted tokens with the expected list.
 */
public void testOne() throws Exception {
    String settingsFile = "unstemmed.json";
    Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String text =
        "Ein Tag in Köln im Café an der Straßenecke mit einer Standard-Nummer ISBN 1-4493-5854-3";
    String[] expectedTokens = {
        "tag",
        "koln",
        "cafe", "caf",
        "strassenecke", "strasseneck",
        "standard-nummer", "standardnummer", "standard-numm", "standardnumm",
        "isbn",
        "1-4493-5854-3", "1449358543",
        "978-1-4493-5854-9", "9781449358549"
    };
    assertTokenStreamContents(
        defaultAnalyzer.tokenStream(null, new StringReader(text)), expectedTokens);
}
示例10
/**
 * Analyzes the name "Schroeder" with the {@code unstemmed} analyzer from
 * {@code unstemmed.json} and expects the single token {@code schroder}.
 */
public void testFive() throws Exception {
    String settingsFile = "unstemmed.json";
    Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer unstemmedAnalyzer = testAnalysis.indexAnalyzers.get("unstemmed");

    String text = "Schroeder";
    String[] expectedTokens = {"schroder"};
    assertTokenStreamContents(
        unstemmedAnalyzer.tokenStream("test", new StringReader(text)), expectedTokens);
}
示例11
/**
 * Per-test setup: builds the SQL executor with the common analysis plugin and
 * default tables enabled, then derives the planner context from the current
 * cluster state.
 *
 * @throws IOException declared by this setup method; the visible code does not show which call raises it
 */
@Before
public void prepare() throws IOException {
    e = SQLExecutor
        .builder(
            clusterService,
            1,
            Randomness.get(),
            List.of(new CommonAnalysisPlugin()))
        .enableDefaultTables()
        .build();

    plannerContext = e.getPlannerContext(clusterService.state());
}
示例12
/**
 * Supplies the plugin classes for the test nodes.
 *
 * @return a fixed-size list holding the analysis, Netty4 and bundle plugin classes
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    Collection<Class<? extends Plugin>> pluginClasses = Arrays.asList(
        CommonAnalysisPlugin.class,
        Netty4Plugin.class,
        BundlePlugin.class);
    return pluginClasses;
}
示例13
/**
 * Applies the {@code decomp} token filter from {@code decompound_analysis.json}
 * directly on top of the standard tokenizer and checks the resulting token
 * sequence for a German sentence containing compound words.
 */
public void test() throws Exception {
    String settingsFile = "decompound_analysis.json";
    Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());

    // Wire the filter onto a tokenizer by hand instead of going through an analyzer.
    TokenFilterFactory decompFactory = testAnalysis.tokenFilter.get("decomp");
    Tokenizer standardTokenizer = testAnalysis.tokenizer.get("standard").create();

    String text =
        "Die Jahresfeier der Rechtsanwaltskanzleien auf dem Donaudampfschiff hat viel Ökosteuer gekostet";
    standardTokenizer.setReader(new StringReader(text));

    String[] expectedTokens = {
        "Die", "Die",
        "Jahresfeier", "Jahr", "feier",
        "der", "der",
        "Rechtsanwaltskanzleien", "Recht", "anwalt", "kanzlei",
        "auf", "auf",
        "dem", "dem",
        "Donaudampfschiff", "Donau", "dampf", "schiff",
        "hat", "hat",
        "viel", "viel",
        "Ökosteuer", "Ökosteuer",
        "gekostet", "gekosten"
    };
    assertTokenStreamContents(decompFactory.create(standardTokenizer), expectedTokens);
}
示例14
/**
 * Lists the plugin classes that should be added to the node.
 *
 * @return a fixed-size list holding the bundle and common analysis plugin classes
 */
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
    Collection<Class<? extends Plugin>> pluginClasses = Arrays.asList(
        BundlePlugin.class,
        CommonAnalysisPlugin.class);
    return pluginClasses;
}
示例15
/**
 * Configures a custom analyzer (standard tokenizer followed by the
 * {@code lemmatize} and {@code unique} filters) and checks the tokens it
 * produces for an English sample text.
 */
public void testLemmatizer() throws Exception {
    Settings analysisSettings = Settings.builder()
        .put("index.analysis.analyzer.myanalyzer.type", "custom")
        .put("index.analysis.analyzer.myanalyzer.tokenizer", "standard")
        .put("index.analysis.analyzer.myanalyzer.filter.0", "lemmatize")
        .put("index.analysis.analyzer.myanalyzer.filter.1", "unique")
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer lemmatizing = testAnalysis.indexAnalyzers.get("myanalyzer");

    String text = "While these texts were previously only available to users of academic libraries " +
        "participating in the partnership, at the end of the first phase of EEBO-TCP the current " +
        "25,000 texts have now been released into the public domain.";
    String[] expectedTokens = {
        "While", "this", "text", "be", "previously", "only", "available",
        "to", "user", "of", "academic", "library",
        "participate", "in", "the", "partnership",
        "at", "end", "first", "phase",
        "EEBO", "TCP",
        "current", "25,000",
        "have", "now", "release", "into", "public", "domain"
    };
    assertAnalyzesTo(lemmatizing, text, expectedTokens);
}
示例16
/**
 * Configures a {@code lemmatize} filter with {@code lemma_only} set to
 * {@code "false"}, chains it with {@code unique} behind the standard
 * tokenizer, and checks the tokens produced for an English sample text.
 */
public void testFull() throws Exception {
    Settings analysisSettings = Settings.builder()
        .put("index.analysis.filter.myfilter.type", "lemmatize")
        .put("index.analysis.filter.myfilter.lemma_only", "false")
        .put("index.analysis.analyzer.myanalyzer.type", "custom")
        .put("index.analysis.analyzer.myanalyzer.tokenizer", "standard")
        .put("index.analysis.analyzer.myanalyzer.filter.0", "myfilter")
        .put("index.analysis.analyzer.myanalyzer.filter.1", "unique")
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer lemmatizing = testAnalysis.indexAnalyzers.get("myanalyzer");

    String text = "While these texts were previously only available to users of academic libraries " +
        "participating in the partnership, at the end of the first phase of EEBO-TCP the current " +
        "25,000 texts have now been released into the public domain.";
    String[] expectedTokens = {
        "While",
        "these", "this",
        "texts", "text",
        "were", "be",
        "previously", "only", "available", "to",
        "users", "user",
        "of", "academic",
        "libraries", "library",
        "participating", "participate",
        "in", "the", "partnership", "at", "end", "first", "phase",
        "EEBO", "TCP",
        "current", "25,000", "have", "now", "been",
        "released", "release",
        "into", "public", "domain"
    };
    assertAnalyzesTo(lemmatizing, text, expectedTokens);
}
示例17
/**
 * Configures a {@code lemmatize} filter with language {@code de} behind the
 * standard tokenizer and checks the tokens produced for a German sample text.
 * Entries marked with {@code // ?} carry the original author's question marks.
 */
public void testGermanLemmatizer() throws Exception {
    Settings analysisSettings = Settings.builder()
        .put("index.analysis.filter.myfilter.type", "lemmatize")
        .put("index.analysis.filter.myfilter.language", "de")
        .put("index.analysis.analyzer.myanalyzer.type", "custom")
        .put("index.analysis.analyzer.myanalyzer.tokenizer", "standard")
        .put("index.analysis.analyzer.myanalyzer.filter.0", "myfilter")
        .build();

    Index index = new Index("test", "_na_");
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        index, analysisSettings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer germanLemmatizing = testAnalysis.indexAnalyzers.get("myanalyzer");

    String text = "Die Würde des Menschen ist unantastbar. " +
        "Sie zu achten und zu schützen ist Verpflichtung aller staatlichen Gewalt. " +
        "Das Deutsche Volk bekennt sich darum zu unverletzlichen und unveräußerlichen Menschenrechten " +
        "als Grundlage jeder menschlichen Gemeinschaft, des Friedens und der Gerechtigkeit in der Welt.";
    String[] expectedTokens = {
        "Die", "Würde", "der", "Mensch",
        "mein", // ?
        "unantastbar",
        "Sie", "zu", "achten", "und", "zu", "schützen",
        "mein", // ?
        "Verpflichtung", "all", "staatlich", "Gewalt",
        "Das", "deutsch", "Volk", "bekennen", "sich", "darum", "zu",
        "unverletzlichen", // ?
        "und",
        "unveräußerlichen", // ?
        "Menschenrechten", // ?
        "als", "Grundlage", "jed", "menschlich", "Gemeinschaft",
        "der", "Friede", "und", "der", "Gerechtigkeit",
        "in", "der", "Welt"
    };
    assertAnalyzesTo(germanLemmatizing, text, expectedTokens);
}
示例18
/**
 * Lists the plugin classes this override supplies.
 *
 * @return a fixed-size list holding the bundle and common analysis plugin classes
 */
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
    Collection<Class<? extends Plugin>> pluginClasses = Arrays.asList(
        BundlePlugin.class,
        CommonAnalysisPlugin.class);
    return pluginClasses;
}
示例19
/**
 * Extends the plugin set of the superclass with the common analysis plugin.
 *
 * @return a new mutable collection: the inherited plugin classes plus {@code CommonAnalysisPlugin}
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    // Copy first so the collection returned by the superclass is never modified.
    Collection<Class<? extends Plugin>> withAnalysis = new ArrayList<>(super.nodePlugins());
    withAnalysis.add(CommonAnalysisPlugin.class);
    return withAnalysis;
}
示例20
/**
 * Returns the inherited node plugins with {@code CommonAnalysisPlugin} appended.
 *
 * @return a fresh mutable copy of the superclass plugins plus the analysis plugin
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    // Work on a copy; the superclass collection is left untouched.
    Collection<Class<? extends Plugin>> pluginClasses = new ArrayList<>(super.nodePlugins());
    pluginClasses.add(CommonAnalysisPlugin.class);
    return pluginClasses;
}
示例21
/**
 * Adds {@code CommonAnalysisPlugin} to the plugins supplied by the superclass.
 *
 * @return a new mutable collection containing the inherited plugin classes and the analysis plugin
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    // Defensive copy: only the returned list is mutated.
    Collection<Class<? extends Plugin>> combined = new ArrayList<>(super.nodePlugins());
    combined.add(CommonAnalysisPlugin.class);
    return combined;
}