Java源码示例:org.elasticsearch.analysis.common.CommonAnalysisPlugin

示例1
/**
 * Builds the node settings from the given values and assigns a new
 * {@code PluginConfigurableNode} to {@code node}.
 *
 * <p>Both the transport and the HTTP layer are configured as {@code netty4}.
 * The node is only constructed here; nothing in this constructor starts it.
 *
 * @param clusterName value stored under {@code cluster.name}
 * @param homePath    value stored under {@code path.home}
 * @param dataPath    value stored under {@code path.data}
 * @param httpPort    value stored under {@code http.port}
 */
public EsEmbeddedServer(String clusterName, String homePath, String dataPath, String httpPort) {
    Settings.Builder nodeSettings = Settings.builder();
    nodeSettings.put("transport.type", "netty4");
    nodeSettings.put("http.type", "netty4");
    nodeSettings.put("path.home", homePath);
    nodeSettings.put("path.data", dataPath);
    nodeSettings.put("http.port", httpPort);
    nodeSettings.put("cluster.name", clusterName);

    // Plugins handed to the node, in the same order as before.
    node = new PluginConfigurableNode(
            nodeSettings.build(),
            asList(Netty4Plugin.class, ParentJoinPlugin.class, CommonAnalysisPlugin.class,
                    PainlessPlugin.class, ReindexPlugin.class));
}
 
示例2
/**
 * Loads {@code keywords_analysis.json} from the classpath, looks up the
 * {@code with_subwords_only} index analyzer and checks the tokens it emits
 * for a German sample sentence.
 *
 * @throws Exception if the settings cannot be loaded or analysis fails
 */
public void testWithSubwordsOnly() throws Exception {
    final String settingsFile = "keywords_analysis.json";
    Settings loaded = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), loaded,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());

    Analyzer subwordAnalyzer = testAnalysis.indexAnalyzers.get("with_subwords_only");
    assertNotNull(subwordAnalyzer);

    String input = "Das ist ein Schlüsselwort, ein Bindestrichwort";
    String[] tokens = {
            "Da", "ist", "ein",
            "Schlüssel", "wort",
            "ein",
            "Bindestrich", "wort"
    };
    assertTokenStreamContents(subwordAnalyzer.tokenStream("test-field", input), tokens);
}
 
示例3
/**
 * Runs the {@code default} analyzer configured by {@code unstemmed.json}
 * over a sentence containing hyphenated and apostrophised words and checks
 * the emitted tokens.
 *
 * @throws Exception if the settings cannot be loaded or analysis fails
 */
public void testTwo() throws Exception {
    final String settingsFile = "unstemmed.json";
    Settings loaded = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), loaded,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String input = "So wird's was: das Elasticsearch-Buch erscheint beim O'Reilly-Verlag.";
    String[] tokens = {
            "wird's",
            "elasticsearch-buch", "elasticsearchbuch",
            "erscheint",
            "o'reilly-verlag", "o'reillyverlag"
    };
    assertTokenStreamContents(defaultAnalyzer.tokenStream(null, new StringReader(input)), tokens);
}
 
示例4
/**
 * Checks that the {@code default} analyzer from {@code unstemmed.json}
 * emits a hyphenated ISBN both as written and with the hyphens removed.
 *
 * @throws Exception if the settings cannot be loaded or analysis fails
 */
public void testThree() throws Exception {
    final String settingsFile = "unstemmed.json";
    Settings loaded = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), loaded,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String input = "978-1-4493-5854-9";
    String[] tokens = { "978-1-4493-5854-9", "9781449358549" };
    assertTokenStreamContents(defaultAnalyzer.tokenStream(null, new StringReader(input)), tokens);
}
 
示例5
/**
 * Checks the tokens the {@code unstemmed} analyzer (configured by
 * {@code unstemmed.json}) emits for a "surname, given name" input that
 * contains an umlaut.
 *
 * @throws Exception if the settings cannot be loaded or analysis fails
 */
public void testFour() throws Exception {
    final String settingsFile = "unstemmed.json";
    Settings loaded = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), loaded,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer unstemmedAnalyzer = testAnalysis.indexAnalyzers.get("unstemmed");

    String input = "Prante, Jörg";
    String[] tokens = { "prante", "jorg" };
    assertTokenStreamContents(unstemmedAnalyzer.tokenStream("test", new StringReader(input)), tokens);
}
 
示例6
/**
 * Runs the {@code default} analyzer from {@code unstemmed.json} over a
 * sentence containing "C++" and checks the emitted tokens, which include
 * both full and shortened word forms.
 *
 * @throws Exception if the settings cannot be loaded or analysis fails
 */
public void testSix() throws Exception {
    final String settingsFile = "unstemmed.json";
    Settings loaded = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), loaded,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String input = "Programmieren in C++ für Einsteiger";
    String[] tokens = {
            "programmieren", "programmi",
            "c++",
            "einsteiger", "einsteig"
    };
    assertTokenStreamContents(defaultAnalyzer.tokenStream(null, new StringReader(input)), tokens);
}
 
示例7
/**
 * Returns the fixed list of plugin classes used by this class.
 *
 * @return an immutable list of plugin classes, in registration order
 */
private static List<Class<? extends Plugin>> plugins() {
    List<Class<? extends Plugin>> required = List.of(
            ReindexPlugin.class,
            Netty4Plugin.class,
            // needed for the scaled_float type
            MapperExtrasPlugin.class,
            PainlessPlugin.class,
            // needed for stemmer analysis
            CommonAnalysisPlugin.class);
    return required;
}
 
示例8
/**
 * Defines a custom analyzer ({@code standard} tokenizer followed by the
 * {@code fst_decompound} and {@code unique} filters) and checks the tokens
 * it produces for a German sentence with several compound words.
 *
 * @throws Exception if the analysis setup or the analysis itself fails
 */
public void testDecompound() throws Exception {
    Settings.Builder analyzerConfig = Settings.builder();
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.type", "custom");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.tokenizer", "standard");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.filter.0", "fst_decompound");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.filter.1", "unique");
    Settings analyzerSettings = analyzerConfig.build();

    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), analyzerSettings,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer decompounder = testAnalysis.indexAnalyzers.get("myanalyzer");

    String input = "Die Jahresfeier der Rechtsanwaltskanzleien auf dem Donaudampfschiff hat viel Ökosteuer gekostet";
    // One row per source word: the word itself, then any parts it was split into.
    String[] tokens = {
            "Die",
            "Jahresfeier", "jahres", "feier",
            "der",
            "Rechtsanwaltskanzleien", "rechts", "anwalts", "kanzleien",
            "auf",
            "dem",
            "Donaudampfschiff", "donau", "dampf", "schiff",
            "hat",
            "viel",
            "Ökosteuer", "ökos", "teuer",
            "gekostet"
    };
    assertAnalyzesTo(decompounder, input, tokens);
}
 
示例9
/**
 * Runs the {@code default} analyzer from {@code unstemmed.json} over a
 * German sentence containing umlauts, an accent, a sharp s, a hyphenated
 * word and an ISBN, and checks the full list of emitted tokens.
 *
 * @throws Exception if the settings cannot be loaded or analysis fails
 */
public void testOne() throws Exception {
    final String settingsFile = "unstemmed.json";
    Settings loaded = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), loaded,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

    String input = "Ein Tag in Köln im Café an der Straßenecke mit einer Standard-Nummer ISBN 1-4493-5854-3";
    String[] tokens = {
            "tag",
            "koln",
            "cafe", "caf",
            "strassenecke", "strasseneck",
            "standard-nummer", "standardnummer", "standard-numm", "standardnumm",
            "isbn",
            "1-4493-5854-3", "1449358543",
            "978-1-4493-5854-9", "9781449358549"
    };
    assertTokenStreamContents(defaultAnalyzer.tokenStream(null, new StringReader(input)), tokens);
}
 
示例10
/**
 * Checks that the {@code unstemmed} analyzer from {@code unstemmed.json}
 * turns "Schroeder" into the single token "schroder".
 *
 * @throws Exception if the settings cannot be loaded or analysis fails
 */
public void testFive() throws Exception {
    final String settingsFile = "unstemmed.json";
    Settings loaded = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), loaded,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer unstemmedAnalyzer = testAnalysis.indexAnalyzers.get("unstemmed");

    String input = "Schroeder";
    String[] tokens = { "schroder" };
    assertTokenStreamContents(unstemmedAnalyzer.tokenStream("test", new StringReader(input)), tokens);
}
 
示例11
/**
 * Builds the {@code SQLExecutor} with the default tables enabled and
 * {@code CommonAnalysisPlugin} as its only analysis plugin, then derives
 * the planner context from the current cluster state.
 *
 * @throws IOException if building the executor fails
 */
@Before
public void prepare() throws IOException {
    e = SQLExecutor
        .builder(
            clusterService,
            1,
            Randomness.get(),
            List.of(new CommonAnalysisPlugin()))
        .enableDefaultTables()
        .build();

    plannerContext = e.getPlannerContext(clusterService.state());
}
 
示例12
/**
 * Lists the plugin classes this override supplies for nodes.
 *
 * @return a fixed-size collection holding the analysis, Netty4 and bundle plugin classes
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    Collection<Class<? extends Plugin>> required = Arrays.asList(
            CommonAnalysisPlugin.class,
            Netty4Plugin.class,
            BundlePlugin.class);
    return required;
}
 
示例13
/**
 * Applies the {@code decomp} token filter from {@code decompound_analysis.json}
 * to the output of the {@code standard} tokenizer and checks the resulting
 * token sequence for a German sentence with several compound words.
 *
 * @throws Exception if the settings cannot be loaded or analysis fails
 */
public void test() throws Exception {
    final String settingsFile = "decompound_analysis.json";
    Settings loaded = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), loaded,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());

    String input = "Die Jahresfeier der Rechtsanwaltskanzleien auf dem Donaudampfschiff hat viel Ökosteuer gekostet";
    // One row per source word; each word is followed by a second form or by its parts.
    String[] tokens = {
            "Die", "Die",
            "Jahresfeier", "Jahr", "feier",
            "der", "der",
            "Rechtsanwaltskanzleien", "Recht", "anwalt", "kanzlei",
            "auf", "auf",
            "dem", "dem",
            "Donaudampfschiff", "Donau", "dampf", "schiff",
            "hat", "hat",
            "viel", "viel",
            "Ökosteuer", "Ökosteuer",
            "gekostet", "gekosten"
    };

    TokenFilterFactory decompFilter = testAnalysis.tokenFilter.get("decomp");
    Tokenizer standardTokenizer = testAnalysis.tokenizer.get("standard").create();
    standardTokenizer.setReader(new StringReader(input));
    assertTokenStreamContents(decompFilter.create(standardTokenizer), tokens);
}
 
示例14
/**
 * Supplies the plugin classes that should be added to the node.
 *
 * @return a fixed-size collection holding the bundle and common-analysis plugin classes
 */
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
    Collection<Class<? extends Plugin>> required = Arrays.asList(
            BundlePlugin.class,
            CommonAnalysisPlugin.class);
    return required;
}
 
示例15
/**
 * Defines a custom analyzer ({@code standard} tokenizer followed by the
 * {@code lemmatize} and {@code unique} filters) and checks the tokens it
 * produces for an English sample sentence.
 *
 * @throws Exception if the analysis setup or the analysis itself fails
 */
public void testLemmatizer() throws Exception {
    Settings.Builder analyzerConfig = Settings.builder();
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.type", "custom");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.tokenizer", "standard");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.filter.0", "lemmatize");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.filter.1", "unique");
    Settings analyzerSettings = analyzerConfig.build();

    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), analyzerSettings,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer lemmatizer = testAnalysis.indexAnalyzers.get("myanalyzer");

    String input = "While these texts were previously only available to users of academic libraries " +
            "participating in the partnership, at the end of the first phase of EEBO-TCP the current " +
            "25,000 texts have now been released into the public domain.";
    String[] tokens = {
            "While", "this", "text", "be", "previously", "only", "available",
            "to", "user", "of", "academic", "library",
            "participate", "in", "the", "partnership",
            "at", "end", "first", "phase",
            "EEBO", "TCP", "current", "25,000",
            "have", "now", "release", "into", "public", "domain"
    };
    assertAnalyzesTo(lemmatizer, input, tokens);
}
 
示例16
/**
 * Configures a {@code lemmatize} filter with {@code lemma_only} set to
 * {@code false}, chains it with {@code unique} behind the {@code standard}
 * tokenizer, and checks the tokens produced for an English sample sentence.
 * The expected list contains surface forms as well as lemmas.
 *
 * @throws Exception if the analysis setup or the analysis itself fails
 */
public void testFull() throws Exception {
    Settings.Builder analyzerConfig = Settings.builder();
    analyzerConfig.put("index.analysis.filter.myfilter.type", "lemmatize");
    analyzerConfig.put("index.analysis.filter.myfilter.lemma_only", "false");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.type", "custom");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.tokenizer", "standard");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.filter.0", "myfilter");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.filter.1", "unique");
    Settings analyzerSettings = analyzerConfig.build();

    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), analyzerSettings,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer lemmatizer = testAnalysis.indexAnalyzers.get("myanalyzer");

    String input = "While these texts were previously only available to users of academic libraries " +
            "participating in the partnership, at the end of the first phase of EEBO-TCP the current " +
            "25,000 texts have now been released into the public domain.";
    String[] tokens = {
            "While",
            "these", "this",
            "texts", "text",
            "were", "be",
            "previously", "only", "available", "to",
            "users", "user",
            "of", "academic",
            "libraries", "library",
            "participating", "participate",
            "in", "the", "partnership", "at", "end", "first", "phase",
            "EEBO", "TCP", "current", "25,000", "have", "now", "been",
            "released", "release",
            "into", "public", "domain"
    };
    assertAnalyzesTo(lemmatizer, input, tokens);
}
 
示例17
/**
 * Configures a {@code lemmatize} filter with {@code language} set to
 * {@code de} behind the {@code standard} tokenizer and checks the tokens
 * produced for a German sample text.
 *
 * <p>Several expected values are flagged as questionable by the original
 * author; they are kept unchanged because they pin the current output.
 *
 * @throws Exception if the analysis setup or the analysis itself fails
 */
public void testGermanLemmatizer() throws Exception {
    Settings.Builder analyzerConfig = Settings.builder();
    analyzerConfig.put("index.analysis.filter.myfilter.type", "lemmatize");
    analyzerConfig.put("index.analysis.filter.myfilter.language", "de");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.type", "custom");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.tokenizer", "standard");
    analyzerConfig.put("index.analysis.analyzer.myanalyzer.filter.0", "myfilter");
    Settings analyzerSettings = analyzerConfig.build();

    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"), analyzerSettings,
            new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin());
    Analyzer germanLemmatizer = testAnalysis.indexAnalyzers.get("myanalyzer");

    String input = "Die Würde des Menschen ist unantastbar. " +
            "Sie zu achten und zu schützen ist Verpflichtung aller staatlichen Gewalt. " +
            "Das Deutsche Volk bekennt sich darum zu unverletzlichen und unveräußerlichen Menschenrechten " +
            "als Grundlage jeder menschlichen Gemeinschaft, des Friedens und der Gerechtigkeit in der Welt.";
    String[] tokens = {
            "Die", "Würde", "der", "Mensch",
            "mein",              // NOTE(review): questionable output for "ist"
            "unantastbar",
            "Sie", "zu", "achten", "und", "zu", "schützen",
            "mein",              // NOTE(review): questionable output for "ist"
            "Verpflichtung", "all", "staatlich", "Gewalt",
            "Das", "deutsch", "Volk", "bekennen", "sich", "darum", "zu",
            "unverletzlichen",   // NOTE(review): questionable, comes out unchanged
            "und",
            "unveräußerlichen",  // NOTE(review): questionable, comes out unchanged
            "Menschenrechten",   // NOTE(review): questionable, comes out unchanged
            "als", "Grundlage", "jed", "menschlich", "Gemeinschaft",
            "der", "Friede", "und", "der", "Gerechtigkeit",
            "in", "der", "Welt"
    };
    assertAnalyzesTo(germanLemmatizer, input, tokens);
}
 
示例18
/**
 * Supplies the plugin classes returned by this override.
 *
 * @return a fixed-size collection holding the bundle and common-analysis plugin classes
 */
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
    Collection<Class<? extends Plugin>> required = Arrays.asList(
            BundlePlugin.class,
            CommonAnalysisPlugin.class);
    return required;
}
 
示例19
/**
 * Extends the plugin set inherited from the superclass with
 * {@code CommonAnalysisPlugin}.
 *
 * @return a new mutable collection: the superclass plugins followed by the analysis plugin
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    // Copy first so the collection returned by the superclass is never modified.
    Collection<Class<? extends Plugin>> withAnalysis = new ArrayList<>(super.nodePlugins());
    withAnalysis.add(CommonAnalysisPlugin.class);
    return withAnalysis;
}
 
示例20
/**
 * Returns the superclass plugins plus {@code CommonAnalysisPlugin}.
 *
 * @return a fresh {@code ArrayList} copy of the inherited plugins with the analysis plugin appended
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    Collection<Class<? extends Plugin>> inherited = super.nodePlugins();
    Collection<Class<? extends Plugin>> combined = new ArrayList<>(inherited);
    combined.add(CommonAnalysisPlugin.class);
    return combined;
}
 
示例21
/**
 * Adds {@code CommonAnalysisPlugin} to the plugins the superclass provides.
 *
 * @return a new list holding every inherited plugin class and then the analysis plugin class
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    // Work on a copy; the inherited collection is left untouched.
    Collection<Class<? extends Plugin>> nodePluginClasses = new ArrayList<>(super.nodePlugins());
    nodePluginClasses.add(CommonAnalysisPlugin.class);
    return nodePluginClasses;
}