Java源码示例:org.apache.commons.codec.language.bm.Languages.LanguageSet
示例1
/**
 * Extends every phoneme held by this builder with each phoneme of the given expression.
 * <p>
 * A left/right pair is kept only when restricting the left phoneme's languages to the right phoneme's
 * languages yields a non-empty set; other pairs are dropped. Once the result set holds
 * {@code maxPhonemes} entries no further pairs are examined. The builder's phonemes are then replaced
 * by the collected results.
 *
 * @param phonemeExpr the expression to apply
 * @param maxPhonemes the maximum number of phonemes to build up
 */
public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) {
    final Set<Phoneme> combined = new LinkedHashSet<Phoneme>(maxPhonemes);

    OUTER:
    for (final Phoneme existing : this.phonemes) {
        for (final Phoneme appended : phonemeExpr.getPhonemes()) {
            final LanguageSet shared = existing.getLanguages().restrictTo(appended.getLanguages());
            if (shared.isEmpty()) {
                // No language in common: this pairing is dropped.
                continue;
            }
            final Phoneme extended = new Phoneme(existing, appended, shared);
            if (combined.size() >= maxPhonemes) {
                continue;
            }
            combined.add(extended);
            if (combined.size() >= maxPhonemes) {
                // Cap reached: leave both loops at once.
                break OUTER;
            }
        }
    }

    this.phonemes.clear();
    this.phonemes.addAll(combined);
}
示例2
/**
 * Joins each phoneme of this builder with each phoneme of the supplied expression.
 * <p>
 * Only pairs whose language sets still overlap (the left set restricted to the right set is non-empty)
 * produce a joined phoneme; all other pairs are discarded. Collection stops as soon as
 * {@code maxPhonemes} results have been gathered, and the builder's phonemes are replaced by them.
 *
 * @param phonemeExpr the expression to apply
 * @param maxPhonemes the maximum number of phonemes to build up
 */
public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) {
    final Set<Rule.Phoneme> results = new LinkedHashSet<Rule.Phoneme>(maxPhonemes);

    ALL_PAIRS:
    for (final Rule.Phoneme head : this.phonemes) {
        for (final Rule.Phoneme tail : phonemeExpr.getPhonemes()) {
            final LanguageSet common = head.getLanguages().restrictTo(tail.getLanguages());
            if (common.isEmpty()) {
                // Incompatible languages: skip this combination.
                continue;
            }
            final Rule.Phoneme joined = new Phoneme(head, tail, common);
            if (results.size() >= maxPhonemes) {
                continue;
            }
            results.add(joined);
            if (results.size() >= maxPhonemes) {
                // Limit reached: abandon the remaining combinations.
                break ALL_PAIRS;
            }
        }
    }

    this.phonemes.clear();
    this.phonemes.addAll(results);
}
示例3
/**
 * Creates a new BeiderMorseFilterFactory.
 * <p>
 * Reads the {@code nameType}, {@code ruleType}, {@code concat} and {@code languageSet} arguments.
 * The first three default to {@code GENERIC}, {@code APPROX} and {@code true}. A missing
 * {@code languageSet}, or one containing only {@code "auto"}, leaves the language set {@code null}.
 *
 * @param args the factory arguments
 * @throws IllegalArgumentException if {@code args} is not empty after the known parameters were read
 */
public BeiderMorseFilterFactory(Map<String,String> args) {
    super(args);

    // The PhoneticEngine is described by NameType + RuleType + concat;
    // commons-codec's defaults are used: GENERIC + APPROX + true.
    final String nameTypeName = get(args, "nameType", NameType.GENERIC.toString());
    final NameType nameType = NameType.valueOf(nameTypeName);
    final String ruleTypeName = get(args, "ruleType", RuleType.APPROX.toString());
    final RuleType ruleType = RuleType.valueOf(ruleTypeName);
    final boolean concat = getBoolean(args, "concat", true);
    engine = new PhoneticEngine(nameType, ruleType, concat);

    // LanguageSet: "auto" (or nothing) means no explicit set; otherwise a comma-separated list.
    final Set<String> langs = getSet(args, "languageSet");
    final boolean automatic = langs == null || (langs.size() == 1 && langs.contains("auto"));
    if (automatic) {
        languageSet = null;
    } else {
        languageSet = LanguageSet.from(langs);
    }

    // NOTE(review): presumably the get*/getSet helpers remove the keys they read, so anything
    // still present here is unrecognised - confirm against the superclass.
    if (!args.isEmpty()) {
        throw new IllegalArgumentException("Unknown parameters: " + args);
    }
}
示例4
/**
 * Restricts the output to a set of possible origin languages.
 * <p>
 * Analyzes "Angelo" with a GENERIC/EXACT engine limited to italian, greek and spanish and checks the
 * emitted tokens, offsets and position increments. The analyzer is closed even when the assertion
 * fails.
 */
public void testLanguageSet() throws Exception {
    // A plain HashSet avoids the anonymous subclass (and captured test instance) that
    // double-brace initialisation creates.
    final LanguageSet languages = LanguageSet.from(
        new HashSet<String>(java.util.Arrays.asList("italian", "greek", "spanish")));
    final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(tokenizer,
                new BeiderMorseFilter(tokenizer,
                    new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true), languages));
        }
    };
    try {
        assertAnalyzesTo(analyzer, "Angelo",
            new String[] { "andZelo", "angelo", "anxelo" },
            new int[] { 0, 0, 0, },
            new int[] { 6, 6, 6, },
            new int[] { 1, 0, 0, });
    } finally {
        // Release the analyzer even if the assertion above throws.
        analyzer.close();
    }
}
示例5
/**
 * Wraps the given stream in the phonetic filter selected by this factory's settings.
 * <p>
 * Order of precedence: an explicit encoder, then Daitch-Mokotoff, then Beider-Morse (when both rule
 * type and name type are set), then Double Metaphone (when the max code length is positive).
 *
 * @param tokenStream the stream to wrap
 * @return the wrapping filter
 * @throws IllegalArgumentException if no encoder is set and none of the alternatives applies
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    if (encoder != null) {
        return new PhoneticFilter(tokenStream, encoder, !replace);
    }

    if (isDaitchMokotoff) {
        return new DaitchMokotoffSoundexFilter(tokenStream, !replace);
    }

    if (ruletype != null && nametype != null) {
        // With no configured languages the filter receives a null language set.
        final boolean hasLanguages = languageset != null && languageset.size() > 0;
        final LanguageSet langset = hasLanguages ? LanguageSet.from(new HashSet<>(languageset)) : null;
        final PhoneticEngine engine = new PhoneticEngine(nametype, ruletype, true);
        return new BeiderMorseFilter(tokenStream, engine, langset);
    }

    if (maxcodelength > 0) {
        return new DoubleMetaphoneFilter(tokenStream, maxcodelength, !replace);
    }

    throw new IllegalArgumentException("encoder error");
}
示例6
/**
 * Parses a single phoneme expression of the form {@code text} or {@code text[lang1+lang2+...]}.
 * <p>
 * Without a '[' the whole string is the phoneme text and {@code Languages.ANY_LANGUAGE} applies.
 * Otherwise the text before the first '[' is the phoneme text and the '+'-separated names between
 * that '[' and the final ']' form its language set.
 *
 * @param ph the phoneme expression
 * @return the parsed phoneme
 * @throws IllegalArgumentException if the expression contains '[' but does not end in ']'
 */
private static Phoneme parsePhoneme(final String ph) {
    final int bracketAt = ph.indexOf("[");
    if (bracketAt < 0) {
        return new Phoneme(ph, Languages.ANY_LANGUAGE);
    }
    if (!ph.endsWith("]")) {
        throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'");
    }

    final String text = ph.substring(0, bracketAt);
    final String languageList = ph.substring(bracketAt + 1, ph.length() - 1);
    final String[] names = languageList.split("[+]");
    final Set<String> langs = new HashSet<String>(Arrays.asList(names));
    return new Phoneme(text, LanguageSet.from(langs));
}
示例7
/**
 * Turns one phoneme expression into a {@code Phoneme}.
 * <p>
 * An expression without '[' is taken verbatim and paired with {@code Languages.ANY_LANGUAGE}. An
 * expression with '[' must end in ']'; the part before the first '[' becomes the phoneme text and
 * the bracketed part, split on '+', becomes the language set.
 *
 * @param ph the phoneme expression
 * @return the parsed phoneme
 * @throws IllegalArgumentException if the expression contains '[' but does not end in ']'
 */
private static Phoneme parsePhoneme(final String ph) {
    final int openIdx = ph.indexOf("[");
    if (openIdx < 0) {
        // No language restriction given.
        return new Phoneme(ph, Languages.ANY_LANGUAGE);
    }
    if (!ph.endsWith("]")) {
        throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'");
    }

    final String prefix = ph.substring(0, openIdx);
    final String bracketed = ph.substring(openIdx + 1, ph.length() - 1);
    final String[] parts = bracketed.split("[+]");
    final Set<String> langs = new HashSet<String>(Arrays.asList(parts));
    return new Phoneme(prefix, Languages.LanguageSet.from(langs));
}
示例8
/**
 * Creates a phoneme from its text and the languages it applies to.
 * <p>
 * The text is copied into a new {@code StringBuilder}; the language set is stored as given.
 *
 * @param phonemeText the phoneme text
 * @param languages the languages of this phoneme
 */
public Phoneme(final CharSequence phonemeText, final LanguageSet languages) {
    this.languages = languages;
    this.phonemeText = new StringBuilder(phonemeText);
}
示例9
/**
 * Creates a phoneme whose text is the left phoneme's text followed by the right phoneme's text.
 *
 * @param phonemeLeft the phoneme supplying the leading text
 * @param phonemeRight the phoneme supplying the trailing text
 * @param languages the languages of the new phoneme
 */
public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final LanguageSet languages) {
    // Start from the left text, then extend this instance's own buffer.
    this(phonemeLeft.phonemeText, languages);
    phonemeText.append(phonemeRight.phonemeText);
}
示例10
/**
 * Gets the language set stored in this object.
 *
 * @return the language set
 */
public LanguageSet getLanguages() {
    return languages;
}
示例11
/**
 * Builds a phoneme from the given text and language set.
 * <p>
 * The text is copied into a fresh {@code StringBuilder}; the language set reference is kept as is.
 *
 * @param phonemeText the phoneme text
 * @param languages the languages of this phoneme
 */
public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) {
    this.languages = languages;
    this.phonemeText = new StringBuilder(phonemeText);
}
示例12
/**
 * Builds a phoneme by concatenating the text of two phonemes, left first.
 *
 * @param phonemeLeft the phoneme supplying the leading text
 * @param phonemeRight the phoneme supplying the trailing text
 * @param languages the languages of the new phoneme
 */
public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) {
    // Copy the left text first, then append the right text to this instance's buffer.
    this(phonemeLeft.phonemeText, languages);
    phonemeText.append(phonemeRight.phonemeText);
}
示例13
/**
 * Returns the language set held by this object.
 *
 * @return the language set
 */
public Languages.LanguageSet getLanguages() {
    return languages;
}
示例14
/**
 * Gets rules for a combination of name type, rule type and languages.
 * <p>
 * The rules are looked up with {@code getInstanceMap} and the lists stored under each key are
 * concatenated in the map's value iteration order.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
                                     final LanguageSet langs) {
    final List<Rule> flattened = new ArrayList<Rule>();
    for (final List<Rule> bucket : getInstanceMap(nameType, rt, langs).values()) {
        flattened.addAll(bucket);
    }
    return flattened;
}
示例15
/**
 * Gets rules for a combination of name type, rule type and languages.
 * <p>
 * Flattens the map returned by {@code getInstanceMap} into one list, appending each value list in
 * the map's value iteration order.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
                                     final Languages.LanguageSet langs) {
    final List<Rule> collected = new ArrayList<Rule>();
    for (final List<Rule> group : getInstanceMap(nameType, rt, langs).values()) {
        collected.addAll(group);
    }
    return collected;
}
示例16
/**
 * Creates a builder seeded with a single zero-length phoneme restricted to the given languages.
 * The result can then be appended to. This should be the only way to create a new phoneme from
 * scratch.
 *
 * @param languages the set of languages
 * @return a new, empty phoneme builder
 */
public static PhonemeBuilder empty(final LanguageSet languages) {
    final Phoneme seed = new Phoneme("", languages);
    return new PhonemeBuilder(seed);
}
示例17
/**
 * Encodes a string to its phonetic representation.
 * <p>
 * The languages are first guessed from the input via {@code this.lang}, then the two-argument
 * {@code encode} is invoked with that guess.
 *
 * @param input
 *            the String to encode
 * @return the encoding of the input
 */
public String encode(final String input) {
    final LanguageSet guessed = this.lang.guessLanguages(input);
    final String encoded = encode(input, guessed);
    return encoded;
}
示例18
/**
 * Creates a copy of this phoneme whose language set is this phoneme's set merged with the given one.
 * The text is carried over unchanged.
 *
 * @param lang the language set to merge
 * @return a new Phoneme
 */
public Phoneme mergeWithLanguage(final LanguageSet lang) {
    final String text = this.phonemeText.toString();
    final LanguageSet merged = this.languages.merge(lang);
    return new Phoneme(text, merged);
}
示例19
/**
 * Gets rules for a combination of name type, rule type and a single language.
 * <p>
 * The language name is wrapped in a one-element language set and the lookup is delegated to the
 * language-set variant of {@code getInstance}.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param lang
 *            the language to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
    final LanguageSet onlyLang = LanguageSet.from(new HashSet<String>(Arrays.asList(lang)));
    return getInstance(nameType, rt, onlyLang);
}
示例20
/**
 * Gets rules for a combination of name type, rule type and languages.
 * <p>
 * A singleton language set is resolved to its one language; any other set is looked up under
 * {@code Languages.ANY}.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern
 * @since 1.9
 */
public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt,
                                                     final LanguageSet langs) {
    if (langs.isSingleton()) {
        return getInstanceMap(nameType, rt, langs.getAny());
    }
    return getInstanceMap(nameType, rt, Languages.ANY);
}
示例21
/**
 * Returns a builder that starts out with exactly one phoneme: an empty string tied to the given
 * languages. Further text can then be appended. This should be the only way to create a new phoneme
 * from scratch.
 *
 * @param languages the set of languages
 * @return a new, empty phoneme builder
 */
public static PhonemeBuilder empty(final Languages.LanguageSet languages) {
    final Rule.Phoneme blank = new Rule.Phoneme("", languages);
    return new PhonemeBuilder(blank);
}
示例22
/**
 * Encodes a string to its phonetic representation.
 * <p>
 * Guesses the input's languages through {@code this.lang} and hands the input together with that
 * guess to the two-argument {@code encode}.
 *
 * @param input
 *            the String to encode
 * @return the encoding of the input
 */
public String encode(final String input) {
    final Languages.LanguageSet detected = this.lang.guessLanguages(input);
    final String result = encode(input, detected);
    return result;
}
示例23
/**
 * Produces a new phoneme with this phoneme's text and a language set obtained by merging this
 * phoneme's languages with the given set.
 *
 * @param lang the language set to merge
 * @return a new Phoneme
 */
public Phoneme mergeWithLanguage(final LanguageSet lang) {
    final String sameText = this.phonemeText.toString();
    final LanguageSet union = this.languages.merge(lang);
    return new Phoneme(sameText, union);
}
示例24
/**
 * Gets rules for a combination of name type, rule type and a single language.
 * <p>
 * Builds a language set containing just {@code lang} and forwards to the overload that accepts a
 * language set.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param lang
 *            the language to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
    final LanguageSet single = LanguageSet.from(new HashSet<String>(Arrays.asList(lang)));
    return getInstance(nameType, rt, single);
}
示例25
/**
 * Gets rules for a combination of name type, rule type and languages.
 * <p>
 * When {@code langs} is a singleton its one language selects the rules; otherwise the rules
 * registered under {@code Languages.ANY} are used.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern
 * @since 1.9
 */
public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt,
                                                     final Languages.LanguageSet langs) {
    if (!langs.isSingleton()) {
        return getInstanceMap(nameType, rt, Languages.ANY);
    }
    return getInstanceMap(nameType, rt, langs.getAny());
}
示例26
/**
 * Creates a new BeiderMorseFilter over the given stream.
 *
 * @param input TokenStream to filter
 * @param engine configured PhoneticEngine with BM settings.
 * @param languages optional Set of original languages. Can be null (which means it will be guessed).
 */
public BeiderMorseFilter(TokenStream input, PhoneticEngine engine, LanguageSet languages) {
    super(input);
    // Both values are stored as given; a null language set is allowed.
    this.languages = languages;
    this.engine = engine;
}