Java源码示例:org.apache.commons.codec.language.bm.Languages.LanguageSet

示例1
/**
 * Extends every phoneme held by this builder with the alternatives of the given expression.
 * <p>
 * Each phoneme currently in the builder is paired with each phoneme of the expression. When the
 * left language set restricted to the right one is non-empty, the pair is joined into a longer
 * phoneme carrying that restricted set; otherwise the pair is dropped. Afterwards the builder's
 * contents are replaced by the joined phonemes, in the order in which they were first added.
 *
 * @param phonemeExpr   the expression to apply
 * @param maxPhonemes   the maximum number of phonemes to build up; pairing stops as soon as the
 *                      result holds this many phonemes
 */
public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) {
    final Set<Phoneme> joined = new LinkedHashSet<Phoneme>(maxPhonemes);

    PAIRING: for (final Phoneme existing : this.phonemes) {
        for (final Phoneme addition : phonemeExpr.getPhonemes()) {
            final LanguageSet shared = existing.getLanguages().restrictTo(addition.getLanguages());
            if (shared.isEmpty()) {
                // No language in common: this pairing is incompatible.
                continue;
            }

            final Phoneme candidate = new Phoneme(existing, addition, shared);
            if (joined.size() >= maxPhonemes) {
                // Only reachable when maxPhonemes is not positive; nothing may be added then.
                continue;
            }

            joined.add(candidate);
            if (joined.size() >= maxPhonemes) {
                // Limit reached: abandon all remaining pairings.
                break PAIRING;
            }
        }
    }

    this.phonemes.clear();
    this.phonemes.addAll(joined);
}
 
示例2
/**
 * Combines the phonemes of this builder with the phonemes of the supplied expression.
 * <p>
 * For every (builder phoneme, expression phoneme) pair the builder phoneme's language set is
 * restricted to the expression phoneme's language set. Pairs with a non-empty result are joined
 * into a new, longer phoneme; pairs with an empty result are discarded. The builder then holds
 * exactly the joined phonemes, in insertion order.
 *
 * @param phonemeExpr   the expression to apply
 * @param maxPhonemes   the maximum number of phonemes to build up
 */
public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) {
    final Set<Rule.Phoneme> result = new LinkedHashSet<Rule.Phoneme>(maxPhonemes);

    // Set once the result has grown to maxPhonemes; both loops stop immediately afterwards.
    boolean limitReached = false;
    for (final Rule.Phoneme current : this.phonemes) {
        for (final Rule.Phoneme next : phonemeExpr.getPhonemes()) {
            final LanguageSet common = current.getLanguages().restrictTo(next.getLanguages());
            if (!common.isEmpty()) {
                final Rule.Phoneme combined = new Phoneme(current, next, common);
                if (result.size() < maxPhonemes) {
                    result.add(combined);
                    limitReached = result.size() >= maxPhonemes;
                    if (limitReached) {
                        break;
                    }
                }
            }
        }
        if (limitReached) {
            break;
        }
    }

    this.phonemes.clear();
    this.phonemes.addAll(result);
}
 
示例3
/**
 * Creates a new BeiderMorseFilterFactory.
 * <p>
 * Reads {@code nameType} (default {@code GENERIC}), {@code ruleType} (default {@code APPROX}),
 * {@code concat} (default {@code true}) and {@code languageSet} from the arguments. A missing
 * {@code languageSet}, or one consisting solely of {@code "auto"}, leaves the language set
 * {@code null}.
 *
 * @param args the factory arguments
 * @throws IllegalArgumentException if {@code args} is not empty once the known parameters have been read
 */
public BeiderMorseFilterFactory(Map<String,String> args) {
  super(args);

  // A PhoneticEngine is defined by NameType + RuleType + concat;
  // the defaults mirror commons-codec: GENERIC + APPROX + true.
  final String nameTypeArg = get(args, "nameType", NameType.GENERIC.toString());
  final NameType nameType = NameType.valueOf(nameTypeArg);
  final String ruleTypeArg = get(args, "ruleType", RuleType.APPROX.toString());
  final RuleType ruleType = RuleType.valueOf(ruleTypeArg);
  final boolean concat = getBoolean(args, "concat", true);
  engine = new PhoneticEngine(nameType, ruleType, concat);

  // LanguageSet: absent or exactly {"auto"} means no explicit restriction,
  // otherwise the configured names are used.
  final Set<String> langs = getSet(args, "languageSet");
  final boolean automatic = langs == null || (langs.size() == 1 && langs.contains("auto"));
  if (automatic) {
    languageSet = null;
  } else {
    languageSet = LanguageSet.from(langs);
  }

  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
 
示例4
/**
 * Checks that restricting the filter to a set of possible origin languages (italian, greek,
 * spanish) produces the expected encodings, offsets and position increments for "Angelo".
 */
public void testLanguageSet() throws Exception {
  // A plain HashSet instead of double-brace initialization, which would create an
  // anonymous HashSet subclass just to populate three elements.
  final HashSet<String> languageNames = new HashSet<String>();
  languageNames.add("italian");
  languageNames.add("greek");
  languageNames.add("spanish");
  final LanguageSet languages = LanguageSet.from(languageNames);

  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer( MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, 
          new BeiderMorseFilter(tokenizer, 
              new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true), languages));
    }
  };
  try {
    assertAnalyzesTo(analyzer, "Angelo",
        new String[] { "andZelo", "angelo", "anxelo" },
        new int[] { 0, 0, 0, },
        new int[] { 6, 6, 6, },
        new int[] { 1, 0, 0, });
  } finally {
    // Close the analyzer even when the assertion fails, so it is never leaked.
    analyzer.close();
  }
}
 
示例5
/**
 * Wraps the given token stream in the phonetic filter selected by this factory's settings.
 * <p>
 * A configured {@code encoder} takes precedence. Without one, the choice is, in order:
 * Daitch-Mokotoff soundex, Beider-Morse (when both rule type and name type are set), and
 * double metaphone (when {@code maxcodelength} is positive).
 *
 * @param tokenStream the stream to wrap
 * @return the wrapping phonetic filter
 * @throws IllegalArgumentException if no encoder is configured and none of the alternatives applies
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    if (encoder != null) {
        return new PhoneticFilter(tokenStream, encoder, !replace);
    }

    if (isDaitchMokotoff) {
        return new DaitchMokotoffSoundexFilter(tokenStream, !replace);
    }

    if (ruletype != null && nametype != null) {
        // A null language set is passed on when no languages are configured.
        final boolean hasLanguages = languageset != null && languageset.size() > 0;
        final LanguageSet restriction = hasLanguages ? LanguageSet.from(new HashSet<>(languageset)) : null;
        return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true), restriction);
    }

    if (maxcodelength > 0) {
        return new DoubleMetaphoneFilter(tokenStream, maxcodelength, !replace);
    }

    throw new IllegalArgumentException("encoder error");
}
 
示例6
/**
 * Parses a single phoneme of the form {@code text} or {@code text[lang1+lang2+...]}.
 * <p>
 * Without a {@code '['} the whole string is the phoneme text and the phoneme is attached to
 * {@code Languages.ANY_LANGUAGE}. Otherwise the text before the first {@code '['} is the phoneme
 * text and the content between that bracket and the final {@code ']'} is split on {@code '+'}
 * into language names.
 *
 * @param ph the phoneme expression to parse
 * @return the parsed phoneme
 * @throws IllegalArgumentException if {@code ph} contains a {@code '['} but does not end in {@code ']'}
 */
private static Phoneme parsePhoneme(final String ph) {
    final int bracketStart = ph.indexOf("[");
    if (bracketStart < 0) {
        // No language restriction given.
        return new Phoneme(ph, Languages.ANY_LANGUAGE);
    }

    if (!ph.endsWith("]")) {
        throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'");
    }

    final String text = ph.substring(0, bracketStart);
    final String languageList = ph.substring(bracketStart + 1, ph.length() - 1);
    final String[] names = languageList.split("[+]");
    final Set<String> langs = new HashSet<String>(Arrays.asList(names));
    return new Phoneme(text, LanguageSet.from(langs));
}
 
示例7
/**
 * Turns a phoneme expression into a {@code Phoneme}.
 * <p>
 * An expression may carry a trailing language list in square brackets, with the language names
 * separated by {@code '+'}. An expression without brackets yields a phoneme for
 * {@code Languages.ANY_LANGUAGE}.
 *
 * @param ph the phoneme expression to parse
 * @return the parsed phoneme
 * @throws IllegalArgumentException if {@code ph} contains a {@code '['} but does not end in {@code ']'}
 */
private static Phoneme parsePhoneme(final String ph) {
    final int openBracket = ph.indexOf("[");
    final boolean restricted = openBracket >= 0;
    if (!restricted) {
        return new Phoneme(ph, Languages.ANY_LANGUAGE);
    }
    if (!ph.endsWith("]")) {
        throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'");
    }

    // Everything between the first '[' and the closing ']' is the '+'-separated language list.
    final String inside = ph.substring(openBracket + 1, ph.length() - 1);
    final Set<String> languageNames = new HashSet<String>(Arrays.asList(inside.split("[+]")));
    final String phonemeText = ph.substring(0, openBracket);

    return new Phoneme(phonemeText, Languages.LanguageSet.from(languageNames));
}
 
示例8
/**
 * Creates a phoneme from its text and the languages it belongs to.
 * <p>
 * The text is copied into a fresh {@code StringBuilder}; the language set is stored as given.
 *
 * @param phonemeText the text of the phoneme
 * @param languages   the languages associated with the phoneme
 */
public Phoneme(final CharSequence phonemeText, final LanguageSet languages) {
    this.languages = languages;
    this.phonemeText = new StringBuilder(phonemeText);
}
 
示例9
/**
 * Creates a phoneme whose text is the left phoneme's text followed by the right phoneme's text.
 *
 * @param phonemeLeft  the phoneme providing the leading text
 * @param phonemeRight the phoneme providing the trailing text
 * @param languages    the languages associated with the new phoneme
 */
public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final LanguageSet languages) {
    // Start from a copy of the left text, then extend that copy with the right text.
    this(phonemeLeft.phonemeText, languages);
    phonemeText.append(phonemeRight.phonemeText);
}
 
示例10
/**
 * Gets the languages associated with this phoneme.
 *
 * @return the language set held by this phoneme
 */
public LanguageSet getLanguages() {
    return languages;
}
 
示例11
/**
 * Builds a phoneme with the given text and language set.
 * <p>
 * The characters of {@code phonemeText} are copied into a new {@code StringBuilder}, while
 * {@code languages} is kept by reference.
 *
 * @param phonemeText the characters making up the phoneme
 * @param languages   the language set the phoneme is tagged with
 */
public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) {
    this.languages = languages;
    this.phonemeText = new StringBuilder(phonemeText);
}
 
示例12
/**
 * Builds a phoneme by concatenating the texts of two phonemes.
 *
 * @param phonemeLeft  the phoneme whose text comes first
 * @param phonemeRight the phoneme whose text is appended
 * @param languages    the language set the joined phoneme is tagged with
 */
public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) {
    // The delegated constructor copies the left text; the right text is appended to that copy.
    this(phonemeLeft.phonemeText, languages);
    phonemeText.append(phonemeRight.phonemeText);
}
 
示例13
/**
 * Returns the language set this phoneme is tagged with.
 *
 * @return the language set held by this phoneme
 */
public Languages.LanguageSet getLanguages() {
    return languages;
}
 
示例14
/**
 * Gets rules for a combination of name type, rule type and languages.
 * <p>
 * The rule lists returned by {@code getInstanceMap} for the same arguments are concatenated into
 * one new list, following the iteration order of the map's values.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
                                     final LanguageSet langs) {
    final List<Rule> flattened = new ArrayList<Rule>();
    for (final List<Rule> group : getInstanceMap(nameType, rt, langs).values()) {
        flattened.addAll(group);
    }
    return flattened;
}
 
示例15
/**
 * Gets rules for a combination of name type, rule type and languages.
 * <p>
 * This is a flattened view of {@code getInstanceMap}: every rule list in that map is appended,
 * in the order the map's values are iterated, to a single new list.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
                                     final Languages.LanguageSet langs) {
    final Map<String, List<Rule>> grouped = getInstanceMap(nameType, rt, langs);

    final List<Rule> combined = new ArrayList<Rule>();
    for (final List<Rule> bucket : grouped.values()) {
        combined.addAll(bucket);
    }

    return combined;
}
 
示例16
/**
 * Creates a builder seeded with a single phoneme of zero characters that is tagged with the given
 * languages. Further phonemes can then be appended to it. This should be the only way to create a
 * new phoneme from scratch.
 *
 * @param languages the set of languages
 * @return  a new, empty phoneme builder
 */
public static PhonemeBuilder empty(final LanguageSet languages) {
    final Phoneme seed = new Phoneme("", languages);
    return new PhonemeBuilder(seed);
}
 
示例17
/**
 * Encodes a string to its phonetic representation.
 * <p>
 * The languages are first guessed from the input via {@code this.lang}; the actual encoding is
 * delegated to {@code encode(input, languageSet)}.
 *
 * @param input
 *            the String to encode
 * @return the encoding of the input
 */
public String encode(final String input) {
    return encode(input, this.lang.guessLanguages(input));
}
 
示例18
/**
 * Returns a new Phoneme with the same text as this one and the result of merging this phoneme's
 * language set with the given one. This phoneme itself is not changed by this method.
 *
 * @param lang the language set to merge
 * @return a new Phoneme
 */
public Phoneme mergeWithLanguage(final LanguageSet lang) {
  final String text = this.phonemeText.toString();
  final LanguageSet merged = this.languages.merge(lang);
  return new Phoneme(text, merged);
}
 
示例19
/**
 * Gets rules for a combination of name type, rule type and a single language.
 * <p>
 * The language name is wrapped into a one-element language set and the lookup is delegated to
 * the {@code LanguageSet}-based overload.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param lang
 *            the language to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
    final HashSet<String> single = new HashSet<String>(Arrays.asList(lang));
    return getInstance(nameType, rt, LanguageSet.from(single));
}
 
示例20
/**
 * Gets rules for a combination of name type, rule type and languages.
 * <p>
 * When {@code langs} is a singleton, the rules for its one language are looked up; in every
 * other case the rules for {@code Languages.ANY} are used.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern
 * @since 1.9
 */
public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt,
                                                     final LanguageSet langs) {
    if (langs.isSingleton()) {
        return getInstanceMap(nameType, rt, langs.getAny());
    }
    return getInstanceMap(nameType, rt, Languages.ANY);
}
 
示例21
/**
 * Produces a builder that holds exactly one phoneme: a phoneme with empty text, tagged with the
 * given languages. Text can then be appended to it. This should be the only way to create a new
 * phoneme from scratch.
 *
 * @param languages the set of languages
 * @return  a new, empty phoneme builder
 */
public static PhonemeBuilder empty(final Languages.LanguageSet languages) {
    final Rule.Phoneme blank = new Rule.Phoneme("", languages);
    return new PhonemeBuilder(blank);
}
 
示例22
/**
 * Encodes a string to its phonetic representation.
 * <p>
 * Convenience overload: the language set is obtained from {@code this.lang.guessLanguages(input)}
 * and passed on, together with the input, to {@code encode(input, languageSet)}.
 *
 * @param input
 *            the String to encode
 * @return the encoding of the input
 */
public String encode(final String input) {
    return encode(input, this.lang.guessLanguages(input));
}
 
示例23
/**
 * Creates a copy of this Phoneme that keeps the text but carries the language set obtained by
 * merging this phoneme's languages with the given ones.
 *
 * @param lang the language set to merge
 * @return a new Phoneme
 */
public Phoneme mergeWithLanguage(final LanguageSet lang) {
  // Snapshot the text first, then merge; neither step modifies this phoneme's fields.
  final String currentText = phonemeText.toString();
  return new Phoneme(currentText, languages.merge(lang));
}
 
示例24
/**
 * Gets rules for a combination of name type, rule type and a single language.
 * <p>
 * Convenience overload that builds a language set containing only {@code lang} and forwards to
 * the language-set variant of this method.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param lang
 *            the language to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
    final LanguageSet onlyLang = LanguageSet.from(new HashSet<String>(Arrays.asList(lang)));
    return getInstance(nameType, rt, onlyLang);
}
 
示例25
/**
 * Gets rules for a combination of name type, rule type and languages.
 * <p>
 * A language set that is not a singleton is looked up under {@code Languages.ANY}; a singleton
 * set is looked up under the one language it contains.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern
 * @since 1.9
 */
public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt,
                                                     final Languages.LanguageSet langs) {
    if (!langs.isSingleton()) {
        return getInstanceMap(nameType, rt, Languages.ANY);
    }
    return getInstanceMap(nameType, rt, langs.getAny());
}
 
示例26
/**
 * Creates a new BeiderMorseFilter over the given token stream.
 *
 * @param input TokenStream to filter
 * @param engine configured PhoneticEngine with BM settings.
 * @param languages optional Set of original languages. Can be null (which means it will be guessed).
 */
public BeiderMorseFilter(TokenStream input, PhoneticEngine engine, LanguageSet languages) {
  super(input);
  // Both collaborators are stored as given; a null language set is accepted here.
  this.languages = languages;
  this.engine = engine;
}