Java source code examples: org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute
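PositionLengthAttribute records how many positions a token spans. The default is 1; graph-producing filters such as SynonymGraphFilter set larger values when a single token stands in for a multi-token phrase (e.g. "ny" spanning "new york"). The examples below show how the attribute is registered and read. As a minimal sketch of the standard consumption loop (the dumpPositions helper is hypothetical, not part of Lucene; imports are omitted here as in the examples below):

// Hypothetical helper: print each token's term, position increment, and position length.
static void dumpPositions(TokenStream ts) throws IOException {
  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
  PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
  PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
  ts.reset(); // mandatory before the first incrementToken()
  while (ts.incrementToken()) {
    System.out.println(termAtt + " posInc=" + posIncAtt.getPositionIncrement()
        + " posLen=" + posLenAtt.getPositionLength());
  }
  ts.end();   // records end-of-stream state (final offset)
  ts.close();
}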
Example 1: registering PositionLengthAttribute in the constructor of Lucene's TokenStreamToDot debugging helper.
/** If inputText is non-null, and the TokenStream has
 *  offsets, we include the surface form in each arc's
 *  label. */
public TokenStreamToDot(String inputText, TokenStream in, PrintWriter out) {
  this.in = in;
  this.out = out;
  this.inputText = inputText;
  termAtt = in.addAttribute(CharTermAttribute.class);
  posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
  posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
  // Offsets are optional: only fetch the attribute if the stream already has it.
  if (in.hasAttribute(OffsetAttribute.class)) {
    offsetAtt = in.addAttribute(OffsetAttribute.class);
  } else {
    offsetAtt = null;
  }
}
Example 2: a setAttributes() helper that registers the standard token attributes together with analyzer-specific ones.
private void setAttributes() {
  // Standard Lucene attributes...
  charTermAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  posLenAtt = addAttribute(PositionLengthAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  // ...plus attributes specific to this analyzer (part-of-speech, semantic class).
  posAtt = addAttribute(PartOfSpeechAttribute.class);
  semanticClassAtt = addAttribute(SemanticClassAttribute.class);
}
Example 3: a test asserting that NGramTokenizer emits a flat stream in which every gram has a position increment and position length of 1.
static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws IOException {
  // convert the string to code points
  final int[] codePoints = toCodePoints(s);
  final int[] offsets = new int[codePoints.length + 1];
  for (int i = 0; i < codePoints.length; ++i) {
    offsets[i + 1] = offsets[i] + Character.charCount(codePoints[i]);
  }
  final Tokenizer grams = new NGramTokenizer(minGram, maxGram, edgesOnly) {
    @Override
    protected boolean isTokenChar(int chr) {
      return nonTokenChars.indexOf(chr) < 0;
    }
  };
  grams.setReader(new StringReader(s));
  final CharTermAttribute termAtt = grams.addAttribute(CharTermAttribute.class);
  final PositionIncrementAttribute posIncAtt = grams.addAttribute(PositionIncrementAttribute.class);
  final PositionLengthAttribute posLenAtt = grams.addAttribute(PositionLengthAttribute.class);
  final OffsetAttribute offsetAtt = grams.addAttribute(OffsetAttribute.class);
  grams.reset();
  for (int start = 0; start < codePoints.length; ++start) {
    nextGram:
    for (int end = start + minGram; end <= start + maxGram && end <= codePoints.length; ++end) {
      if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1])) {
        // not on an edge
        continue nextGram;
      }
      for (int j = start; j < end; ++j) {
        if (!isTokenChar(nonTokenChars, codePoints[j])) {
          continue nextGram;
        }
      }
      assertTrue(grams.incrementToken());
      assertArrayEquals(ArrayUtil.copyOfSubArray(codePoints, start, end), toCodePoints(termAtt));
      // n-gram streams are flat: every token advances one position and spans exactly one
      assertEquals(1, posIncAtt.getPositionIncrement());
      assertEquals(1, posLenAtt.getPositionLength());
      assertEquals(offsets[start], offsetAtt.startOffset());
      assertEquals(offsets[end], offsetAtt.endOffset());
    }
  }
  assertFalse(grams.incrementToken());
  grams.end();
  // after end(), both offsets point at the end of the input
  assertEquals(s.length(), offsetAtt.startOffset());
  assertEquals(s.length(), offsetAtt.endOffset());
}
Example 4: a Token wrapper that checks hasAttribute() first, so PositionLengthAttribute is read only if the source already declares it.
Token(AttributeSource attSource) {
  this.attSource = attSource;
  this.posIncAtt = attSource.addAttribute(PositionIncrementAttribute.class);
  // addAttribute() would silently add a missing attribute; check first so the wrapper
  // only reads a position length the source actually produced.
  boolean hasLengthAtt = attSource.hasAttribute(PositionLengthAttribute.class);
  this.lengthAtt = hasLengthAtt ? attSource.addAttribute(PositionLengthAttribute.class) : null;
}
Example 5: building an Automaton from a token graph; the position length (plus any position-increment gap) determines each transition's end state.
/**
 * Build an automaton from the provided {@link TokenStream}.
 */
private Automaton build(final TokenStream in) throws IOException {
  Automaton.Builder builder = new Automaton.Builder();
  final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
  final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
  in.reset();
  int pos = -1;
  int prevIncr = 1;
  int state = -1;
  int id = -1;
  int gap = 0;
  while (in.incrementToken()) {
    int currentIncr = posIncAtt.getPositionIncrement();
    if (pos == -1 && currentIncr < 1) {
      throw new IllegalStateException("Malformed TokenStream, start token can't have increment less than 1");
    }
    if (currentIncr == 0) {
      // stacked token: rewind past any gap so it starts at the same position as the token below it
      if (gap > 0) {
        pos -= gap;
      }
    } else {
      pos++;
      gap = currentIncr - 1;
    }
    // the token covers posLength positions, plus any hole left by a skipped increment
    int endPos = pos + posLengthAtt.getPositionLength() + gap;
    while (state < endPos) {
      state = builder.createState();
    }
    id++;
    if (tokens.length < id + 1) {
      tokens = ArrayUtil.grow(tokens, id + 1);
    }
    tokens[id] = in.cloneAttributes();
    builder.addTransition(pos, endPos, id);
    pos += gap;
    // we always produce linear token graphs from getFiniteStrings(), so we need to adjust
    // posLength and posIncrement accordingly
    tokens[id].addAttribute(PositionLengthAttribute.class).setPositionLength(1);
    if (currentIncr == 0) {
      // stacked token should have the same increment as original token at this position
      tokens[id].addAttribute(PositionIncrementAttribute.class).setPositionIncrement(prevIncr);
    }
    // only save last increment on non-zero increment in case we have multiple stacked tokens
    if (currentIncr > 0) {
      prevIncr = currentIncr;
    }
  }
  in.end();
  if (state != -1) {
    builder.setAccept(state, true);
  }
  return builder.finish();
}
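The graph-handling code in Example 5 only matters when some token actually reports a position length greater than 1. As a closing sketch of how to produce such a token (standard Lucene synonym-module classes; the input text and variable names are illustrative, not from the examples above): mapping "ny" to the two-word phrase "new york" makes SynonymGraphFilter emit "ny" with a position length of 2, spanning both positions of the expansion.

// Sketch: produce and observe a token with posLength > 1.
SynonymMap.Builder synBuilder = new SynonymMap.Builder(true);
CharsRefBuilder scratch = new CharsRefBuilder();
// multi-word outputs must be joined with SynonymMap's word separator
synBuilder.add(new CharsRef("ny"),
    SynonymMap.Builder.join(new String[] {"new", "york"}, scratch), true);
SynonymMap map = synBuilder.build();

Tokenizer tok = new WhitespaceTokenizer();
tok.setReader(new StringReader("ny is big"));
TokenStream ts = new SynonymGraphFilter(tok, map, true);

CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
ts.reset();
while (ts.incrementToken()) {
  // "ny" prints posLen=2 because it spans both positions of "new york"
  System.out.println(termAtt + " posLen=" + posLenAtt.getPositionLength());
}
ts.end();
ts.close();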