Java源码示例:org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute

示例1
/**
 * Creates a renderer for the given TokenStream.  If inputText is non-null
 * and the TokenStream exposes offsets, the surface form (the slice of
 * inputText covered by each token) is included in each arc's label.
 */
public TokenStreamToDot(String inputText, TokenStream in, PrintWriter out) {
  this.in = in;
  this.out = out;
  this.inputText = inputText;
  termAtt = in.addAttribute(CharTermAttribute.class);
  posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
  posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
  // Offsets are optional: only bind the attribute when the stream already
  // carries it, since addAttribute would otherwise create an OffsetAttribute
  // that no tokenizer in the chain ever populates.
  offsetAtt = in.hasAttribute(OffsetAttribute.class) ? in.addAttribute(OffsetAttribute.class) : null;
}
 
示例2
/**
 * Binds every attribute this component reads or writes, creating each one
 * on the shared AttributeSource if it is not already registered.
 * NOTE(review): registration order is preserved as written — presumably the
 * attribute iteration order matters to callers; confirm before reordering.
 */
private void setAttributes() {
  charTermAtt = addAttribute(CharTermAttribute.class);   // token text
  posIncrAtt = addAttribute(PositionIncrementAttribute.class); // position gap to previous token
  posLenAtt = addAttribute(PositionLengthAttribute.class); // positions spanned by this token
  offsetAtt = addAttribute(OffsetAttribute.class);       // start/end character offsets
  typeAtt = addAttribute(TypeAttribute.class);           // token type label
  posAtt = addAttribute(PartOfSpeechAttribute.class);    // part-of-speech tag (analyzer-specific)
  semanticClassAtt = addAttribute(SemanticClassAttribute.class); // semantic class (analyzer-specific)
}
 
示例3
/**
 * Exhaustively verifies NGramTokenizer output against a brute-force reference:
 * for every code-point window [start, end) of length minGram..maxGram whose
 * characters are all token chars (and, when edgesOnly, which begins at a token
 * edge), the tokenizer must emit exactly that gram, in order, with
 * positionIncrement == 1, positionLength == 1, and the expected char offsets.
 *
 * @param minGram       smallest gram length to generate
 * @param maxGram       largest gram length to generate
 * @param s             the input text
 * @param nonTokenChars characters treated as non-token (gram separators)
 * @param edgesOnly     if true, only grams starting at a token edge are expected
 */
static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws IOException {
  // convert the string to code points
  final int[] codePoints = toCodePoints(s);
  // offsets[i] = char offset of code point i; one extra slot so offsets[end]
  // is valid for a gram ending at the last code point.
  final int[] offsets = new int[codePoints.length + 1];
  for (int i = 0; i < codePoints.length; ++i) {
    offsets[i+1] = offsets[i] + Character.charCount(codePoints[i]);
  }
  final Tokenizer grams = new NGramTokenizer(minGram, maxGram, edgesOnly) {
    @Override
    protected boolean isTokenChar(int chr) {
      // A char is a token char iff it is NOT listed in nonTokenChars.
      return nonTokenChars.indexOf(chr) < 0;
    }
  };
  grams.setReader(new StringReader(s));
  final CharTermAttribute termAtt = grams.addAttribute(CharTermAttribute.class);
  final PositionIncrementAttribute posIncAtt = grams.addAttribute(PositionIncrementAttribute.class);
  final PositionLengthAttribute posLenAtt = grams.addAttribute(PositionLengthAttribute.class);
  final OffsetAttribute offsetAtt = grams.addAttribute(OffsetAttribute.class);
  grams.reset();
  // Brute-force enumeration of every expected gram, in the same order the
  // tokenizer emits them: by start position, then by increasing length.
  for (int start = 0; start < codePoints.length; ++start) {
    nextGram:
    for (int end = start + minGram; end <= start + maxGram && end <= codePoints.length; ++end) {
      if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1])) {
        // not on an edge
        continue nextGram;
      }
      // Skip windows containing any non-token char; the tokenizer never
      // produces grams that cross a separator.
      for (int j = start; j < end; ++j) {
        if (!isTokenChar(nonTokenChars, codePoints[j])) {
          continue nextGram;
        }
      }
      // Each expected gram must be the next token on the stream.
      assertTrue(grams.incrementToken());
      assertArrayEquals(ArrayUtil.copyOfSubArray(codePoints, start, end), toCodePoints(termAtt));
      assertEquals(1, posIncAtt.getPositionIncrement());
      assertEquals(1, posLenAtt.getPositionLength());
      assertEquals(offsets[start], offsetAtt.startOffset());
      assertEquals(offsets[end], offsetAtt.endOffset());
    }
  }
  // Stream must be exhausted, and end() must set final offsets to the
  // full char length of the input.
  assertFalse(grams.incrementToken());
  grams.end();
  assertEquals(s.length(), offsetAtt.startOffset());
  assertEquals(s.length(), offsetAtt.endOffset());
}
 
示例4
Token(AttributeSource attSource) {
  this.attSource = attSource;
  this.posIncAtt = attSource.addAttribute(PositionIncrementAttribute.class);
  boolean hasLengthAtt = attSource.hasAttribute(PositionLengthAttribute.class);
  this.lengthAtt = hasLengthAtt ? attSource.addAttribute(PositionLengthAttribute.class) : null;
}
 
示例5
/**
 * Build an automaton from the provided {@link TokenStream}.
 *
 * <p>Each token becomes one transition labeled with its index into the
 * {@code tokens} array (where a clone of its attributes is stored), running
 * from its start position-state to {@code start + positionLength + gap}.
 * Tokens with increment 0 are stacked on the previous token's position.
 * The last state created is marked as the only accept state.
 *
 * @throws IllegalStateException if the first token has an increment &lt; 1
 */
private Automaton build(final TokenStream in) throws IOException {
  Automaton.Builder builder = new Automaton.Builder();

  final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
  final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);

  in.reset();

  int pos = -1;        // current token position (-1 until the first token)
  int prevIncr = 1;    // last non-zero increment seen, reused for stacked tokens
  int state = -1;      // highest state id created so far
  int id = -1;         // index of the current token in the tokens array
  int gap = 0;         // holes implied by the previous increment (incr - 1)
  while (in.incrementToken()) {
    int currentIncr = posIncAtt.getPositionIncrement();
    if (pos == -1 && currentIncr < 1) {
      throw new IllegalStateException("Malformed TokenStream, start token can't have increment less than 1");
    }

    if (currentIncr == 0) {
      // Stacked token: rewind past the gap so it shares the position of the
      // previous token (pos was advanced by gap at the end of the last loop).
      if (gap > 0) {
        pos -= gap;
      }
    }
    else {
      pos++;
      gap = currentIncr - 1;
    }

    // End position spans the token's length plus any hole it follows.
    int endPos = pos + posLengthAtt.getPositionLength() + gap;
    // Lazily create states up to endPos so the transition below is valid.
    while (state < endPos) {
      state = builder.createState();
    }

    id++;
    if (tokens.length < id + 1) {
      tokens = ArrayUtil.grow(tokens, id + 1);
    }

    // Keep a snapshot of this token's attributes, addressable by the
    // transition label.
    tokens[id] = in.cloneAttributes();
    builder.addTransition(pos, endPos, id);
    pos += gap;

    // we always produce linear token graphs from getFiniteStrings(), so we need to adjust
    // posLength and posIncrement accordingly
    tokens[id].addAttribute(PositionLengthAttribute.class).setPositionLength(1);
    if (currentIncr == 0) {
      // stacked token should have the same increment as original token at this position
      tokens[id].addAttribute(PositionIncrementAttribute.class).setPositionIncrement(prevIncr);
    }

    // only save last increment on non-zero increment in case we have multiple stacked tokens
    if (currentIncr > 0) {
      prevIncr = currentIncr;
    }
  }

  in.end();
  // NOTE(review): only the highest-numbered state is accepting — presumably
  // every token path ends at that state for well-formed graphs; verify.
  if (state != -1) {
    builder.setAccept(state, true);
  }
  return builder.finish();
}