Java源码示例:org.apache.lucene.util.automaton.CompiledAutomaton
示例1
/**
 * Exercises {@code Intervals.multiterm} with the regular expression {@code p.*e}: verifies the
 * intervals reported for {@code field1}, verifies that an expansion limit of 1 is rejected with
 * an {@link IllegalStateException} carrying the expected message, and verifies the visit count.
 */
public void testMultiTerm() throws IOException {
  final RegExp re = new RegExp("p.*e");
  final CompiledAutomaton unbounded = new CompiledAutomaton(re.toAutomaton());
  final IntervalsSource source = Intervals.multiterm(unbounded, re.toString());

  // Expected interval boundaries, one row per document.
  final int[][] expectedIntervals = new int[][]{
      {},
      { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
      { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
      { 7, 7 },
      { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
      { 0, 0 }
  };
  checkIntervals(source, "field1", 5, expectedIntervals);

  // With the expansion limit set to 1, pulling intervals from the leaves must fail.
  final IllegalStateException tooManyTerms = expectThrows(IllegalStateException.class, () -> {
    final CompiledAutomaton limited = new CompiledAutomaton(re.toAutomaton());
    final IntervalsSource limitedSource = Intervals.multiterm(limited, 1, re.toString());
    for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
      limitedSource.intervals("field1", leaf);
    }
  });
  assertEquals("Automaton [\\p(.)*\\e] expanded to too many terms (limit 1)", tooManyTerms.getMessage());

  checkVisits(source, 1);
}
示例2
/**
 * Produces a {@link TermsEnum} over the values restricted to those matched by the given
 * {@link CompiledAutomaton}. The returned enum supports {@link TermsEnum#ord()}.
 *
 * @param automaton the compiled automaton whose {@code type} selects the filtering strategy
 * @return an enum over the matching values
 */
public TermsEnum intersect(CompiledAutomaton automaton) throws IOException {
  // The unfiltered enum is always obtained first, even if the automaton matches nothing.
  final TermsEnum values = termsEnum();
  final TermsEnum filtered;
  switch (automaton.type) {
    case NONE:
      filtered = TermsEnum.EMPTY;
      break;
    case ALL:
      filtered = values;
      break;
    case SINGLE:
      filtered = new SingleTermsEnum(values, automaton.term);
      break;
    case NORMAL:
      filtered = new AutomatonTermsEnum(values, automaton);
      break;
    default:
      // unreachable
      throw new RuntimeException("unhandled case");
  }
  return filtered;
}
示例3
/**
 * Intersects every sub-terms instance with the automaton and merges the non-null results into
 * a single {@link MultiTermsEnum}; returns {@link TermsEnum#EMPTY} when no sub produced an enum.
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  final List<MultiTermsEnum.TermsEnumIndex> matching = new ArrayList<>();
  for (int subIndex = 0; subIndex < subs.length; subIndex++) {
    final TermsEnum subEnum = subs[subIndex].intersect(compiled, startTerm);
    if (subEnum == null) {
      continue;
    }
    // Remember which sub the enum came from.
    matching.add(new MultiTermsEnum.TermsEnumIndex(subEnum, subIndex));
  }

  if (matching.isEmpty()) {
    return TermsEnum.EMPTY;
  }
  return new MultiTermsEnum(subSlices).reset(matching.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
}
示例4
/**
 * Returns a {@link TermsEnum} over the values, narrowed to the terms accepted by the supplied
 * {@link CompiledAutomaton}. The enum supports {@link TermsEnum#ord()}.
 *
 * @param automaton the compiled automaton; its {@code type} decides which enum is returned
 * @return the filtered enum
 */
public TermsEnum intersect(CompiledAutomaton automaton) throws IOException {
  // Obtained up front for every automaton type, matching the original call order.
  final TermsEnum all = termsEnum();
  final TermsEnum result;
  switch (automaton.type) {
    case NONE:
      result = TermsEnum.EMPTY;
      break;
    case ALL:
      result = all;
      break;
    case SINGLE:
      result = new SingleTermsEnum(all, automaton.term);
      break;
    case NORMAL:
      result = new AutomatonTermsEnum(all, automaton);
      break;
    default:
      // unreachable
      throw new RuntimeException("unhandled case");
  }
  return result;
}
示例5
/**
 * Returns an {@code IntersectTermsEnum} for a {@code NORMAL} compiled automaton.
 *
 * @throws IllegalArgumentException if the automaton type is anything other than {@code NORMAL}
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  if (compiled.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    return new IntersectTermsEnum(compiled, startTerm);
  }
  throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
示例6
/**
 * Creates the enumerator, loads its initial frames and, when a start term is given, seeks to it.
 *
 * @param compiled compiled automaton; only its {@code runAutomaton} is read in this constructor
 * @param startTerm term passed to {@code doSeekCeil} before iteration, or {@code null} to start
 *     from the first loaded frame
 * @throws IOException declared by this constructor; presumably raised by the frame-loading or
 *     seek helpers, which are not visible here -- TODO confirm
 */
IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
super();
//if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
// NOTE(review): `dict` is not declared in this block; presumably a field of the enclosing
// class -- confirm.
this.fst = dict;
this.fstReader = fst.getBytesReader();
this.fstOutputs = dict.outputs;
this.fsa = compiled.runAutomaton;
// level starts at -1 and is incremented once, after loadVirtualFrame below.
this.level = -1;
// Pre-allocate 16 Frame objects.
this.stack = new Frame[16];
for (int i = 0 ; i < stack.length; i++) {
this.stack[i] = new Frame();
}
// Load the virtual frame, bump the level, then push the first frame.
loadVirtualFrame(newFrame());
this.level++;
pushFrame(loadFirstFrame(newFrame()));
this.meta = null;
this.metaUpto = 1;
this.decoded = false;
this.pending = false;
if (startTerm == null) {
// No start term: pending is set from isAccept on the top frame.
pending = isAccept(topFrame());
} else {
// Seek first; pending is set only when the seek did not land exactly on startTerm
// (term is null or differs) and the top frame is both valid and accepting.
doSeekCeil(startTerm);
pending = (term == null || !startTerm.equals(term.get())) && isValid(topFrame()) && isAccept(topFrame());
}
}
示例7
/**
 * Returns a {@code DirectIntersectTermsEnum} for a {@code NORMAL} compiled automaton.
 *
 * @throws IllegalArgumentException if the automaton type is anything other than {@code NORMAL}
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) {
  if (compiled.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    return new DirectIntersectTermsEnum(compiled, startTerm);
  }
  throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
示例8
/**
 * Returns an {@code OrdsIntersectTermsEnum} bound to this reader for a {@code NORMAL} compiled
 * automaton.
 *
 * @throws IllegalArgumentException if the automaton type is anything other than {@code NORMAL}
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  if (compiled.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    return new OrdsIntersectTermsEnum(this, compiled, startTerm);
  }
  throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
示例9
/**
 * Creates a block reader that intersects terms with a compiled automaton.
 *
 * <p>All arguments except {@code compiled} and {@code startTerm} are forwarded unchanged to the
 * superclass constructor.
 *
 * @param compiled compiled automaton; its {@code automaton}, {@code runAutomaton},
 *     {@code finite} and {@code commonSuffixRef} members are copied into fields
 * @param startTerm stored as {@code seekTerm}; how it is used is not visible in this block
 * @throws IOException declared by this constructor; presumably raised by the superclass
 *     constructor -- TODO confirm
 */
protected IntersectBlockReader(CompiledAutomaton compiled, BytesRef startTerm,
IndexDictionary.BrowserSupplier dictionaryBrowserSupplier, IndexInput blockInput,
PostingsReaderBase postingsReader, FieldMetadata fieldMetadata,
BlockDecoder blockDecoder) throws IOException {
super(dictionaryBrowserSupplier, blockInput, postingsReader, fieldMetadata, blockDecoder);
automaton = compiled.automaton;
runAutomaton = compiled.runAutomaton;
finite = compiled.finite;
commonSuffix = compiled.commonSuffixRef;
// NOTE(review): getMinTermLength() and the calculator below are created after the fields
// above are set; presumably they read those fields, so keep this order -- confirm.
minTermLength = getMinTermLength();
nextStringCalculator = new AutomatonNextTermCalculator(compiled);
seekTerm = startTerm;
}
示例10
/**
 * Creates a multi-term intervals source.
 *
 * @param automaton the compiled automaton kept by this source
 * @param maxExpansions expansion limit; must not exceed {@code IndexSearcher.getMaxClauseCount()}
 * @param pattern the pattern string kept by this source
 * @throws IllegalArgumentException if {@code maxExpansions} exceeds the max clause count
 */
MultiTermIntervalsSource(CompiledAutomaton automaton, int maxExpansions, String pattern) {
  // Reject an over-large expansion limit before storing any state.
  if (maxExpansions > IndexSearcher.getMaxClauseCount()) {
    throw new IllegalArgumentException("maxExpansions [" + maxExpansions
        + "] cannot be greater than BooleanQuery.getMaxClauseCount [" + IndexSearcher.getMaxClauseCount() + "]");
  }
  this.automaton = automaton;
  this.maxExpansions = maxExpansions;
  this.pattern = pattern;
}
示例11
/**
 * Delegates to the wrapped terms, asserts that the result is non-null and that the start term
 * (if any) is valid, then wraps the result in an {@code AssertingTermsEnum}.
 */
@Override
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws IOException {
  final TermsEnum delegateEnum = in.intersect(automaton, bytes);
  assert delegateEnum != null;
  assert bytes == null || bytes.isValid();
  final boolean withFreqs = hasFreqs();
  return new AssertingTermsEnum(delegateEnum, withFreqs);
}
示例12
/**
 * Checks that two {@link Terms} instances are equivalent through the Terms API: statistics,
 * offsets/positions/payloads flags, full enumeration and seeking. When {@code deep} is set,
 * random regular expressions are additionally intersected against both sides.
 */
public void assertTermsEquals(String info, IndexReader leftReader, Terms leftTerms, Terms rightTerms, boolean deep) throws IOException {
  if (leftTerms == null || rightTerms == null) {
    // If either side is missing, both must be missing.
    assertNull(info, leftTerms);
    assertNull(info, rightTerms);
    return;
  }

  assertTermsStatisticsEquals(info, leftTerms, rightTerms);
  assertEquals("hasOffsets", leftTerms.hasOffsets(), rightTerms.hasOffsets());
  assertEquals("hasPositions", leftTerms.hasPositions(), rightTerms.hasPositions());
  assertEquals("hasPayloads", leftTerms.hasPayloads(), rightTerms.hasPayloads());

  assertTermsEnumEquals(info, leftReader, leftTerms.iterator(), rightTerms.iterator(), true);
  assertTermsSeekingEquals(info, leftTerms, rightTerms);

  if (!deep) {
    return;
  }

  final int rounds = atLeast(3);
  for (int round = 0; round < rounds; round++) {
    final String pattern = AutomatonTestUtil.randomRegexp(random());
    final CompiledAutomaton candidate = new CompiledAutomaton(new RegExp(pattern, RegExp.NONE).toAutomaton());
    if (candidate.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
      // Only NORMAL automata are intersected here.
      continue;
    }
    // TODO: test start term too
    final TermsEnum leftMatches = leftTerms.intersect(candidate, null);
    final TermsEnum rightMatches = rightTerms.intersect(candidate, null);
    assertTermsEnumEquals(info, leftReader, leftMatches, rightMatches, rarely());
  }
}
示例13
/**
 * Returns an {@code IntersectTermsEnum} over this reader for a {@code NORMAL} compiled
 * automaton, passing along the automaton, its run automaton and its common suffix.
 *
 * @throws IllegalArgumentException if the automaton type is anything other than {@code NORMAL}
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
  // can we optimize knowing that...?
  if (compiled.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm);
  }
  throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
示例14
/**
 * Builds one compiled automaton per edit distance from 0 up to and including {@code maxEdits}.
 *
 * @return an array indexed by edit distance
 * @throws FuzzyTermsEnum.FuzzyTermsException if any automaton is too complex to determinize
 */
CompiledAutomaton[] buildAutomatonSet() {
  final CompiledAutomaton[] byDistance = new CompiledAutomaton[maxEdits + 1];
  try {
    for (int distance = 0; distance < byDistance.length; distance++) {
      byDistance[distance] = new CompiledAutomaton(levBuilder.toAutomaton(distance, prefix), true, false);
    }
  } catch (TooComplexToDeterminizeException e) {
    throw new FuzzyTermsEnum.FuzzyTermsException(term, e);
  }
  return byDistance;
}
示例15
/**
 * Builds the compiled automaton for the largest configured edit distance ({@code maxEdits}).
 *
 * @throws FuzzyTermsEnum.FuzzyTermsException if the automaton is too complex to determinize
 */
CompiledAutomaton buildMaxEditAutomaton() {
  CompiledAutomaton widest;
  try {
    widest = new CompiledAutomaton(levBuilder.toAutomaton(maxEdits, prefix), true, false);
  } catch (TooComplexToDeterminizeException e) {
    throw new FuzzyTermsEnum.FuzzyTermsException(term, e);
  }
  return widest;
}
示例16
/**
 * Builds a run automaton from the union of one binary automaton per term in {@code termData}.
 */
private ByteRunAutomaton asByteRunAutomaton() {
  final List<Automaton> perTerm = new ArrayList<>();
  final TermIterator terms = termData.iterator();
  BytesRef current;
  // next() signals exhaustion by returning null.
  while ((current = terms.next()) != null) {
    perTerm.add(Automata.makeBinary(current));
  }
  final CompiledAutomaton union = new CompiledAutomaton(Operations.union(perTerm));
  return union.runAutomaton;
}
示例17
/**
 * Creates an enumerator driven by a compiled automaton, layered over the supplied
 * {@link TermsEnum}.
 *
 * @lucene.experimental
 * @param tenum the enum handed to the superclass constructor
 * @param compiled CompiledAutomaton; must be of type {@code NORMAL}
 * @throws IllegalArgumentException if {@code compiled} is not a {@code NORMAL} automaton
 */
public AutomatonTermsEnum(TermsEnum tenum, CompiledAutomaton compiled) {
  super(tenum);
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }

  this.automaton = compiled.automaton;
  this.commonSuffixRef = compiled.commonSuffixRef;
  this.runAutomaton = compiled.runAutomaton;
  assert this.runAutomaton != null;
  this.finite = compiled.finite;

  if (finite) {
    // No need to track visited states for a finite language without loops.
    visited = null;
  } else {
    visited = new short[runAutomaton.getSize()];
  }
}
示例18
/**
 * Returns a TermsEnum that iterates over all terms and documents that are accepted by the
 * provided {@link CompiledAutomaton}. If {@code startTerm} is provided then the returned enum
 * will only return terms {@code > startTerm}, but you still must call next() first to get to
 * the first term. Note that the provided {@code startTerm} must be accepted by the automaton.
 *
 * <p>This is an expert low-level API and will only work for {@code NORMAL} compiled automata.
 * To handle any compiled automata you should use {@link CompiledAutomaton#getTermsEnum}
 * instead.
 *
 * <p><b>NOTE</b>: the returned TermsEnum cannot seek.
 *
 * @param compiled the compiled automaton; must be of type {@code NORMAL}
 * @param startTerm optional term to start after, or {@code null} to start from the beginning
 * @return an enum over the terms accepted by {@code compiled}
 * @throws IllegalArgumentException if {@code compiled} is not a {@code NORMAL} automaton
 * @throws IOException if obtaining the underlying iterator fails
 */
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
  // TODO: could we factor out a common interface b/w
  // CompiledAutomaton and FST?  Then we could pass FST there too,
  // and likely speed up resolving terms to deleted docs ... but
  // AutomatonTermsEnum makes this tricky because of its on-the-fly cycle
  // detection

  // TODO: eventually we could support seekCeil/Exact on
  // the returned enum, instead of only being able to seek
  // at the start

  // Validate the argument before creating the iterator, so an unsupported automaton is
  // rejected without first doing the work of opening an enum.
  if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
  }

  TermsEnum termsEnum = iterator();
  if (startTerm == null) {
    return new AutomatonTermsEnum(termsEnum, compiled);
  } else {
    return new AutomatonTermsEnum(termsEnum, compiled) {
      @Override
      protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
        // The first request arrives with a null term; substitute the caller's start term.
        if (term == null) {
          term = startTerm;
        }
        return super.nextSeekTerm(term);
      }
    };
  }
}
示例19
/**
 * Steps the run automaton of {@code c} from state 0 through every byte of {@code b} and
 * reports whether the final state is accepting. Asserts, before each step, that the current
 * state is not -1.
 */
private boolean accepts(CompiledAutomaton c, BytesRef b) {
  int state = 0;
  final int end = b.offset + b.length;
  for (int pos = b.offset; pos < end; pos++) {
    assertTrue(state != -1);
    // Mask to treat the byte as an unsigned label.
    state = c.runAutomaton.step(state, b.bytes[pos] & 0xff);
  }
  return c.runAutomaton.isAccept(state);
}
示例20
/**
 * Verifies that {@code Terms.intersect} rejects a compiled automaton built from a plain-string
 * regular expression with an {@link IllegalArgumentException} that points callers at
 * {@code CompiledAutomaton.getTermsEnum}.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so they are closed
 * (reader, then writer, then directory) even when an assertion fails.
 */
public void testIntersectRegexp() throws Exception {
  try (Directory d = newDirectory();
       RandomIndexWriter w = new RandomIndexWriter(random(), d)) {
    Document doc = new Document();
    doc.add(newStringField("field", "foobar", Field.Store.NO));
    w.addDocument(doc);
    try (IndexReader r = w.getReader()) {
      Terms terms = MultiTerms.getTerms(r, "field");
      CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
      String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
      assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
    }
  }
}
示例21
/**
 * Checks that two {@link Terms} instances agree on statistics, enumeration and seeking. When
 * {@code deep} is set, random regular expressions are additionally intersected against both.
 */
public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception {
  if (leftTerms == null || rightTerms == null) {
    // If either side is missing, both must be missing.
    assertNull(leftTerms);
    assertNull(rightTerms);
    return;
  }

  assertTermsStatistics(leftTerms, rightTerms);

  // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different
  final boolean compareFreqs = leftTerms.hasFreqs() && rightTerms.hasFreqs();
  final boolean comparePositions = leftTerms.hasPositions() && rightTerms.hasPositions();

  assertTermsEnum(leftTerms.iterator(), rightTerms.iterator(), true, compareFreqs, comparePositions);
  assertTermsSeeking(leftTerms, rightTerms);

  if (!deep) {
    return;
  }

  final int rounds = atLeast(3);
  for (int round = 0; round < rounds; round++) {
    final String pattern = AutomatonTestUtil.randomRegexp(random());
    final CompiledAutomaton candidate = new CompiledAutomaton(new RegExp(pattern, RegExp.NONE).toAutomaton());
    if (candidate.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
      // Only NORMAL automata are intersected here.
      continue;
    }
    // TODO: test start term too
    final TermsEnum leftMatches = leftTerms.intersect(candidate, null);
    final TermsEnum rightMatches = rightTerms.intersect(candidate, null);
    assertTermsEnum(leftMatches, rightMatches, rarely(), compareFreqs, comparePositions);
  }
}
示例22
/**
 * Returns an {@code IntersectEnum} for a {@code NORMAL} compiled automaton.
 *
 * @throws IllegalArgumentException if the automaton type is anything other than {@code NORMAL}
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  if (compiled.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    return new IntersectEnum(compiled, startTerm);
  }
  throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
示例23
/**
 * Forwards to the wrapped terms, asserts the result is non-null and the start term (if given)
 * is valid, and returns the result wrapped in an {@code AssertingTermsEnum}.
 */
@Override
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws IOException {
  final TermsEnum wrapped = in.intersect(automaton, bytes);
  assert wrapped != null;
  assert bytes == null || bytes.isValid();
  final boolean freqsAvailable = hasFreqs();
  return new AssertingTermsEnum(wrapped, freqsAvailable);
}
示例24
private AutomatonBackedOrdinalsFilter(Automaton automaton) {
this.compiled = new CompiledAutomaton(automaton);
}
示例25
/**
 * Pairs an iterator over the wrapped terms with the intersection of {@code filterTerms} and the
 * automaton, and returns them combined in a {@code TermVectorFilteredTermsEnum}.
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  // The wrapped iterator is obtained before the filter intersection, as in the original call.
  final TermsEnum allTerms = in.iterator();
  final TermsEnum acceptedTerms = filterTerms.intersect(compiled, startTerm);
  return new TermVectorFilteredTermsEnum(allTerms, acceptedTerms);
}
示例26
/**
 * Creates the enumerator over the given field reader, sets up the frame stack and FST arcs,
 * loads the root frame and, when a start term is given, seeks to it.
 *
 * @param fr field reader; its {@code parent.in}, {@code index}, {@code rootBlockFP} and
 *     {@code rootCode} members are read here
 * @param compiled compiled automaton; kept whole, with its {@code runAutomaton} also stored
 * @param startTerm term to seek to via {@code seekToStartTerm}, or {@code null} for no seek
 * @throws IOException declared by this constructor; presumably raised by frame loading or
 *     seeking -- TODO confirm
 */
public OrdsIntersectTermsEnum(OrdsFieldReader fr, CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
// if (DEBUG) {
// System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" + brToString(compiled.commonSuffixRef));
// }
this.fr = fr;
runAutomaton = compiled.runAutomaton;
compiledAutomaton = compiled;
// Work on a clone of the parent's input rather than the parent's own instance.
in = fr.parent.in.clone();
// Pre-allocate 5 frames, each constructed with this enum and its stack index.
stack = new OrdsIntersectTermsEnumFrame[5];
for(int idx=0;idx<stack.length;idx++) {
stack[idx] = new OrdsIntersectTermsEnumFrame(this, idx);
}
// Fill every slot of arcs (declared outside this block) with a fresh arc.
for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
arcs[arcIdx] = new FST.Arc<>();
}
if (fr.index == null) {
fstReader = null;
} else {
fstReader = fr.index.getBytesReader();
}
// TODO: if the automaton is "smallish" we really
// should use the terms index to seek at least to
// the initial term and likely to subsequent terms
// (or, maybe just fallback to ATE for such cases).
// Else the seek cost of loading the frames will be
// too costly.
// NOTE(review): fr.index is null-checked above but dereferenced unconditionally here; if a
// null index is actually reachable this line throws NullPointerException -- confirm.
final FST.Arc<Output> arc = fr.index.getFirstArc(arcs[0]);
// Empty string prefix must have an output in the index!
assert arc.isFinal();
// Special pushFrame since it's the first one:
final OrdsIntersectTermsEnumFrame f = stack[0];
f.fp = f.fpOrig = fr.rootBlockFP;
f.prefix = 0;
f.setState(0);
f.arc = arc;
f.outputPrefix = arc.output();
f.load(fr.rootCode);
// for assert:
// (setSavedStartTerm is invoked only when assertions are enabled.)
assert setSavedStartTerm(startTerm);
currentFrame = f;
if (startTerm != null) {
seekToStartTerm(startTerm);
}
}
示例27
/**
 * Forwards the intersection request unchanged to {@code delegateTerms}.
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled,
                           final BytesRef startTerm) throws IOException {
  final TermsEnum delegated = delegateTerms.intersect(compiled, startTerm);
  return delegated;
}
示例28
/**
 * Creates the calculator. The {@code visited} array is allocated, sized to the run automaton's
 * state count, only when the compiled automaton is not finite; otherwise it is left
 * {@code null}.
 *
 * @param compiled the compiled automaton whose {@code finite} flag is consulted
 */
protected AutomatonNextTermCalculator(CompiledAutomaton compiled) {
  if (compiled.finite) {
    visited = null;
  } else {
    visited = new short[runAutomaton.getSize()];
  }
}
示例29
/**
 * Validates the automaton type, then returns an {@code IntersectBlockReader} built from the
 * automaton, the start term and this instance's dictionary, input, postings reader, field
 * metadata and block decoder.
 */
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
  checkIntersectAutomatonType(compiled);
  final TermsEnum reader = new IntersectBlockReader(
      compiled,
      startTerm,
      dictionaryBrowserSupplier,
      blockInput,
      postingsReader,
      fieldMetadata,
      blockDecoder);
  return reader;
}
示例30
/**
 * Ensures the automaton is of type {@code NORMAL}.
 *
 * @param automaton the compiled automaton to validate
 * @throws IllegalArgumentException if the automaton type is anything other than {@code NORMAL}
 */
protected void checkIntersectAutomatonType(CompiledAutomaton automaton) {
  // This check is consistent with other impls and precondition stated in javadoc.
  if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
    return;
  }
  throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}