Java源码示例:org.apache.uima.collection.CollectionException
示例1
/**
 * Loads the next resource into the given CAS.
 *
 * <p>Obtains the next resource from {@code nextFile()}, prepares the CAS with
 * {@code initCas}, then hands the resource's input stream to {@code convertToCas}
 * together with the configured {@code encoding}. The stream is passed to
 * {@code closeQuietly} afterwards whether or not the conversion succeeded.
 *
 * @param aJCas the CAS to populate
 * @throws IOException propagated from the calls made here
 * @throws CollectionException propagated from the calls made here
 */
@Override
public void getNext(JCas aJCas)
    throws IOException, CollectionException
{
    final Resource resource = nextFile();
    initCas(aJCas, resource);

    InputStream stream = null;
    try {
        stream = resource.getInputStream();
        convertToCas(aJCas, stream, encoding);
    }
    finally {
        // stream is still null here when getInputStream() itself failed
        closeQuietly(stream);
    }
}
示例2
/**
 * Reads the next resource and converts its content into the supplied CAS.
 *
 * <p>The steps are: pick the resource with {@code nextFile()}, call {@code initCas}
 * for it, open its input stream and pass it (with {@code encoding}) to
 * {@code convertToCas}. {@code closeQuietly} is always invoked on the stream
 * reference in the {@code finally} block.
 *
 * @param aJCas the CAS that receives the converted content
 * @throws IOException propagated from the calls made here
 * @throws CollectionException propagated from the calls made here
 */
@Override
public void getNext(JCas aJCas)
    throws IOException, CollectionException
{
    final Resource source = nextFile();
    initCas(aJCas, source);

    InputStream in = null;
    try {
        in = source.getInputStream();
        convertToCas(aJCas, in, encoding);
    }
    finally {
        // runs on success and on failure; 'in' may still be null
        closeQuietly(in);
    }
}
示例3
/**
 * Reports whether the wrapped collection reader has another element.
 *
 * <p>Returns {@code false} immediately once this object is marked as destroyed.
 * Checked exceptions from the reader are rethrown wrapped in an
 * {@link IllegalStateException}. If the call does not complete normally and
 * {@code selfDestroy} is set, {@code destroy()} is invoked before the failure
 * propagates.
 *
 * @return the value returned by the reader's {@code hasNext()}, or {@code false}
 *         when already destroyed
 * @throws IllegalStateException if the reader throws a CollectionException or IOException
 */
public boolean hasNext() {
    if (this.destroyed) {
        return false;
    }

    boolean completed = false;
    try {
        final boolean more = this.collectionReader.hasNext();
        completed = true;
        return more;
    } catch (CollectionException | IOException e) {
        throw new IllegalStateException(e);
    } finally {
        // only clean up when the query above did not finish normally
        if (!completed && this.selfDestroy) {
            this.destroy();
        }
    }
}
示例4
/**
 * Deserializes the current tar entry as XMI into the given CAS.
 *
 * <p>The entry's bytes are first copied from {@code tarArchiveInputStream} into
 * memory, then the reader advances via {@code fastForwardToNextValidEntry()},
 * and only afterwards is the buffered content deserialized.
 *
 * @param aCAS the CAS to fill
 * @throws IOException on read failure, or wrapping a {@code SAXException} raised
 *         during deserialization
 * @throws CollectionException declared by the signature
 */
@Override
public void getNext(CAS aCAS)
    throws IOException, CollectionException
{
    // nextTarEntry cannot be null here!
    final ByteArrayOutputStream entryBytes = new ByteArrayOutputStream();
    final int copied = IOUtils.copy(tarArchiveInputStream, entryBytes);
    final String name = nextTarEntry.getName();
    getLogger().debug("Loaded " + copied + " bytes from " + name);

    // move the archive forward before touching the CAS
    fastForwardToNextValidEntry();

    // deserialize from the in-memory copy of the entry
    final InputStream xmi = new ByteArrayInputStream(entryBytes.toByteArray());
    try {
        XmiCasDeserializer.deserialize(xmi, aCAS, lenient);
    }
    catch (SAXException e) {
        throw new IOException(e);
    }
}
示例5
/**
 * Fills the CAS with the text of {@code file} and attaches a
 * {@code SourceDocumentInformation} annotation describing it.
 *
 * <p>The document text stored in the CAS is the output of
 * {@code preparator.prepare(...)}; the annotation's end offset is taken from the
 * length of the text as read from the file.
 * NOTE(review): those two lengths may differ if the preparator changes the text
 * length — confirm this is intended.
 *
 * @param cas the CAS to populate
 * @param file the file whose content is loaded
 * @throws IOException propagated from reading the file
 * @throws CollectionException wrapping a {@code CASException} raised inside the block
 */
protected void fillCas(CAS cas, File file) throws IOException, CollectionException {
    final String sourceUri = file.toURI().toString();
    try {
        final SourceDocumentInformation info = new SourceDocumentInformation(cas.getJCas());
        info.setUri(sourceUri);

        final String rawText = getDocumentText(file.getAbsolutePath(), this.mEncoding);
        cas.setDocumentLanguage(mLanguage.getCode());
        cas.setDocumentText(preparator.prepare(rawText));

        // size bookkeeping
        info.setDocumentSize((int) file.length());
        info.setCumulatedDocumentSize(this.currentFileByteSize);
        info.setCorpusSize(this.totalFileByteSize);

        // offsets
        info.setBegin(0);
        info.setEnd(rawText.length());
        info.setOffsetInSource(0);

        // position of this document within the collection
        info.setDocumentIndex(mCurrentIndex);
        info.setNbDocuments(this.mFiles.size());
        info.setLastSegment(mCurrentIndex == mFiles.size() - 1);

        info.addToIndexes();
    } catch (CASException e) {
        throw new CollectionException(e);
    }
}
示例6
/**
 * Ensures {@code currRow} holds the next row to process and reports whether one exists.
 *
 * <p>When {@code currRow} is empty, the current result set is advanced. If it is
 * exhausted, {@code getNextTable()} is called until a table yields a row or no
 * table is left. Tables without rows are skipped, rather than reading from a
 * result set that is not positioned on a row. {@code rowId} restarts at 0 for
 * each new table and is incremented for the row that is loaded.
 *
 * @return {@code true} if {@code currRow} contains data after the call
 * @throws IOException wrapping any {@link SQLException}
 * @throws CollectionException declared by the signature
 */
@Override
public boolean doHasNext() throws IOException, CollectionException {
    if (currRow.isEmpty()) {
        try {
            // presumably getNextTable() points rsCurrTable at the next table — verify
            while (!rsCurrTable.next()) {
                if (!getNextTable()) {
                    return false;
                }
                rowId = 0;
            }
            rowId++;
            for (String col : columns) {
                currRow.put(col, rsCurrTable.getObject(col));
            }
        } catch (SQLException se) {
            throw new IOException(se);
        }
    }
    return !currRow.isEmpty();
}
示例7
/**
 * Populates the CAS from the next available resource.
 *
 * <p>{@code nextFile()} supplies the resource, {@code initCas} prepares the CAS
 * for it, and {@code convertToCas} consumes the resource's input stream using
 * {@code encoding}. Whatever happens, the stream reference is given to
 * {@code closeQuietly} at the end.
 *
 * @param aJCas the CAS to fill
 * @throws IOException propagated from the calls made here
 * @throws CollectionException propagated from the calls made here
 */
@Override
public void getNext(JCas aJCas)
    throws IOException, CollectionException
{
    final Resource current = nextFile();
    initCas(aJCas, current);

    InputStream input = null;
    try {
        input = current.getInputStream();
        convertToCas(aJCas, input, encoding);
    }
    finally {
        // 'input' remains null if opening the stream failed
        closeQuietly(input);
    }
}
示例8
/**
 * Marks this reader as having supplied its document and sets the CAS text to
 * {@code SENTENCE_1} .. {@code SENTENCE_8}, joined by {@code SEP}.
 *
 * @param jCas the CAS whose document text is set
 * @throws IOException declared by the signature
 * @throws CollectionException declared by the signature
 */
@Override
protected void doGetNext(JCas jCas) throws IOException, CollectionException {
    supplied = true;

    final StringBuilder text = new StringBuilder();
    text.append(SENTENCE_1).append(SEP);
    text.append(SENTENCE_2).append(SEP);
    text.append(SENTENCE_3).append(SEP);
    text.append(SENTENCE_4).append(SEP);
    text.append(SENTENCE_5).append(SEP);
    text.append(SENTENCE_6).append(SEP);
    text.append(SENTENCE_7).append(SEP);
    // no separator after the last sentence
    text.append(SENTENCE_8);

    jCas.setDocumentText(text.toString());
}
示例9
/**
 * Produces a CAS that carries the job configuration.
 *
 * <p>The document text is set to the simple name of {@code JobSettings}, the
 * language to {@code "en"}, and every entry of {@code config} is copied into a
 * {@code JobSettings} created on the CAS. Monitoring and pipeline-metrics calls
 * bracket the work.
 *
 * @param jCas the CAS to populate
 * @throws IOException declared by the signature
 * @throws CollectionException declared by the signature
 */
@Override
public final void getNext(final JCas jCas) throws IOException, CollectionException {
    final String functionName = "getNext";
    getMonitor().startFunction(functionName);

    final String pipelineName = monitor.getPipelineName();
    MetricsFactory.getInstance().getPipelineMetrics(pipelineName).startDocumentProcess();

    jCas.setDocumentText(JobSettings.class.getSimpleName());
    jCas.setDocumentLanguage("en");

    final JobSettings jobSettings = new JobSettings(jCas);
    for (final Map.Entry<String, String> setting : config.entrySet()) {
        jobSettings.set(setting.getKey(), setting.getValue());
    }

    getMonitor().finishFunction(functionName);
}
示例10
/**
 * Calls {@code hasNext()} three times on a {@code FixedRate} scheduler created
 * with period "1", sleeping one second between calls, and asserts that the
 * total elapsed time lies between 1900 and 2100 ms.
 */
@Test
@SuppressWarnings("squid:S2925" /* sleep required for test */)
public void testDelay()
    throws CollectionException, IOException, ResourceInitializationException,
        InterruptedException {
    final FixedRate scheduler = create("period", "1");

    final long startedAt = System.currentTimeMillis();
    assertTrue(scheduler.hasNext());
    for (int round = 0; round < 2; round++) {
        Thread.sleep(1000);
        assertTrue(scheduler.hasNext());
    }
    final long elapsed = System.currentTimeMillis() - startedAt;

    final boolean withinTolerance = elapsed >= 1900 && elapsed <= 2100;
    assertTrue(String.format("Diff was %d", elapsed), withinTolerance);
}
示例11
/**
 * Calls {@code hasNext()} three times on a {@code FixedDelay} scheduler created
 * with period "1", sleeping one second between calls, and asserts that the
 * total elapsed time lies between 3900 and 4100 ms.
 *
 * <p>The measured duration is reported through the assertion message only; the
 * test does not print to standard output.
 */
@Test
@SuppressWarnings("squid:S2925" /* sleep required for test */)
public void testDelay()
    throws CollectionException, IOException, ResourceInitializationException,
        InterruptedException {
    FixedDelay scheduler = create("period", "1");
    long start = System.currentTimeMillis();
    assertTrue(scheduler.hasNext());
    Thread.sleep(1000);
    assertTrue(scheduler.hasNext());
    Thread.sleep(1000);
    assertTrue(scheduler.hasNext());
    long end = System.currentTimeMillis();
    long diff = end - start;
    assertTrue(String.format("Diff was %d", diff), diff >= 3900 && diff <= 4100);
}
示例12
/**
 * Receives the next JMS message and extracts its text content into the CAS.
 *
 * <p>Only {@code TextMessage}s are accepted. A {@code null} result from
 * {@code consumer.receive()} is reported as an {@link IOException} instead of
 * failing with a NullPointerException while building the error message.
 *
 * @param jCas the CAS to populate
 * @throws IOException if no message was received or the message is not a text message
 * @throws CollectionException wrapping any {@code JMSException}
 */
@Override
protected void doGetNext(final JCas jCas) throws IOException, CollectionException {
    final String source = String.join(".", activeMQ.getResourceName(), endpoint);
    try {
        final Message msg = consumer.receive();
        if (msg == null) {
            // receive() yields null when the consumer is closed while waiting
            throw new IOException(
                String.format("No message received from source %s", source));
        }
        if (msg instanceof TextMessage) {
            final String text = ((TextMessage) msg).getText();
            final InputStream is = IOUtils.toInputStream(text, Charset.defaultCharset());
            extractContent(is, source, jCas);
        } else {
            throw new IOException(
                String.format(
                    "Unexpected message type for message with id %s from source %s",
                    msg.getJMSMessageID(), source));
        }
    } catch (final JMSException e) {
        throw new CollectionException(e);
    }
}
示例13
/**
 * Reports whether more input is available.
 *
 * <p>In one-file mode, returns {@code true} while the current {@code reader}
 * still has lines. In every other case the {@code reader} reference is cleared
 * and the decision is delegated to the superclass.
 *
 * @return {@code true} if another element can be read
 * @throws IOException propagated from the superclass
 * @throws CollectionException propagated from the superclass
 */
@Override
public boolean hasNext() throws IOException, CollectionException {
    final boolean moreLinesInCurrentFile =
        isOneFile && reader != null && reader.hasNextLine();
    if (moreLinesInCurrentFile) {
        return true;
    }
    reader = null;
    return super.hasNext();
}
示例14
/**
 * Parses the next sub-document from the current XML file into the CAS.
 *
 * <p>An XML stream reader is opened for {@code xmlFiles[currentParsedFile]} when
 * none is active. After parsing one sub-document, the reader is closed and the
 * file index advanced once the reader's depth drops below 2.
 *
 * <p>Failures are rethrown as {@link CollectionException} with the original
 * exception as cause; the stack trace is not printed separately.
 *
 * @param aCAS the CAS to populate
 * @throws IOException declared by the signature
 * @throws CollectionException if the JCas cannot be obtained or parsing fails
 */
public void getNext(CAS aCAS) throws IOException, CollectionException {
    JCas jcas;
    try {
        jcas = aCAS.getJCas();
    } catch (CASException e) {
        throw new CollectionException(e);
    }
    try {
        if (this.xmlReader == null) {
            WstxInputFactory factory = new WstxInputFactory();
            this.xmlReader = factory.createXMLStreamReader((File) this.xmlFiles.get(this.currentParsedFile));
            this.iDoc = 0;
        }
        this.parseSubDocument(jcas);
        // NOTE(review): dumps every document to stdout; kept because callers may rely on it
        System.out.println(jcas.getDocumentText());
        ++this.iDoc;
        if (this.xmlReader.getDepth() < 2) {
            this.xmlReader.closeCompletely();
            this.xmlReader = null;
            ++this.currentParsedFile;
        }
    } catch (Exception e) {
        // covers XMLStreamException as well; the cause travels with the wrapper
        throw new CollectionException(e);
    }
}
示例15
/**
 * Read a single sentence.
 *
 * <p>Lines are consumed from {@code reader} until a blank line, the end of input,
 * or (when {@code sentenceEnd} is {@code DOT}) a line whose first column is "."
 * and whose second column is not "dummy". A "." line that has no second column
 * also ends the sentence instead of failing with an
 * ArrayIndexOutOfBoundsException.
 *
 * @return the tab-split fields of each line of the sentence; {@code null} if no
 *         input is left or a DOCSTART line was encountered in one-file mode
 * @throws RuntimeException if a DOCSTART line appears when not in one-file mode
 */
private List<String[]> readSentence()
    throws IOException, CollectionException {
    if (!reader.hasNextLine()) {
        return null;
    }
    List<String[]> words = new ArrayList<>();
    String line;
    while (reader.hasNextLine()) {
        line = reader.nextLine();
        if (line.contains("DOCSTART")) {
            if (isOneFile) {
                nextDocId = parseDocId(line);
                return null;
            } else {
                throw new RuntimeException("There are more than DOCSTART in one document!");
            }
        }
        if (StringUtils.isBlank(line)) {
            break; // End of sentence
        }
        String[] fields = line.split("\t");
        words.add(fields);
        // a missing second column cannot be the "dummy" marker
        boolean dummyMarker = fields.length > 1 && "dummy".equals(fields[1]);
        if (sentenceEnd == SentenceEndType.DOT
                && ".".equals(fields[0]) && !dummyMarker) {
            break;
        }
    }
    return words;
}
示例16
/**
 * Derives the classification outcome for {@code unit} from the named entity covering it.
 *
 * <ul>
 *   <li>no covering entity: {@code "OTH"}</li>
 *   <li>exactly one covering entity: a position prefix followed by the entity value.
 *       The prefix is {@code "W-"} for a single-token entity; otherwise
 *       {@code "B-"}, {@code "M-"} or {@code "E-"} is appended for each token that
 *       matches the unit's text and begin offset, depending on whether it is the
 *       first, an inner, or the last token</li>
 *   <li>more than one covering entity: a {@link CollectionException}</li>
 * </ul>
 *
 * @param jcas the CAS holding the annotations
 * @param unit the target whose outcome is requested
 * @return the outcome label
 * @throws CollectionException if the unit is covered by more than one named entity
 */
@Override
public String getTextClassificationOutcome(JCas jcas, TextClassificationTarget unit) throws CollectionException {
    final List<NamedEntity> covering = JCasUtil.selectCovering(jcas, NamedEntity.class, unit);

    if (covering.isEmpty()) {
        return "OTH";
    }
    if (covering.size() > 1) {
        throw new CollectionException(
            new Throwable("Could not get unique NER annotation to be used as TC outome. List size: " + covering.size() + " " + unit.getCoveredText()));
    }

    final NamedEntity entity = covering.get(0);
    final List<Token> entityTokens = JCasUtil.selectCovered(jcas, Token.class, entity);
    final StringBuilder label = new StringBuilder();

    if (entityTokens.size() == 1) {
        label.append("W-");
    } else {
        for (int idx = 0; idx < entityTokens.size(); idx++) {
            final Token candidate = entityTokens.get(idx);
            final boolean isUnit = candidate.getCoveredText().equals(unit.getCoveredText())
                && candidate.getBegin() == unit.getBegin();
            if (!isUnit) {
                continue;
            }
            if (idx == 0) {
                label.append("B-");
            } else if (idx < entityTokens.size() - 1) {
                label.append("M-");
            } else {
                label.append("E-");
            }
        }
    }

    label.append(entity.getValue());
    return label.toString();
}
示例17
/**
 * Lets the wrapped {@code reader} fill the CAS, then adds classification annotations.
 *
 * <p>For every {@code Sentence} a {@code TextClassificationSequence} spanning it
 * is indexed. For every {@code Token} inside the sentence a
 * {@code TextClassificationTarget} (suffix = token text) and a
 * {@code TextClassificationOutcome} (value from
 * {@code getTextClassificationOutcome}) are indexed.
 *
 * @param cas the CAS to populate
 * @throws IOException propagated from the wrapped reader
 * @throws CollectionException if the JCas cannot be obtained, or propagated from
 *         the reader or the outcome computation
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
    reader.getNext(cas);

    final JCas view;
    try {
        view = cas.getJCas();
    }
    catch (CASException e) {
        throw new CollectionException(e);
    }

    for (Sentence sent : JCasUtil.select(view, Sentence.class)) {
        final TextClassificationSequence seq =
            new TextClassificationSequence(view, sent.getBegin(), sent.getEnd());
        seq.addToIndexes();

        for (Token tok : JCasUtil.selectCovered(view, Token.class, sent)) {
            final int begin = tok.getBegin();
            final int end = tok.getEnd();

            final TextClassificationTarget target = new TextClassificationTarget(view, begin, end);
            target.setSuffix(tok.getCoveredText());
            target.addToIndexes();

            final TextClassificationOutcome label = new TextClassificationOutcome(view, begin, end);
            label.setOutcome(getTextClassificationOutcome(view, target));
            label.addToIndexes();
        }
    }
}
示例18
/**
 * Derives the classification outcome for {@code unit} from the named entity covering it.
 *
 * <p>Returns {@code "OTH"} when no entity covers the unit. With exactly one
 * covering entity the result is
 * {@code <positionInEntity>-<entity value>}, where the position comes from the
 * first {@code PositionInEntity} annotation covered by that entity.
 *
 * @param jcas the CAS holding the annotations
 * @param unit the target whose outcome is requested
 * @return the outcome label
 * @throws CollectionException if the unit is covered by more than one named entity
 */
@Override
public String getTextClassificationOutcome(JCas jcas, TextClassificationTarget unit) throws CollectionException {
    final List<NamedEntity> covering = JCasUtil.selectCovering(jcas, NamedEntity.class, unit);

    if (covering.isEmpty()) {
        return "OTH";
    }
    if (covering.size() != 1) {
        throw new CollectionException(
            new Throwable("Could not get unique NER annotation to be used as TC outome. List size: " + covering.size() + " " + unit.getCoveredText()));
    }

    final NamedEntity entity = covering.get(0);
    final PositionInEntity position =
        JCasUtil.selectCovered(jcas, PositionInEntity.class, entity).get(0);
    return position.getPositionInEntity() + "-" + entity.getValue();
}
示例19
/**
 * Reads the next resource as UTF-8 text and creates one sentence per line.
 *
 * <p>The whole content becomes the document text. {@code createSentence} is then
 * called for every span that ends at a {@code '\n'}, and once more for trailing
 * text after the last newline if there is any.
 *
 * @param aJCas the CAS to populate
 * @throws IOException if the resource cannot be read
 * @throws CollectionException declared by the signature
 */
@Override
public void getNext(JCas aJCas)
    throws IOException, CollectionException
{
    final Resource resource = nextFile();
    initCas(aJCas, resource);

    try (InputStream in = new BufferedInputStream(resource.getInputStream())) {
        aJCas.setDocumentText(IOUtils.toString(in, "UTF-8"));
    }

    final String text = aJCas.getDocumentText();
    int lineStart = 0;
    // indexOf yields -1 once lineStart reaches the end of the text, which ends the loop
    for (int newline = text.indexOf('\n'); newline >= 0; newline = text.indexOf('\n', lineStart)) {
        createSentence(aJCas, lineStart, newline);
        lineStart = newline + 1;
    }

    // remaining text without a terminating newline
    if (lineStart < text.length()) {
        createSentence(aJCas, lineStart, text.length());
    }
}
示例20
/**
 * Reads the next resource with a {@code Tsv3XDeserializer} into the CAS.
 *
 * <p>The resource's stream is decoded using {@code encoding} and wrapped in a
 * {@link LineNumberReader}, which is closed when reading finishes.
 *
 * @param aJCas the CAS to populate
 * @throws IOException if the resource cannot be opened or read
 * @throws CollectionException declared by the signature
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException
{
    final Resource resource = nextFile();
    initCas(aJCas, resource);

    try (LineNumberReader lines = new LineNumberReader(
            new InputStreamReader(resource.getInputStream(), encoding))) {
        final Tsv3XDeserializer deserializer = new Tsv3XDeserializer();
        deserializer.read(lines, aJCas);
    }
}
示例21
/**
 * Waits for {@code DELAY} and then always reports that another element is available.
 *
 * <p>If the wait is interrupted, the thread's interrupt status is restored so
 * that callers further up can still observe the interruption; the method still
 * returns {@code true}.
 *
 * @return always {@code true}
 * @throws IOException declared by the signature
 * @throws CollectionException declared by the signature
 */
@Override
public boolean hasNext() throws IOException, CollectionException {
    try {
        Thread.sleep(DELAY);
    } catch (InterruptedException e) {
        // catching InterruptedException clears the flag; set it again
        Thread.currentThread().interrupt();
    }
    return true;
}
示例22
/**
 * Reads a list of {@code PubAnnotationDocumentSection}s from the next resource
 * and turns them into document text plus one {@code Div} per section.
 *
 * <p>Section texts are concatenated with a blank line ({@code "\n\n"}) between
 * them. Each {@code Div} covers its section's span in the concatenated text and
 * carries the section's div id and section name.
 *
 * @param aCAS the CAS to populate
 * @throws IOException if the resource cannot be read or parsed
 * @throws CollectionException declared by the signature
 */
@Override
public void getNext(JCas aCAS) throws IOException, CollectionException
{
    final Resource resource = nextFile();
    initCas(aCAS, resource);

    try (InputStream in = new BufferedInputStream(
            CompressionUtils.getInputStream(resource.getLocation(), resource.getInputStream()))) {
        final List<PubAnnotationDocumentSection> parsed = JSONUtil.getObjectMapper()
                .readValue(in, PubAnnotationDocumentSection.JACKSON_LIST_TYPE_REF);

        final StringBuilder text = new StringBuilder();
        for (PubAnnotationDocumentSection part : parsed) {
            // separator only between sections, never before the first one
            if (text.length() > 0) {
                text.append("\n\n");
            }

            final int from = text.length();
            text.append(part.getText());
            final int to = text.length();

            final Div marker = new Div(aCAS, from, to);
            marker.setId(String.valueOf(part.getDivId()));
            marker.setDivType(part.getSection());
            marker.addToIndexes();
        }

        aCAS.setDocumentText(text.toString());
    }
}
示例23
/**
 * Returns {@code true} exactly once: on the first call, which also sets the
 * {@code run} flag. Every later call returns {@code false}.
 *
 * @return {@code true} only while {@code run} has not been set yet
 * @throws IOException declared by the signature
 * @throws CollectionException declared by the signature
 */
@Override
public boolean hasNext() throws IOException, CollectionException {
    final boolean firstCall = !run;
    run = true;
    return firstCall;
}
示例24
/**
 * Reports whether {@code currentRecord} is still below {@code totalRecords}.
 *
 * <p>NOTE(review): a {@code true} result also increments {@code currentRecord},
 * so repeated calls without an intervening read skip records — confirm callers
 * rely on this.
 *
 * @return {@code true} if a record was available before the counter was advanced
 * @throws IOException declared by the signature
 * @throws CollectionException declared by the signature
 */
public boolean hasNext() throws IOException, CollectionException {
    final boolean available = currentRecord < totalRecords;
    if (available) {
        currentRecord++;
    }
    return available;
}
示例25
/**
 * Loads the document with id {@code totalIdList[currentRecord]} from the index
 * into the CAS and advances {@code currentRecord}.
 *
 * <p>The "Content" field becomes the document text. The "Created" field is
 * stored both as the {@code Metadata} timestamp and as the value of a
 * {@code Dct} annotation.
 *
 * @param cas the CAS to populate
 * @throws IOException declared by the signature
 * @throws CollectionException if the JCas cannot be obtained
 */
public void getNext(CAS cas) throws IOException, CollectionException {
    final JCas view;
    try {
        view = cas.getJCas();
    } catch (CASException e) {
        throw new CollectionException(e);
    }

    final String documentId = totalIdList.get(currentRecord);
    final GetResponse hit = client
            .prepareGet(esIndex, ElasticsearchDocumentWriter.ES_TYPE_DOCUMENT, documentId)
            .setFields("Content", "Created")
            .get();

    view.setDocumentText((String) hit.getField("Content").getValue());
    view.setDocumentLanguage(language);

    // Set metadata
    final Metadata meta = new Metadata(view);
    meta.setDocId(documentId);
    final String created = (String) hit.getField("Created").getValue();
    meta.setTimestamp(created);
    meta.addToIndexes();

    // heideltime
    final Dct creationTime = new Dct(view);
    creationTime.setValue(created);
    creationTime.addToIndexes();

    currentRecord++;

    logger.log(Level.FINEST, "Document ID: " + documentId);
    logger.log(Level.FINEST, "Document Length: " + view.getDocumentText().length());
}
示例26
/**
 * Reports whether there is still work queued.
 *
 * <p>Pending columns or pending ids mean {@code true} straight away. Otherwise
 * the id queue is refilled from {@code getIds(currId)} and the result reflects
 * whether that produced any ids.
 *
 * @return {@code true} if columns or ids remain to be processed
 * @throws IOException propagated from {@code getIds}
 * @throws CollectionException declared by the signature
 */
@Override
public boolean doHasNext() throws IOException, CollectionException {
    final boolean workPending = !colsToProcess.isEmpty() || !idsToProcess.isEmpty();
    if (workPending) {
        return true;
    }

    idsToProcess.addAll(getIds(currId));
    return !idsToProcess.isEmpty();
}
示例27
/**
 * Loads the content of the next table cell into the CAS.
 *
 * <p>When all columns of the current row have been handled, the next id is taken
 * from {@code idsToProcess} and the column list is refilled from {@code allCols}.
 * The cell is then fetched with a single-column query. Both the statement and
 * its result set are closed when the query block exits.
 *
 * @param jCas the CAS to populate
 * @throws IOException if the query fails or returns no row
 * @throws CollectionException declared by the signature
 */
@Override
@SuppressWarnings(
"squid:S2077" /* The value of col is read from the database column names and so should be safe to use in this context */)
protected void doGetNext(JCas jCas) throws IOException, CollectionException {
    if (colsToProcess.isEmpty()) {
        // Get next row
        currId = idsToProcess.remove(0);
        colsToProcess.addAll(allCols);
    }
    String col = colsToProcess.remove(0);
    String content;
    // declare the statement as a resource too, so it is closed along with the result set
    try (java.sql.PreparedStatement stmt =
            conn.prepareStatement(
                "SELECT `" + col + "` FROM `" + table + "` WHERE `" + idColumn + "` = " + currId);
        ResultSet rs = stmt.executeQuery()) {
        if (rs.next()) {
            content = rs.getObject(col).toString();
        } else {
            throw new IOException("Unable to get cell content - query returned no results");
        }
    } catch (SQLException e) {
        throw new IOException("Unable to get cell content", e);
    }
    String sourceUrl = sqlConn.substring(5) + "." + table + "#" + currId + "." + col;
    extractContent(
        new ByteArrayInputStream(content.getBytes(Charset.defaultCharset())), sourceUrl, jCas);
}
示例28
/**
 * Drains all pending keys from the WatchService, passes each of their events to
 * {@code processEvent} (counting it on the "events" meter) and resets the key.
 * Afterwards reports whether there is anything left to read.
 *
 * @return {@code true} if {@code currLines} or {@code queue} is non-empty
 * @throws IOException declared by the signature
 * @throws CollectionException declared by the signature
 */
@Override
public boolean doHasNext() throws IOException, CollectionException {
    for (WatchKey pending = watcher.poll(); pending != null; pending = watcher.poll()) {
        for (WatchEvent<?> change : pending.pollEvents()) {
            processEvent(pending, change);
            getMonitor().meter("events").mark();
        }
        pending.reset();
    }

    final boolean nothingLeft = currLines.isEmpty() && queue.isEmpty();
    return !nothingLeft;
}
示例29
/**
 * Takes the next id from {@code queue}, loads the matching Mongo document and
 * extracts its content into the CAS.
 *
 * <p>Every field other than the content and id fields is passed to
 * {@code processMongoMetadataField}. When {@code deleteSource} is set, the
 * document is deleted from the collection afterwards.
 *
 * @param jCas the CAS to populate
 * @throws IOException propagated from content extraction
 * @throws CollectionException if no document with the queued id is found
 */
@Override
protected void doGetNext(JCas jCas) throws IOException, CollectionException {
    final ObjectId nextId = queue.remove(0);
    final Document byId = new Document(idField, nextId);

    final Document found = coll.find(byId).first();
    if (found == null) {
        getMonitor().error("No document returned from Mongo");
        throw new CollectionException();
    }

    final String body = (String) found.get(contentField);
    final InputStream bodyStream = IOUtils.toInputStream(body, Charset.defaultCharset());
    final String sourceUrl = mongo.getMongoURI() + "." + collection + "#" + nextId;
    extractContent(bodyStream, sourceUrl, jCas);

    for (Entry<String, Object> field : found.entrySet()) {
        final String name = field.getKey();
        final boolean handledAlready = contentField.equals(name) || idField.equals(name);
        if (!handledAlready) {
            processMongoMetadataField(jCas, name, field.getValue());
        }
    }

    if (deleteSource) {
        coll.deleteOne(byId);
    }
}
示例30
/**
 * Removes one column entry from {@code currRow} and extracts its value into the CAS.
 *
 * <p>The entry is the first one returned by the key set's iterator. The source
 * URL is built from {@code sqlConn} (minus its first five characters), the
 * current table, the row id and the column name.
 *
 * @param jCas the CAS to populate
 * @throws IOException propagated from content extraction
 * @throws CollectionException declared by the signature
 */
@Override
protected void doGetNext(JCas jCas) throws IOException, CollectionException {
    final String column = currRow.keySet().iterator().next();
    final Object cell = currRow.remove(column);

    final String sourceUrl = sqlConn.substring(5) + "." + currTable + "#" + rowId + "." + column;
    final byte[] cellBytes = cell.toString().getBytes(Charset.defaultCharset());

    extractContent(new ByteArrayInputStream(cellBytes), sourceUrl, jCas);
}