Java源码示例:org.apache.uima.collection.CollectionException

示例1
/**
 * Loads the next resource into the supplied CAS.
 *
 * <p>The resource comes from {@code nextFile()}, the CAS is prepared with {@code initCas},
 * and the resource's stream is handed to {@code convertToCas} together with the configured
 * encoding.
 *
 * @param aJCas the CAS to populate
 * @throws IOException declared for I/O failures while opening or converting the resource
 * @throws CollectionException declared by the reader contract
 */
@Override
public void getNext(JCas aJCas)
    throws IOException, CollectionException
{
    final Resource resource = nextFile();
    initCas(aJCas, resource);

    // Declared before the try so the finally block can see it even when opening fails.
    InputStream stream = null;
    try {
        stream = resource.getInputStream();
        convertToCas(aJCas, stream, encoding);
    }
    finally {
        // Receives null when getInputStream() threw before the assignment.
        closeQuietly(stream);
    }
}
 
示例2
/**
 * Fills the given CAS from the next available resource.
 *
 * <p>Steps: fetch the resource via {@code nextFile()}, initialise the CAS via
 * {@code initCas}, then convert the resource content with {@code convertToCas}.
 *
 * @param aJCas the CAS that receives the converted content
 * @throws IOException declared for failures while reading the resource
 * @throws CollectionException declared by the reader contract
 */
@Override
public void getNext(JCas aJCas)
    throws IOException, CollectionException
{
    final Resource current = nextFile();
    initCas(aJCas, current);

    InputStream input = null;
    try {
        input = current.getInputStream();
        convertToCas(aJCas, input, encoding);
    }
    finally {
        // Always release the stream, whether or not the conversion succeeded.
        closeQuietly(input);
    }
}
 
示例3
/**
 * Reports whether the wrapped collection reader has more elements.
 *
 * <p>Returns {@code false} immediately once this object has been destroyed. Checked
 * exceptions from the underlying reader are rethrown as {@link IllegalStateException}.
 * If the check does not complete normally and {@code selfDestroy} is set,
 * {@code destroy()} is invoked before the exception propagates.
 *
 * @return {@code true} if the underlying reader reports another element
 */
public boolean hasNext() {
  if (this.destroyed) {
    return false;
  }

  boolean completed = false;
  try {
    boolean more = this.collectionReader.hasNext();
    completed = true;
    return more;
  } catch (CollectionException | IOException cause) {
    throw new IllegalStateException(cause);
  } finally {
    // Only clean up when the delegate call failed, checked or unchecked.
    if (!completed && this.selfDestroy) {
      this.destroy();
    }
  }
}
 
示例4
/**
 * Deserializes the current tar entry as XMI into the given CAS.
 *
 * <p>The entry content is buffered in memory first, then the archive is advanced to the
 * next valid entry, and only afterwards is the buffered content deserialized.
 *
 * @param aCAS the CAS to populate
 * @throws IOException on read failures; also wraps any {@code SAXException} from deserialization
 * @throws CollectionException declared by the reader contract
 */
@Override
public void getNext(CAS aCAS)
        throws IOException, CollectionException
{
    // nextTarEntry cannot be null here!
    ByteArrayOutputStream entryBytes = new ByteArrayOutputStream();
    int byteCount = IOUtils.copy(tarArchiveInputStream, entryBytes);

    // The name must be read before fast-forwarding, which happens next.
    getLogger().debug("Loaded " + byteCount + " bytes from " + nextTarEntry.getName());

    // Advance the archive before touching the CAS.
    fastForwardToNextValidEntry();

    // Populate the CAS from the buffered entry.
    InputStream xmi = new ByteArrayInputStream(entryBytes.toByteArray());
    try {
        XmiCasDeserializer.deserialize(xmi, aCAS, lenient);
    }
    catch (SAXException cause) {
        throw new IOException(cause);
    }
}
 
示例5
/**
 * Populates the CAS with the text of {@code file} and a
 * {@code SourceDocumentInformation} annotation describing it.
 *
 * @param cas the CAS to fill
 * @param file the source file whose content becomes the document text
 * @throws IOException declared for failures while reading the file
 * @throws CollectionException wraps a {@code CASException} raised while accessing the JCas
 */
protected void fillCas(CAS cas, File file) throws IOException, CollectionException {
	final String sourceUri = file.toURI().toString();
	try {
		SourceDocumentInformation info = new SourceDocumentInformation(cas.getJCas());
		info.setUri(sourceUri);

		String rawText = getDocumentText(file.getAbsolutePath(), this.mEncoding);
		cas.setDocumentLanguage(mLanguage.getCode());
		cas.setDocumentText(preparator.prepare(rawText));

		// Size bookkeeping: this file, the running total, and the whole corpus.
		info.setDocumentSize((int)file.length());
		info.setCumulatedDocumentSize(this.currentFileByteSize);
		info.setCorpusSize(this.totalFileByteSize);

		// NOTE(review): the end offset uses the raw text length, not the prepared text
		// that was stored in the CAS; confirm the two always match.
		info.setBegin(0);
		info.setEnd(rawText.length());
		info.setOffsetInSource(0);

		// Position of this document within the file list.
		info.setDocumentIndex(mCurrentIndex);
		info.setNbDocuments(this.mFiles.size());
		info.setLastSegment(mCurrentIndex == mFiles.size() - 1);

		info.addToIndexes();
	} catch (CASException e) {
		throw new CollectionException(e);
	}
}
 
示例6
/**
 * Ensures {@code currRow} holds the cells of the next row, if any remain.
 *
 * <p>When {@code currRow} is empty the cursor is advanced. If the current table is
 * exhausted, following tables are tried in turn until one yields a row; tables with no
 * rows are skipped rather than read from a cursor that has no current row.
 *
 * @return {@code true} if {@code currRow} contains at least one cell to process
 * @throws IOException wraps any {@code SQLException} raised while reading
 * @throws CollectionException declared by the reader contract
 */
@Override
public boolean doHasNext() throws IOException, CollectionException {
  if (currRow.isEmpty()) {
    try {
      if (!rsCurrTable.next()) {
        // Current table is exhausted: move on until a table actually provides a row.
        // Assumes getNextTable() repoints rsCurrTable at the new table.
        boolean foundRow = false;
        while (!foundRow) {
          if (!getNextTable()) {
            return false;
          }
          rowId = 0;
          foundRow = rsCurrTable.next();
        }
      }

      rowId++;
      for (String col : columns) {
        currRow.put(col, rsCurrTable.getObject(col));
      }
    } catch (SQLException se) {
      throw new IOException(se);
    }
  }
  return !currRow.isEmpty();
}
 
示例7
/**
 * Converts the next resource into the given CAS.
 *
 * @param aJCas the CAS to initialise and populate
 * @throws IOException declared for failures while opening or reading the resource
 * @throws CollectionException declared by the reader contract
 */
@Override
public void getNext(JCas aJCas)
    throws IOException, CollectionException
{
    final Resource nextResource = nextFile();
    initCas(aJCas, nextResource);

    // Kept outside the try block so it is visible to the cleanup in finally.
    InputStream content = null;
    try {
        content = nextResource.getInputStream();
        convertToCas(aJCas, content, encoding);
    }
    finally {
        closeQuietly(content);
    }
}
 
示例8
/**
 * Marks the document as supplied and sets the CAS text to the eight fixed sentences,
 * joined with {@code SEP}.
 *
 * @param jCas the CAS whose document text is set
 */
@Override
protected void doGetNext(JCas jCas) throws IOException, CollectionException {
  supplied = true;

  // One sentence per statement; the separator goes between sentences, not after the last.
  StringBuilder text = new StringBuilder();
  text.append(SENTENCE_1).append(SEP);
  text.append(SENTENCE_2).append(SEP);
  text.append(SENTENCE_3).append(SEP);
  text.append(SENTENCE_4).append(SEP);
  text.append(SENTENCE_5).append(SEP);
  text.append(SENTENCE_6).append(SEP);
  text.append(SENTENCE_7).append(SEP);
  text.append(SENTENCE_8);

  jCas.setDocumentText(text.toString());
}
 
示例9
/**
 * Produces a CAS that carries the job configuration as {@code JobSettings}.
 *
 * <p>The document text is set to the simple class name of {@code JobSettings}, the
 * language to {@code "en"}, and every entry of {@code config} is copied into the settings.
 *
 * @param jCas the CAS to populate
 */
@Override
public final void getNext(final JCas jCas) throws IOException, CollectionException {
  getMonitor().startFunction("getNext");
  MetricsFactory.getInstance().getPipelineMetrics(monitor.getPipelineName()).startDocumentProcess();

  jCas.setDocumentText(JobSettings.class.getSimpleName());
  jCas.setDocumentLanguage("en");

  // Copy each configured key/value pair into the settings stored on the CAS.
  final JobSettings jobSettings = new JobSettings(jCas);
  for (final Map.Entry<String, String> setting : config.entrySet()) {
    final String name = setting.getKey();
    final String value = setting.getValue();
    jobSettings.set(name, value);
  }

  getMonitor().finishFunction("getNext");
}
 
示例10
/**
 * Calls {@code hasNext()} three times on a {@code FixedRate} scheduler created with a
 * period of "1", sleeping one second between calls, and asserts the total elapsed time
 * is within 100 ms of two seconds.
 */
@Test
@SuppressWarnings("squid:S2925" /* sleep required for test */)
public void testDelay()
    throws CollectionException, IOException, ResourceInitializationException,
        InterruptedException {
  FixedRate scheduler = create("period", "1");

  long startedAt = System.currentTimeMillis();

  assertTrue(scheduler.hasNext());
  Thread.sleep(1000);

  assertTrue(scheduler.hasNext());
  Thread.sleep(1000);

  assertTrue(scheduler.hasNext());
  long finishedAt = System.currentTimeMillis();

  long elapsed = finishedAt - startedAt;
  boolean withinTolerance = elapsed >= 1900 && elapsed <= 2100;
  assertTrue(String.format("Diff was %d", elapsed), withinTolerance);
}
 
示例11
/**
 * Calls {@code hasNext()} three times on a {@code FixedDelay} scheduler created with a
 * period of "1", sleeping one second between calls, and asserts the total elapsed time
 * is within 100 ms of four seconds.
 *
 * <p>The measured duration is reported only through the assertion message; nothing is
 * written to standard output.
 */
@Test
@SuppressWarnings("squid:S2925" /* sleep required for test */)
public void testDelay()
    throws CollectionException, IOException, ResourceInitializationException,
        InterruptedException {
  FixedDelay scheduler = create("period", "1");

  long start = System.currentTimeMillis();
  assertTrue(scheduler.hasNext());

  Thread.sleep(1000);

  assertTrue(scheduler.hasNext());

  Thread.sleep(1000);

  assertTrue(scheduler.hasNext());
  long end = System.currentTimeMillis();

  long diff = end - start;
  assertTrue(String.format("Diff was %d", diff), diff >= 3900 && diff <= 4100);
}
 
示例12
/**
 * Receives the next message from the consumer and extracts its text into the CAS.
 *
 * <p>The source identifier is the ActiveMQ resource name and the endpoint joined by a dot.
 *
 * @param jCas the CAS to populate
 * @throws IOException if no message was returned or the message is not a {@code TextMessage}
 * @throws CollectionException wraps any {@code JMSException} raised while receiving or reading
 */
@Override
protected void doGetNext(final JCas jCas) throws IOException, CollectionException {
  final String source = String.join(".", activeMQ.getResourceName(), endpoint);

  try {
    final Message msg = consumer.receive();
    if (msg == null) {
      // receive() returns null when the consumer is closed concurrently; fail with a
      // clear error rather than a NullPointerException while building the message below.
      throw new IOException(
          String.format(
              "No message received from source %s; the consumer may have been closed",
              source));
    }

    if (msg instanceof TextMessage) {
      final String text = ((TextMessage) msg).getText();
      final InputStream is = IOUtils.toInputStream(text, Charset.defaultCharset());
      extractContent(is, source, jCas);
    } else {
      throw new IOException(
          String.format(
              "Unexpected message type for message with id %s from source %s",
              msg.getJMSMessageID(), source));
    }
  } catch (final JMSException e) {
    throw new CollectionException(e);
  }
}
 
示例13
/**
 * Reports whether more input is available.
 *
 * <p>In one-file mode, returns {@code true} while the active reader still has lines.
 * Otherwise the reader reference is cleared and the decision is delegated to the superclass.
 *
 * @return {@code true} if another element can be read
 */
@Override
public boolean hasNext() throws IOException, CollectionException {
  boolean moreLinesInCurrentFile = isOneFile && reader != null && reader.hasNextLine();
  if (moreLinesInCurrentFile) {
    return true;
  }

  // Nothing left in the current reader: drop it and ask the superclass.
  reader = null;
  return super.hasNext();
}
 
示例14
/**
 * Parses the next sub-document of the current XML file into the given CAS.
 *
 * <p>A stream reader is opened lazily for the file at {@code currentParsedFile}. After a
 * sub-document has been parsed, the reader is closed and the file index advanced once the
 * reader's depth drops below 2.
 *
 * <p>Failures are reported only through the thrown {@link CollectionException}, which
 * keeps the original cause; nothing is printed to standard output or standard error.
 *
 * @param aCAS the CAS to populate
 * @throws IOException declared by the reader contract
 * @throws CollectionException if the JCas cannot be obtained or parsing fails
 */
public void getNext(CAS aCAS) throws IOException, CollectionException {
  JCas jcas;
  try {
    jcas = aCAS.getJCas();
  } catch (CASException e) {
    throw new CollectionException(e);
  }

  try {
    if (this.xmlReader == null) {
      // First sub-document of a new file: open the reader and reset the counter.
      WstxInputFactory factory = new WstxInputFactory();
      this.xmlReader =
          factory.createXMLStreamReader((File) this.xmlFiles.get(this.currentParsedFile));
      this.iDoc = 0;
    }

    this.parseSubDocument(jcas);
    ++this.iDoc;

    if (this.xmlReader.getDepth() < 2) {
      // Current file is finished: release the reader and move to the next file.
      this.xmlReader.closeCompletely();
      this.xmlReader = null;
      ++this.currentParsedFile;
    }
  } catch (Exception e) {
    // Covers XMLStreamException as well; the cause is preserved for the caller.
    throw new CollectionException(e);
  }
}
 
示例15
/**
 * Read a single sentence.
 *
 * <p>Lines are consumed until a blank line, the end of input, or (when
 * {@code sentenceEnd} is {@code DOT}) a "." token whose second column is not "dummy".
 * A "." line without a second column is also treated as a sentence end.
 *
 * @return the tab-split fields of each line in the sentence; {@code null} if there is no
 *         more input or a DOCSTART line was reached in one-file mode
 * @throws RuntimeException if a DOCSTART line is met while not in one-file mode
 */
private List<String[]> readSentence()
        throws IOException, CollectionException {
    if (!reader.hasNextLine()) {
        return null;
    }
    List<String[]> words = new ArrayList<>();
    String line;
    while (reader.hasNextLine()) {
        line = reader.nextLine();
        if (line.contains("DOCSTART")) {
            if (isOneFile) {
                // Remember the id of the document that starts here.
                nextDocId = parseDocId(line);
                return null;
            } else {
                throw new RuntimeException("There are more than DOCSTART in one document!");
            }
        }
        if (StringUtils.isBlank(line)) {
            break; // End of sentence
        }
        String[] fields = line.split("\t");
        words.add(fields);

        // Guard the second column: a line with a single field must not raise
        // ArrayIndexOutOfBoundsException.
        if (sentenceEnd == SentenceEndType.DOT
                && ".".equals(fields[0])
                && (fields.length < 2 || !"dummy".equals(fields[1]))) {
            break;
        }
    }
    return words;
}
 
示例16
/**
 * Builds the outcome label for a classification unit from the named entity covering it.
 *
 * <p>No covering entity yields {@code "OTH"}. Exactly one yields a position prefix
 * followed by the entity value: {@code "W-"} for a single-token entity, otherwise
 * {@code "B-"}, {@code "M-"} or {@code "E-"} by the index of the matching token.
 *
 * @param jcas the CAS holding the annotations
 * @param unit the unit to label
 * @return the outcome label
 * @throws CollectionException if more than one named entity covers the unit
 */
@Override
public String getTextClassificationOutcome(JCas jcas, TextClassificationTarget unit) throws CollectionException {
	List<NamedEntity> covering = JCasUtil.selectCovering(jcas, NamedEntity.class, unit);

	if (covering.isEmpty()) {
		return "OTH";
	}
	if (covering.size() > 1) {
		throw new CollectionException(
				new Throwable("Could not get unique NER annotation to be used as TC outome. List size: " + covering.size() + " " + unit.getCoveredText()));
	}

	NamedEntity entity = covering.get(0);
	List<Token> entityTokens = JCasUtil.selectCovered(jcas, Token.class, entity);
	StringBuilder label = new StringBuilder();

	if (entityTokens.size() == 1) {
		label.append("W-");
	} else {
		int lastIndex = entityTokens.size() - 1;
		for (int i = 0; i <= lastIndex; i++) {
			Token token = entityTokens.get(i);
			// A token matches the unit when both the text and the start offset agree.
			boolean matchesUnit = token.getCoveredText().equals(unit.getCoveredText())
					&& token.getBegin() == unit.getBegin();
			if (!matchesUnit) {
				continue;
			}
			if (i == 0) {
				label.append("B-");
			} else if (i < lastIndex) {
				label.append("M-");
			} else {
				label.append("E-");
			}
		}
	}

	label.append(entity.getValue());
	return label.toString();
}
 
示例17
/**
 * Reads the next document through the wrapped reader, then adds one
 * {@code TextClassificationSequence} per sentence and, for each token in it, a
 * {@code TextClassificationTarget} plus a {@code TextClassificationOutcome}.
 *
 * @param cas the CAS to populate and annotate
 * @throws IOException declared for failures in the wrapped reader
 * @throws CollectionException if the JCas cannot be obtained or an outcome cannot be computed
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
	reader.getNext(cas);

	JCas jcas;
	try {
		jcas = cas.getJCas();
	}
	catch (CASException e) {
		throw new CollectionException(e);
	}

	for (Sentence sentence : JCasUtil.select(jcas, Sentence.class)) {
		// One sequence spanning the whole sentence.
		TextClassificationSequence sequence =
				new TextClassificationSequence(jcas, sentence.getBegin(), sentence.getEnd());
		sequence.addToIndexes();

		for (Token token : JCasUtil.selectCovered(jcas, Token.class, sentence)) {
			int tokenBegin = token.getBegin();
			int tokenEnd = token.getEnd();

			// The target must be indexed before its outcome is looked up.
			TextClassificationTarget target = new TextClassificationTarget(jcas, tokenBegin, tokenEnd);
			target.setSuffix(token.getCoveredText());
			target.addToIndexes();

			TextClassificationOutcome outcome = new TextClassificationOutcome(jcas, tokenBegin, tokenEnd);
			outcome.setOutcome(getTextClassificationOutcome(jcas, target));
			outcome.addToIndexes();
		}
	}
}
 
示例18
/**
 * Builds the outcome label for a unit from the named entity covering it and the
 * {@code PositionInEntity} annotation covered by that entity.
 *
 * @param jcas the CAS holding the annotations
 * @param unit the unit to label
 * @return {@code "OTH"} if no named entity covers the unit, otherwise
 *         {@code <position>-<entity value>}
 * @throws CollectionException if more than one named entity covers the unit, or if the
 *         single covering entity has no {@code PositionInEntity} annotation
 */
@Override
public String getTextClassificationOutcome(JCas jcas, TextClassificationTarget unit) throws CollectionException {
	List<NamedEntity> neList = JCasUtil.selectCovering(jcas, NamedEntity.class, unit);
	if(neList.size() == 1) {
		NamedEntity ne = neList.get(0);
		List<PositionInEntity> positions = JCasUtil.selectCovered(jcas, PositionInEntity.class, ne);
		if(positions.isEmpty()) {
			// Report a descriptive error; an unchecked get(0) would raise IndexOutOfBoundsException.
			throw new CollectionException(
					new Throwable("Could not find a PositionInEntity annotation covered by the NER annotation of: " + unit.getCoveredText()));
		}
		return positions.get(0).getPositionInEntity() + "-" + ne.getValue();

	} else if(neList.size() == 0){
		return "OTH";
	} else {
		throw new CollectionException(
				new Throwable("Could not get unique NER annotation to be used as TC outome. List size: " + neList.size() + " " + unit.getCoveredText()));
	}
}
 
示例19
/**
 * Reads the next resource as UTF-8 text and creates one sentence per line.
 *
 * <p>Each newline-terminated line becomes a sentence over the offsets of that line
 * (newline excluded). Text after the last newline, if any, becomes a final sentence.
 *
 * @param aJCas the CAS to populate
 * @throws IOException declared for failures while reading the resource
 * @throws CollectionException declared by the reader contract
 */
@Override
public void getNext(JCas aJCas)
    throws IOException, CollectionException
{
    Resource res = nextFile();
    initCas(aJCas, res);

    try (InputStream is = new BufferedInputStream(res.getInputStream())) {
        aJCas.setDocumentText(IOUtils.toString(is, "UTF-8"));
    }

    final String text = aJCas.getDocumentText();
    final int length = text.length();

    // indexOf returns -1 once the search start reaches the end of the text, ending the loop.
    int lineStart = 0;
    for (int lineEnd = text.indexOf('\n'); lineEnd >= 0; lineEnd = text.indexOf('\n', lineStart)) {
        createSentence(aJCas, lineStart, lineEnd);
        lineStart = lineEnd + 1;
    }

    // Trailing text that is not terminated by a newline.
    if (lineStart < length) {
        createSentence(aJCas, lineStart, length);
    }
}
 
示例20
/**
 * Reads the next resource into the CAS using {@code Tsv3XDeserializer}.
 *
 * @param aJCas the CAS to populate
 * @throws IOException declared for failures while opening or reading the resource
 * @throws CollectionException declared by the reader contract
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException
{
    final Resource resource = nextFile();
    initCas(aJCas, resource);

    // The reader decodes the resource using the configured encoding.
    try (LineNumberReader lines = new LineNumberReader(
            new InputStreamReader(resource.getInputStream(), encoding))) {
        Tsv3XDeserializer deserializer = new Tsv3XDeserializer();
        deserializer.read(lines, aJCas);
    }
}
 
示例21
/**
 * Waits for {@code DELAY} milliseconds and then reports that another element is available.
 *
 * <p>If the wait is interrupted, the thread's interrupt status is restored so that callers
 * further up the stack can still observe the interruption; the method still returns
 * {@code true}.
 *
 * @return always {@code true}
 */
@Override
public boolean hasNext() throws IOException, CollectionException {
  try {
    Thread.sleep(DELAY);
  } catch (InterruptedException e) {
    // Catching InterruptedException clears the flag; set it again instead of dropping it.
    Thread.currentThread().interrupt();
  }

  return true;
}
 
示例22
/**
 * Reads the next resource as a list of PubAnnotation document sections.
 *
 * <p>Section texts are concatenated with a blank line between them to form the document
 * text, and a {@code Div} annotation is added over each section's span.
 *
 * @param aCAS the CAS to populate
 * @throws IOException declared for failures while reading or parsing the resource
 * @throws CollectionException declared by the reader contract
 */
@Override
public void getNext(JCas aCAS) throws IOException, CollectionException
{
    Resource res = nextFile();
    initCas(aCAS, res);

    try (InputStream is = new BufferedInputStream(
            CompressionUtils.getInputStream(res.getLocation(), res.getInputStream()))) {

        List<PubAnnotationDocumentSection> docSections = JSONUtil.getObjectMapper()
                .readValue(is, PubAnnotationDocumentSection.JACKSON_LIST_TYPE_REF);

        StringBuilder text = new StringBuilder();
        for (PubAnnotationDocumentSection docSection : docSections) {
            // Separate sections with a blank line, but put nothing before the first one.
            if (text.length() > 0) {
                text.append("\n\n");
            }

            int sectionBegin = text.length();
            text.append(docSection.getText());
            int sectionEnd = text.length();

            Div sectionDiv = new Div(aCAS, sectionBegin, sectionEnd);
            sectionDiv.setId(String.valueOf(docSection.getDivId()));
            sectionDiv.setDivType(docSection.getSection());
            sectionDiv.addToIndexes();
        }

        aCAS.setDocumentText(text.toString());
    }
}
 
示例23
/**
 * Returns {@code true} on the first call only; the {@code run} flag is set on that call
 * and every later call returns {@code false}.
 *
 * @return {@code true} if this is the first invocation
 */
@Override
public boolean hasNext() throws IOException, CollectionException {
  final boolean firstCall = !run;
  run = true;
  return firstCall;
}
 
示例24
/**
 * Reports whether records remain and, if so, advances the record counter.
 *
 * <p>NOTE(review): this check has a side effect. Each call that returns {@code true}
 * also increments {@code currentRecord}, so calling it twice consumes two positions.
 *
 * @return {@code true} if {@code currentRecord} was below {@code totalRecords}
 */
public boolean hasNext() throws IOException, CollectionException {
	if (currentRecord >= totalRecords) {
		return false;
	}
	currentRecord++;
	return true;
}
 
示例25
/**
 * Loads the document at {@code currentRecord} from Elasticsearch into the CAS.
 *
 * <p>The "Content" field becomes the document text. The "Created" field is stored both
 * as the timestamp of a {@code Metadata} annotation and as the value of a {@code Dct}
 * annotation. {@code currentRecord} is advanced afterwards.
 *
 * @param cas the CAS to populate
 * @throws IOException declared by the reader contract
 * @throws CollectionException if the JCas cannot be obtained
 */
public void getNext(CAS cas) throws IOException, CollectionException {
	JCas jcas;
	try {
		jcas = cas.getJCas();
	} catch (CASException e) {
		throw new CollectionException(e);
	}

	// Fetch only the two fields that are used below.
	String documentId = totalIdList.get(currentRecord);
	GetResponse hit = client
			.prepareGet(esIndex, ElasticsearchDocumentWriter.ES_TYPE_DOCUMENT, documentId)
			.setFields("Content", "Created")
			.get();

	String content = (String) hit.getField("Content").getValue();
	jcas.setDocumentText(content);
	jcas.setDocumentLanguage(language);

	// Document metadata
	Metadata metadata = new Metadata(jcas);
	metadata.setDocId(documentId);
	String createdOn = (String) hit.getField("Created").getValue();
	metadata.setTimestamp(createdOn);
	metadata.addToIndexes();

	// heideltime
	Dct creationTime = new Dct(jcas);
	creationTime.setValue(createdOn);
	creationTime.addToIndexes();

	currentRecord++;

	logger.log(Level.FINEST, "Document ID: " + documentId);
	logger.log(Level.FINEST, "Document Length: " + jcas.getDocumentText().length());
}
 
示例26
/**
 * Reports whether there is more work.
 *
 * <p>Pending columns or pending ids mean there is. Otherwise the id queue is refilled
 * from {@code getIds(currId)} and the answer depends on whether that produced any ids.
 *
 * @return {@code true} if a column or id is waiting to be processed
 */
@Override
public boolean doHasNext() throws IOException, CollectionException {
  boolean workPending = !colsToProcess.isEmpty() || !idsToProcess.isEmpty();
  if (workPending) {
    return true;
  }

  // Both queues are drained: try to fetch the next batch of ids.
  idsToProcess.addAll(getIds(currId));
  return !idsToProcess.isEmpty();
}
 
示例27
/**
 * Extracts the content of the next table cell into the CAS.
 *
 * <p>When no columns are pending for the current row, the next id is taken from
 * {@code idsToProcess} and all columns are queued again. The cell for the next column is
 * then queried and passed to {@code extractContent}. Both the statement and the result
 * set are closed before the content is processed.
 *
 * @param jCas the CAS to populate
 * @throws IOException if the query fails or returns no row
 * @throws CollectionException declared by the reader contract
 */
@Override
@SuppressWarnings(
    "squid:S2077" /* The value of col is read from the database column names and so should be safe to use in this context */)
protected void doGetNext(JCas jCas) throws IOException, CollectionException {
  if (colsToProcess.isEmpty()) {
    // Get next row
    currId = idsToProcess.remove(0);

    colsToProcess.addAll(allCols);
  }

  String col = colsToProcess.remove(0);

  String content;

  String query =
      "SELECT `" + col + "` FROM `" + table + "` WHERE `" + idColumn + "` = " + currId;

  // Manage the statement as a resource too; closing only the ResultSet leaks the statement.
  try (java.sql.PreparedStatement statement = conn.prepareStatement(query);
      ResultSet rs = statement.executeQuery()) {
    if (rs.next()) {
      // NOTE(review): a SQL NULL cell makes getObject return null and toString() throw.
      content = rs.getObject(col).toString();
    } else {
      throw new IOException("Unable to get cell content - query returned no results");
    }

  } catch (SQLException e) {
    throw new IOException("Unable to get cell content", e);
  }

  String sourceUrl = sqlConn.substring(5) + "." + table + "#" + currId + "." + col;

  extractContent(
      new ByteArrayInputStream(content.getBytes(Charset.defaultCharset())), sourceUrl, jCas);
}
 
示例28
/**
 * Drains all watch keys currently available from the WatchService, passes each of their
 * events to {@code processEvent}, and then reports whether anything is left to read.
 *
 * <p>Each handled event is counted on the "events" meter, and every key is reset once
 * its events have been processed.
 *
 * @return {@code true} if there are buffered lines or queued entries waiting
 */
@Override
public boolean doHasNext() throws IOException, CollectionException {
  for (WatchKey pending = watcher.poll(); pending != null; pending = watcher.poll()) {
    for (WatchEvent<?> watchEvent : pending.pollEvents()) {
      processEvent(pending, watchEvent);
      getMonitor().meter("events").mark();
    }

    pending.reset();
  }

  boolean nothingWaiting = currLines.isEmpty() && queue.isEmpty();
  return !nothingWaiting;
}
 
示例29
/**
 * Loads the next queued Mongo document into the CAS.
 *
 * <p>The content field is passed to {@code extractContent}; every other field except the
 * id field is handed to {@code processMongoMetadataField}. If {@code deleteSource} is
 * set, the document is deleted from the collection afterwards.
 *
 * @param jCas the CAS to populate
 * @throws IOException declared for failures while extracting content
 * @throws CollectionException if no document exists for the queued id
 */
@Override
protected void doGetNext(JCas jCas) throws IOException, CollectionException {
  ObjectId nextId = queue.remove(0);

  Document idQuery = new Document(idField, nextId);
  Document found = coll.find(idQuery).first();
  if (found == null) {
    getMonitor().error("No document returned from Mongo");
    throw new CollectionException();
  }

  String body = (String) found.get(contentField);
  InputStream bodyStream = IOUtils.toInputStream(body, Charset.defaultCharset());
  String sourceUrl = mongo.getMongoURI() + "." + collection + "#" + nextId;
  extractContent(bodyStream, sourceUrl, jCas);

  // Everything other than the content and id fields is treated as metadata.
  for (Entry<String, Object> field : found.entrySet()) {
    String fieldName = field.getKey();
    boolean isMetadata = !contentField.equals(fieldName) && !idField.equals(fieldName);
    if (isMetadata) {
      processMongoMetadataField(jCas, fieldName, field.getValue());
    }
  }

  if (deleteSource) {
    coll.deleteOne(idQuery);
  }
}
 
示例30
/**
 * Removes one cell from the current row and extracts its string form into the CAS.
 *
 * <p>The source URL is built from the connection string (minus its first five
 * characters), the current table, the row id and the column name.
 *
 * @param jCas the CAS to populate
 * @throws IOException declared for failures while extracting content
 * @throws CollectionException declared by the reader contract
 */
@Override
protected void doGetNext(JCas jCas) throws IOException, CollectionException {
  // Take whichever column the row's key set yields first.
  String column = currRow.keySet().iterator().next();
  Object cell = currRow.remove(column);

  String sourceUrl = sqlConn.substring(5) + "." + currTable + "#" + rowId + "." + column;

  byte[] cellBytes = cell.toString().getBytes(Charset.defaultCharset());
  extractContent(new ByteArrayInputStream(cellBytes), sourceUrl, jCas);
}