Java Code Examples: org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch
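
The following examples show how to use org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch. This package is ORC's relocated ("nohive") copy of Hive's storage-api, so the class is the same batch container as org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch: one ColumnVector per column in cols, plus a size field counting the rows currently filled in.

Most examples below follow the same fill-and-flush pattern. Here is a minimal, self-contained sketch of it (the output path and the single-column schema are placeholders, and it assumes an orc-core dependency that provides this relocated package):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.orc.storage.ql.exec.vector.LongColumnVector;
import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;

public class VectorizedRowBatchSketch {
  public static void main(String[] args) throws Exception {
    TypeDescription schema = TypeDescription.fromString("struct<x:bigint>");
    // "/tmp/demo.orc" is a placeholder output path
    Writer writer = OrcFile.createWriter(new Path("/tmp/demo.orc"),
        OrcFile.writerOptions(new Configuration()).setSchema(schema));
    VectorizedRowBatch batch = schema.createRowBatch(); // default capacity: 1024 rows
    LongColumnVector x = (LongColumnVector) batch.cols[0];
    for (long i = 0; i < 10_000; i++) {
      int row = batch.size++;      // claim the next row slot
      x.vector[row] = i;           // write the column value for that row
      if (batch.size == batch.getMaxSize()) {
        writer.addRowBatch(batch); // flush a full batch to the file
        batch.reset();
      }
    }
    if (batch.size != 0) {         // flush the final, partially filled batch
      writer.addRowBatch(batch);
    }
    writer.close();
  }
}

The loop claims a row slot by incrementing batch.size, writes each column value at that index, and flushes with Writer.addRowBatch whenever the batch reaches capacity; any remaining rows are flushed before close().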

Example 1
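An iterator-style next(): advance() makes sure the following batch has been read, the advanced flag is cleared so the batch is treated as consumed, and the current batch is returned.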
@Override
public VectorizedRowBatch next() {
  // make sure we have the next batch
  advance();
  // mark it as used
  advanced = false;
  return batch;
}
 
Example 2
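A writer that appends one generic Record to a batch: it claims the next row index by incrementing output.size, then lets a per-column converter copy each field into the matching column vector.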
@SuppressWarnings("unchecked")
@Override
public void write(Record value, VectorizedRowBatch output) throws IOException {
  int row = output.size++;
  for (int c = 0; c < converters.length; ++c) {
    converters[c].addValue(row, value.get(c, converters[c].getJavaClass()), output.cols[c]);
  }
}
 
Example 3
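The same pattern for a Spark InternalRow; here each converter extracts field c from the row itself and writes it into output.cols[c].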
@Override
public void write(InternalRow value, VectorizedRowBatch output) {
  int row = output.size++;
  for (int c = 0; c < converters.length; ++c) {
    converters[c].addValue(row, c, value, output.cols[c]);
  }
}
 
Example 4
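Hands a finished batch to an ORC writer, rethrowing the checked IOException as a RuntimeException that names the output path.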
@Override
public void add(VectorizedRowBatch datum) {
  try {
    writer.addRowBatch(datum);
  } catch (IOException e) {
    throw new RuntimeException("Problem writing to ORC file " + path, e);
  }
}
 
Example 5
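A BulkWriter<RowData> factory: it configures ORC writer options against the given output stream through a PhysicalWriterImpl, then returns a writer that accumulates rows into a single reused VectorizedRowBatch, flushing it to the underlying WriterImpl whenever it fills up, on flush(), and finally on finish().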
@Override
public BulkWriter<RowData> create(FSDataOutputStream out) throws IOException {
	OrcFile.WriterOptions opts = OrcFile.writerOptions(new Properties(), conf);
	TypeDescription description = TypeDescription.fromString(schema);
	opts.setSchema(description);
	opts.physicalWriter(new PhysicalWriterImpl(out, opts));
	WriterImpl writer = new WriterImpl(null, new Path("."), opts);

	VectorizedRowBatch rowBatch = description.createRowBatch();
	return new BulkWriter<RowData>() {
		@Override
		public void addElement(RowData row) throws IOException {
			int rowId = rowBatch.size++;
			for (int i = 0; i < row.getArity(); ++i) {
				setColumn(rowId, rowBatch.cols[i], fieldTypes[i], row, i);
			}
			if (rowBatch.size == rowBatch.getMaxSize()) {
				writer.addRowBatch(rowBatch);
				rowBatch.reset();
			}
		}

		@Override
		public void flush() throws IOException {
			if (rowBatch.size != 0) {
				writer.addRowBatch(rowBatch);
				rowBatch.reset();
			}
		}

		@Override
		public void finish() throws IOException {
			flush();
			writer.close();
		}
	};
}
 
Example 6
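A builder that creates an OrcFileAppender, falling back to VectorizedRowBatch.DEFAULT_SIZE (1024 rows) when no batch size is configured.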
public <D> FileAppender<D> build() {
  Preconditions.checkNotNull(schema, "Schema is required");
  return new OrcFileAppender<>(schema,
      this.file, createWriterFunc, conf, metadata,
      conf.getInt(VECTOR_ROW_BATCH_SIZE, VectorizedRowBatch.DEFAULT_SIZE));
}
 
Example 7
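Reads a single row back out of a batch by wrapping the batch's column vectors in a StructColumnVector and delegating to a struct reader.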
@Override
public Record read(VectorizedRowBatch batch, int row) {
  return (Record) reader.read(new StructColumnVector(batch.size, batch.cols), row);
}
 
Example 8
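The same read pattern, producing a Spark InternalRow instead of a generic Record.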
@Override
public InternalRow read(VectorizedRowBatch batch, int row) {
  return (InternalRow) reader.read(new StructColumnVector(batch.size, batch.cols), row);
}
 
Example 9
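A thin shim that delegates to RecordReader.nextBatch, which fills the batch and returns false once the reader is exhausted.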
@Override
public boolean nextBatch(RecordReader reader, VectorizedRowBatch rowBatch) throws IOException {
	return reader.nextBatch(rowBatch);
}
 
Example 10
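Builds a reader for a partitioned split: the batch generator maps partition columns to constant vectors built from the partition spec and wraps the remaining ORC column vectors directly.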
/**
 * Utility for creating a partitioned {@link OrcColumnarRowSplitReader}.
 */
public static OrcColumnarRowSplitReader<VectorizedRowBatch> genPartColumnarRowReader(
		Configuration conf,
		String[] fullFieldNames,
		DataType[] fullFieldTypes,
		Map<String, Object> partitionSpec,
		int[] selectedFields,
		List<OrcSplitReader.Predicate> conjunctPredicates,
		int batchSize,
		Path path,
		long splitStart,
		long splitLength) throws IOException {

	List<String> nonPartNames = getNonPartNames(fullFieldNames, partitionSpec);

	int[] selectedOrcFields = getSelectedOrcFields(fullFieldNames, selectedFields, nonPartNames);

	OrcColumnarRowSplitReader.ColumnBatchGenerator<VectorizedRowBatch> gen = (VectorizedRowBatch rowBatch) -> {
		// create and initialize the row batch
		ColumnVector[] vectors = new ColumnVector[selectedFields.length];
		for (int i = 0; i < vectors.length; i++) {
			String name = fullFieldNames[selectedFields[i]];
			LogicalType type = fullFieldTypes[selectedFields[i]].getLogicalType();
			vectors[i] = partitionSpec.containsKey(name) ?
					createFlinkVectorFromConstant(type, partitionSpec.get(name), batchSize) :
					createFlinkVector(rowBatch.cols[nonPartNames.indexOf(name)]);
		}
		return new VectorizedColumnBatch(vectors);
	};

	return new OrcColumnarRowSplitReader<>(
			new OrcNoHiveShim(),
			conf,
			convertToOrcTypeWithPart(fullFieldNames, fullFieldTypes, partitionSpec.keySet()),
			selectedOrcFields,
			gen,
			conjunctPredicates,
			batchSize,
			path,
			splitStart,
			splitLength);
}
 
Example 11
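Examples 11 and 12 belong to a thin wrapper type: the constructor stores a VectorizedRowBatch and getBatch() exposes it to callers.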
public OrcNoHiveBatchWrapper(VectorizedRowBatch batch) {
	this.batch = batch;
}
 
Example 12
@Override
public VectorizedRowBatch getBatch() {
	return batch;
}
 
Example 13
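A test fixture that writes rowSize rows of five typed columns (float, double, timestamp, tinyint, smallint), marking the last row null in every column.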
@Override
protected void prepareReadFileWithTypes(String file, int rowSize) throws IOException {
	// NOTE: ORC files store field names, so the names here must match the ORC schema
	TypeDescription schema =
			TypeDescription.fromString(
					"struct<" +
							"f0:float," +
							"f1:double," +
							"f2:timestamp," +
							"f3:tinyint," +
							"f4:smallint" +
							">");

	org.apache.hadoop.fs.Path filePath = new org.apache.hadoop.fs.Path(file);
	Configuration conf = new Configuration();

	Writer writer =
			OrcFile.createWriter(filePath,
					OrcFile.writerOptions(conf).setSchema(schema));

	VectorizedRowBatch batch = schema.createRowBatch(rowSize);
	DoubleColumnVector col0 = (DoubleColumnVector) batch.cols[0];
	DoubleColumnVector col1 = (DoubleColumnVector) batch.cols[1];
	TimestampColumnVector col2 = (TimestampColumnVector) batch.cols[2];
	LongColumnVector col3 = (LongColumnVector) batch.cols[3];
	LongColumnVector col4 = (LongColumnVector) batch.cols[4];

	col0.noNulls = false;
	col1.noNulls = false;
	col2.noNulls = false;
	col3.noNulls = false;
	col4.noNulls = false;
	for (int i = 0; i < rowSize - 1; i++) {
		col0.vector[i] = i;
		col1.vector[i] = i;

		Timestamp timestamp = toTimestamp(i);
		col2.time[i] = timestamp.getTime();
		col2.nanos[i] = timestamp.getNanos();

		col3.vector[i] = i;
		col4.vector[i] = i;
	}

	col0.isNull[rowSize - 1] = true;
	col1.isNull[rowSize - 1] = true;
	col2.isNull[rowSize - 1] = true;
	col3.isNull[rowSize - 1] = true;
	col4.isNull[rowSize - 1] = true;

	batch.size = rowSize;
	writer.addRowBatch(batch);
	batch.reset();
	writer.close();
}
 
Example 14
/**
 * Writes the data.
 *
 * @param value the data value to write.
 * @param output the VectorizedRowBatch to which the output will be written.
 * @throws IOException if there's any IO error while writing the data value.
 */
void write(T value, VectorizedRowBatch output) throws IOException;
 
Example 15
/**
 * Reads a row.
 */
T read(VectorizedRowBatch batch, int row);