Java源码示例:org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch
示例1
/**
 * Returns the current batch, making sure one has been loaded first.
 *
 * @return the value of the {@code batch} field after {@code advance()} has run
 */
@Override
public VectorizedRowBatch next() {
    advance();                 // guarantee that the next batch is available
    final VectorizedRowBatch current = batch;
    advanced = false;          // flag the batch as consumed
    return current;
}
示例2
/**
 * Appends one record to {@code output}: claims the next free row slot and lets
 * each converter store its field into the matching column vector.
 *
 * @param value  the record whose fields are written
 * @param output the batch receiving the row; its {@code size} is incremented by one
 * @throws IOException declared by the contract of this method
 */
@SuppressWarnings("unchecked")
@Override
public void write(Record value, VectorizedRowBatch output) throws IOException {
    // Reserve the next row index and grow the batch by one row.
    final int rowIndex = output.size;
    output.size = rowIndex + 1;

    int column = 0;
    while (column < converters.length) {
        converters[column].addValue(
            rowIndex,
            value.get(column, converters[column].getJavaClass()),
            output.cols[column]);
        ++column;
    }
}
示例3
/**
 * Appends one row to {@code output}: claims the next free row slot and lets
 * each converter copy its field from {@code value} into the matching column vector.
 *
 * @param value  the row whose fields are written
 * @param output the batch receiving the row; its {@code size} is incremented by one
 */
@Override
public void write(InternalRow value, VectorizedRowBatch output) {
    // Reserve the next row index and grow the batch by one row.
    final int rowIndex = output.size;
    output.size = rowIndex + 1;

    int column = 0;
    while (column < converters.length) {
        converters[column].addValue(rowIndex, column, value, output.cols[column]);
        ++column;
    }
}
示例4
/**
 * Hands a complete row batch to the underlying ORC writer.
 *
 * @param datum the batch to append
 * @throws RuntimeException wrapping the {@link IOException} if the writer fails
 */
@Override
public void add(VectorizedRowBatch datum) {
    try {
        writer.addRowBatch(datum);
    } catch (IOException ioe) {
        // Surface the failure unchecked, keeping the original cause and the file path.
        final String message = "Problem writing to ORC file " + path;
        throw new RuntimeException(message, ioe);
    }
}
示例5
/**
 * Provides the batch that is currently loaded, loading it on demand.
 *
 * @return the {@code batch} field as it stands once {@code advance()} has completed
 */
@Override
public VectorizedRowBatch next() {
    // Load the upcoming batch if that has not happened yet.
    advance();
    final VectorizedRowBatch loaded = batch;
    // Record that this batch has been handed out.
    advanced = false;
    return loaded;
}
示例6
/**
 * Builds a {@link BulkWriter} that buffers {@code RowData} elements in a
 * {@link VectorizedRowBatch} and pushes full batches to an ORC writer.
 *
 * <p>The ORC writer is configured with a {@code PhysicalWriterImpl} built on {@code out}.
 * NOTE(review): the {@code null} and {@code new Path(".")} arguments passed to
 * {@code WriterImpl} look like placeholders because the physical writer handles the
 * actual output; confirm against the ORC {@code WriterImpl} contract.
 *
 * @param out the stream the ORC data is written to
 * @return a writer that batches rows and emits them on batch-full, flush and finish
 * @throws IOException if the ORC writer cannot be created
 */
@Override
public BulkWriter<RowData> create(FSDataOutputStream out) throws IOException {
    final OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(new Properties(), conf);
    final TypeDescription orcSchema = TypeDescription.fromString(schema);
    writerOptions.setSchema(orcSchema);
    writerOptions.physicalWriter(new PhysicalWriterImpl(out, writerOptions));

    final WriterImpl orcWriter = new WriterImpl(null, new Path("."), writerOptions);
    final VectorizedRowBatch rowBatch = orcSchema.createRowBatch();

    return new BulkWriter<RowData>() {

        /** Stores one row in the batch and emits the batch once it is full. */
        @Override
        public void addElement(RowData row) throws IOException {
            final int rowId = rowBatch.size++;
            for (int field = 0; field < row.getArity(); ++field) {
                setColumn(rowId, rowBatch.cols[field], fieldTypes[field], row, field);
            }
            if (rowBatch.size == rowBatch.getMaxSize()) {
                emitBatch();
            }
        }

        /** Emits any buffered rows; does nothing when the batch is empty. */
        @Override
        public void flush() throws IOException {
            if (rowBatch.size == 0) {
                return;
            }
            emitBatch();
        }

        /** Flushes buffered rows and then closes the ORC writer. */
        @Override
        public void finish() throws IOException {
            flush();
            orcWriter.close();
        }

        /** Hands the current batch to the ORC writer and clears it for reuse. */
        private void emitBatch() throws IOException {
            orcWriter.addRowBatch(rowBatch);
            rowBatch.reset();
        }
    };
}
示例7
/**
 * Creates the ORC file appender from the values collected by this builder.
 *
 * @param <D> the element type accepted by the returned appender
 * @return a new {@code OrcFileAppender}
 * @throws NullPointerException if no schema has been set
 *     (presumably; raised by {@code Preconditions.checkNotNull})
 */
public <D> FileAppender<D> build() {
    Preconditions.checkNotNull(schema, "Schema is required");

    // Batch size comes from the configuration, falling back to ORC's default.
    final int batchSize = conf.getInt(VECTOR_ROW_BATCH_SIZE, VectorizedRowBatch.DEFAULT_SIZE);

    return new OrcFileAppender<>(schema, this.file, createWriterFunc, conf, metadata, batchSize);
}
示例8
/**
 * Reads one row of {@code batch} as a {@code Record}.
 *
 * @param batch the batch holding the column vectors
 * @param row   the row to read; passed through unchanged to {@code reader.read}
 * @return the value produced by {@code reader}, cast to {@code Record}
 */
@Override
public Record read(VectorizedRowBatch batch, int row) {
    // Present the batch's columns to the reader as a single struct vector.
    final StructColumnVector struct = new StructColumnVector(batch.size, batch.cols);
    return (Record) reader.read(struct, row);
}
示例9
/**
 * Reads one row of {@code batch} as an {@code InternalRow}.
 *
 * @param batch the batch holding the column vectors
 * @param row   the row to read; passed through unchanged to {@code reader.read}
 * @return the value produced by {@code reader}, cast to {@code InternalRow}
 */
@Override
public InternalRow read(VectorizedRowBatch batch, int row) {
    // Present the batch's columns to the reader as a single struct vector.
    final StructColumnVector struct = new StructColumnVector(batch.size, batch.cols);
    return (InternalRow) reader.read(struct, row);
}
示例10
/**
 * Fills {@code rowBatch} by delegating to {@code reader.nextBatch}.
 *
 * @param reader   the ORC record reader to pull from
 * @param rowBatch the batch handed to the reader
 * @return whatever {@code reader.nextBatch(rowBatch)} returns
 * @throws IOException if the reader fails
 */
@Override
public boolean nextBatch(RecordReader reader, VectorizedRowBatch rowBatch) throws IOException {
    final boolean result = reader.nextBatch(rowBatch);
    return result;
}
示例11
/**
 * Util for generating partitioned {@link OrcColumnarRowSplitReader}.
 *
 * <p>For each selected field the generated batch holds either a constant vector
 * (when the field name is a key of {@code partitionSpec}) or a wrapper around the
 * corresponding ORC column of the row batch.
 */
public static OrcColumnarRowSplitReader<VectorizedRowBatch> genPartColumnarRowReader(
        Configuration conf,
        String[] fullFieldNames,
        DataType[] fullFieldTypes,
        Map<String, Object> partitionSpec,
        int[] selectedFields,
        List<OrcSplitReader.Predicate> conjunctPredicates,
        int batchSize,
        Path path,
        long splitStart,
        long splitLength) throws IOException {

    final List<String> nonPartNames = getNonPartNames(fullFieldNames, partitionSpec);
    final int[] selectedOrcFields = getSelectedOrcFields(fullFieldNames, selectedFields, nonPartNames);

    OrcColumnarRowSplitReader.ColumnBatchGenerator<VectorizedRowBatch> gen = (VectorizedRowBatch rowBatch) -> {
        // Build one vector per selected field.
        final ColumnVector[] vectors = new ColumnVector[selectedFields.length];
        for (int pos = 0; pos < vectors.length; pos++) {
            final int fieldIndex = selectedFields[pos];
            final String name = fullFieldNames[fieldIndex];
            final LogicalType type = fullFieldTypes[fieldIndex].getLogicalType();
            if (partitionSpec.containsKey(name)) {
                // Partition column: the value is constant across the whole batch.
                vectors[pos] = createFlinkVectorFromConstant(type, partitionSpec.get(name), batchSize);
            } else {
                // Regular column: wrap the ORC vector at its position among non-partition fields.
                vectors[pos] = createFlinkVector(rowBatch.cols[nonPartNames.indexOf(name)]);
            }
        }
        return new VectorizedColumnBatch(vectors);
    };

    return new OrcColumnarRowSplitReader<>(
        new OrcNoHiveShim(),
        conf,
        convertToOrcTypeWithPart(fullFieldNames, fullFieldTypes, partitionSpec.keySet()),
        selectedOrcFields,
        gen,
        conjunctPredicates,
        batchSize,
        path,
        splitStart,
        splitLength);
}
示例12
/**
 * Wraps the given row batch.
 *
 * @param batch the batch stored in this wrapper
 */
public OrcNoHiveBatchWrapper(final VectorizedRowBatch batch) {
    this.batch = batch;
}
示例13
/**
 * Exposes the wrapped row batch.
 *
 * @return the {@code batch} field of this object
 */
@Override
public VectorizedRowBatch getBatch() {
    return this.batch;
}
示例14
/**
 * Writes an ORC test file with five columns (float, double, timestamp, tinyint,
 * smallint) and {@code rowSize} rows.
 *
 * <p>Rows {@code 0 .. rowSize - 2} carry the row index as value in every column
 * (the timestamp column uses {@code toTimestamp(i)}); the last row is marked null
 * in every column.
 *
 * <p>The writer is closed in a {@code finally} block so that it is released even
 * when populating or appending the batch fails.
 *
 * @param file    path of the ORC file to create
 * @param rowSize number of rows to write; the code indexes {@code rowSize - 1},
 *                so it must be at least 1
 * @throws IOException if creating, writing or closing the ORC file fails
 */
@Override
protected void prepareReadFileWithTypes(String file, int rowSize) throws IOException {
    // NOTE: orc has field name information, so name should be same as orc
    TypeDescription schema =
        TypeDescription.fromString(
            "struct<" +
                "f0:float," +
                "f1:double," +
                "f2:timestamp," +
                "f3:tinyint," +
                "f4:smallint" +
                ">");

    org.apache.hadoop.fs.Path filePath = new org.apache.hadoop.fs.Path(file);
    Configuration conf = new Configuration();

    Writer writer =
        OrcFile.createWriter(filePath,
            OrcFile.writerOptions(conf).setSchema(schema));

    try {
        VectorizedRowBatch batch = schema.createRowBatch(rowSize);

        // float and double share DoubleColumnVector; tinyint and smallint share LongColumnVector.
        DoubleColumnVector col0 = (DoubleColumnVector) batch.cols[0];
        DoubleColumnVector col1 = (DoubleColumnVector) batch.cols[1];
        TimestampColumnVector col2 = (TimestampColumnVector) batch.cols[2];
        LongColumnVector col3 = (LongColumnVector) batch.cols[3];
        LongColumnVector col4 = (LongColumnVector) batch.cols[4];

        // Every column contains a null (the last row), so noNulls must be cleared.
        col0.noNulls = false;
        col1.noNulls = false;
        col2.noNulls = false;
        col3.noNulls = false;
        col4.noNulls = false;

        // Fill all rows except the last with the row index.
        for (int i = 0; i < rowSize - 1; i++) {
            col0.vector[i] = i;
            col1.vector[i] = i;

            Timestamp timestamp = toTimestamp(i);
            col2.time[i] = timestamp.getTime();
            col2.nanos[i] = timestamp.getNanos();

            col3.vector[i] = i;
            col4.vector[i] = i;
        }

        // The last row is null in every column.
        col0.isNull[rowSize - 1] = true;
        col1.isNull[rowSize - 1] = true;
        col2.isNull[rowSize - 1] = true;
        col3.isNull[rowSize - 1] = true;
        col4.isNull[rowSize - 1] = true;

        batch.size = rowSize;
        writer.addRowBatch(batch);
        batch.reset();
    } finally {
        // Always release the writer, including on failure above.
        writer.close();
    }
}
示例15
/**
 * Writes a single data value into the given row batch.
 *
 * @param value the data value to write.
 * @param output the VectorizedRowBatch to which the output will be written.
 * @throws IOException if there's any IO error while writing the data value.
 */
void write(T value, VectorizedRowBatch output) throws IOException;
示例16
/**
 * Reads a row.
 *
 * @param batch the VectorizedRowBatch to read from.
 * @param row the row to read (presumably a zero-based index into {@code batch};
 *            confirm against implementations).
 * @return the row converted to {@code T}.
 */
T read(VectorizedRowBatch batch, int row);