Java source code examples: org.apache.parquet.io.OutputFile
Example 1
private static <T> ParquetWriter<T> createAvroParquetWriter(
        String schemaString,
        GenericData dataModel,
        OutputFile out) throws IOException {
    final Schema schema = new Schema.Parser().parse(schemaString);
    return AvroParquetWriter.<T>builder(out)
            .withSchema(schema)
            .withDataModel(dataModel)
            .build();
}
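For context, a hypothetical call site for the helper above might look like the sketch below. The schema string, file path, and record contents are invented, and HadoopOutputFile is only one possible OutputFile implementation; the usual Avro, Parquet, and Hadoop imports are assumed.

// Hypothetical usage of createAvroParquetWriter; all values below are illustrative.
String schemaString =
        "{\"type\":\"record\",\"name\":\"User\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}";
Configuration conf = new Configuration();
OutputFile out = HadoopOutputFile.fromPath(new Path("/tmp/users.parquet"), conf);
try (ParquetWriter<GenericRecord> writer =
        createAvroParquetWriter(schemaString, GenericData.get(), out)) {
    GenericRecord record = new GenericData.Record(new Schema.Parser().parse(schemaString));
    record.put("name", "alice");
    writer.write(record);
}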
Example 2
public ParquetRowDataBuilder(
        OutputFile path,
        RowType rowType,
        boolean utcTimestamp) {
    super(path);
    this.rowType = rowType;
    this.utcTimestamp = utcTimestamp;
}
Example 3
@Override
public ParquetWriter<RowData> createWriter(OutputFile out) throws IOException {
    Configuration conf = configuration.conf();
    return new ParquetRowDataBuilder(out, rowType, utcTimestamp)
            .withCompressionCodec(getParquetCompressionCodec(conf))
            .withRowGroupSize(getBlockSize(conf))
            .withPageSize(getPageSize(conf))
            .withDictionaryPageSize(getDictionaryPageSize(conf))
            .withMaxPaddingSize(conf.getInt(
                    MAX_PADDING_BYTES, ParquetWriter.MAX_PADDING_SIZE_DEFAULT))
            .withDictionaryEncoding(getEnableDictionary(conf))
            .withValidation(getValidation(conf))
            .withWriterVersion(getWriterVersion(conf))
            .withConf(conf)
            .build();
}
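Most of the getters above (getBlockSize, getPageSize, getDictionaryPageSize, getEnableDictionary, getValidation, getWriterVersion) look like the static helpers on ParquetOutputFormat, which read well-known Hadoop configuration keys; getParquetCompressionCodec is presumably a project-local helper. Under that assumption, the configuration feeding this builder could be set up roughly as follows; the concrete values are invented.

// Illustrative only: key constants come from ParquetOutputFormat, values are made up.
Configuration conf = new Configuration();
conf.set(ParquetOutputFormat.COMPRESSION, "SNAPPY");                 // compression codec
conf.setInt(ParquetOutputFormat.BLOCK_SIZE, 128 * 1024 * 1024);      // row group size
conf.setInt(ParquetOutputFormat.PAGE_SIZE, 1024 * 1024);             // page size
conf.setInt(ParquetOutputFormat.DICTIONARY_PAGE_SIZE, 1024 * 1024);  // dictionary page size
conf.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, true);        // dictionary encoding
conf.setBoolean(ParquetOutputFormat.VALIDATION, false);              // schema validation
conf.set(ParquetOutputFormat.WRITER_VERSION, "PARQUET_1_0");         // writer version
conf.setInt(ParquetOutputFormat.MAX_PADDING_BYTES, 8 * 1024 * 1024); // max padding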
Example 4
/**
 * @param file OutputFile to create or overwrite
 * @param schema the schema of the data
 * @param mode file creation mode
 * @param rowGroupSize the row group size
 * @param maxPaddingSize the maximum padding
 * @throws IOException if the file can not be created
 * @deprecated will be removed in 2.0.0
 */
@Deprecated
public ParquetFileWriter(OutputFile file, MessageType schema, Mode mode,
        long rowGroupSize, int maxPaddingSize)
        throws IOException {
    this(file, schema, mode, rowGroupSize, maxPaddingSize,
            ParquetProperties.DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH,
            ParquetProperties.DEFAULT_STATISTICS_TRUNCATE_LENGTH,
            ParquetProperties.DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED);
}
Example 5
/**
 * @param file OutputFile to create or overwrite
 * @param schema the schema of the data
 * @param mode file creation mode
 * @param rowGroupSize the row group size
 * @param maxPaddingSize the maximum padding
 * @param columnIndexTruncateLength the length which the min/max values in column indexes tried to be truncated to
 * @param statisticsTruncateLength the length which the min/max values in row groups tried to be truncated to
 * @param pageWriteChecksumEnabled whether to write out page level checksums
 * @throws IOException if the file can not be created
 */
public ParquetFileWriter(OutputFile file, MessageType schema, Mode mode,
        long rowGroupSize, int maxPaddingSize, int columnIndexTruncateLength,
        int statisticsTruncateLength, boolean pageWriteChecksumEnabled)
        throws IOException {
    TypeUtil.checkValidWriteSchema(schema);
    this.schema = schema;
    long blockSize = rowGroupSize;
    if (file.supportsBlockSize()) {
        blockSize = Math.max(file.defaultBlockSize(), rowGroupSize);
        this.alignment = PaddingAlignment.get(blockSize, rowGroupSize, maxPaddingSize);
    } else {
        this.alignment = NoAlignment.get(rowGroupSize);
    }
    if (mode == Mode.OVERWRITE) {
        this.out = file.createOrOverwrite(blockSize);
    } else {
        this.out = file.create(blockSize);
    }
    this.encodingStatsBuilder = new EncodingStats.Builder();
    this.columnIndexTruncateLength = columnIndexTruncateLength;
    this.pageWriteChecksumEnabled = pageWriteChecksumEnabled;
    this.crc = pageWriteChecksumEnabled ? new CRC32() : null;
    this.metadataConverter = new ParquetMetadataConverter(statisticsTruncateLength);
}
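The constructor above exercises the whole OutputFile contract: supportsBlockSize(), defaultBlockSize(), and create(long)/createOrOverwrite(long), the last two returning a PositionOutputStream. For orientation, a minimal in-memory implementation might look like the sketch below; the class name is invented for illustration, depending on the parquet-mr version the interface may declare additional default methods (such as getPath()), and real implementations like HadoopOutputFile do considerably more.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.parquet.io.OutputFile;
import org.apache.parquet.io.PositionOutputStream;

// Minimal, illustrative OutputFile that writes into memory (name is made up).
public class InMemoryOutputFile implements OutputFile {
    private final ByteArrayOutputStream buffer = new ByteArrayOutputStream();

    @Override
    public PositionOutputStream create(long blockSizeHint) throws IOException {
        return createOrOverwrite(blockSizeHint);
    }

    @Override
    public PositionOutputStream createOrOverwrite(long blockSizeHint) throws IOException {
        buffer.reset();
        return new PositionOutputStream() {
            @Override
            public long getPos() {
                return buffer.size();
            }

            @Override
            public void write(int b) {
                buffer.write(b);
            }
        };
    }

    @Override
    public boolean supportsBlockSize() {
        return false;  // no native block size, so the writer falls back to the requested row group size
    }

    @Override
    public long defaultBlockSize() {
        return 0;
    }

    public byte[] toByteArray() {
        return buffer.toByteArray();
    }
}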
Example 6
ParquetWriter(
        OutputFile file,
        ParquetFileWriter.Mode mode,
        WriteSupport<T> writeSupport,
        CompressionCodecName compressionCodecName,
        int rowGroupSize,
        boolean validating,
        Configuration conf,
        int maxPaddingSize,
        ParquetProperties encodingProps) throws IOException {
    WriteSupport.WriteContext writeContext = writeSupport.init(conf);
    MessageType schema = writeContext.getSchema();
    ParquetFileWriter fileWriter = new ParquetFileWriter(
            file, schema, mode, rowGroupSize, maxPaddingSize,
            encodingProps.getColumnIndexTruncateLength(), encodingProps.getStatisticsTruncateLength(),
            encodingProps.getPageWriteChecksumEnabled());
    fileWriter.start();
    this.codecFactory = new CodecFactory(conf, encodingProps.getPageSizeThreshold());
    CodecFactory.BytesCompressor compressor = codecFactory.getCompressor(compressionCodecName);
    this.writer = new InternalParquetRecordWriter<T>(
            fileWriter,
            writeSupport,
            schema,
            writeContext.getExtraMetaData(),
            rowGroupSize,
            compressor,
            validating,
            encodingProps);
}
Example 7
public WriteParquetResult(final Schema schema, final OutputStream out, final ParquetConfig parquetConfig, final ComponentLog componentLogger) throws IOException {
    super(out);
    this.schema = schema;
    this.componentLogger = componentLogger;
    final Configuration conf = new Configuration();
    final OutputFile outputFile = new NifiParquetOutputFile(out);
    final AvroParquetWriter.Builder<GenericRecord> writerBuilder =
            AvroParquetWriter.<GenericRecord>builder(outputFile).withSchema(schema);
    applyCommonConfig(writerBuilder, conf, parquetConfig);
    parquetWriter = writerBuilder.build();
}
Example 8
@Override
public BulkWriter<T> create(FSDataOutputStream stream) throws IOException {
    final OutputFile out = new StreamOutputFile(stream);
    final ParquetWriter<T> writer = writerBuilder.createWriter(out);
    return new ParquetBulkWriter<>(writer);
}
Example 9
private ParquetOutputFile(org.apache.iceberg.io.OutputFile file) {
    this.file = file;
}
Example 10
private ParquetOutputFile(com.netflix.iceberg.io.OutputFile file) {
    this.file = file;
}
Example 11
private Builder(OutputFile file) {
    super(file);
}
Example 12
protected Builder(OutputFile path) {
    this.file = path;
}
Example 13
/**
 * Test whether corruption in the page content is detected by checksum verification
 */
@Test
public void testCorruptedPage() throws IOException {
    Configuration conf = new Configuration();
    conf.setBoolean(ParquetOutputFormat.PAGE_WRITE_CHECKSUM_ENABLED, true);
    Path path = writeSimpleParquetFile(conf, CompressionCodecName.UNCOMPRESSED);

    InputFile inputFile = HadoopInputFile.fromPath(path, conf);
    try (SeekableInputStream inputStream = inputFile.newStream()) {
        int fileLen = (int) inputFile.getLength();
        byte[] fileBytes = new byte[fileLen];
        inputStream.readFully(fileBytes);
        inputStream.close();

        // There are 4 pages in total (2 per column), we corrupt the first page of the first column
        // and the second page of the second column. We do this by altering a byte roughly in the
        // middle of each page to be corrupted
        fileBytes[fileLen / 8]++;
        fileBytes[fileLen / 8 + ((fileLen / 4) * 3)]++;

        OutputFile outputFile = HadoopOutputFile.fromPath(path, conf);
        try (PositionOutputStream outputStream = outputFile.createOrOverwrite(1024 * 1024)) {
            outputStream.write(fileBytes);
            outputStream.close();

            // First we disable checksum verification, the corruption will go undetected as it is in the
            // data section of the page
            conf.setBoolean(ParquetInputFormat.PAGE_VERIFY_CHECKSUM_ENABLED, false);
            try (ParquetFileReader reader = getParquetFileReader(path, conf,
                    Arrays.asList(colADesc, colBDesc))) {
                PageReadStore pageReadStore = reader.readNextRowGroup();

                DataPageV1 colAPage1 = readNextPage(colADesc, pageReadStore);
                assertFalse("Data in page was not corrupted",
                        Arrays.equals(colAPage1.getBytes().toByteArray(), colAPage1Bytes));
                readNextPage(colADesc, pageReadStore);
                readNextPage(colBDesc, pageReadStore);
                DataPageV1 colBPage2 = readNextPage(colBDesc, pageReadStore);
                assertFalse("Data in page was not corrupted",
                        Arrays.equals(colBPage2.getBytes().toByteArray(), colBPage2Bytes));
            }

            // Now we enable checksum verification, the corruption should be detected
            conf.setBoolean(ParquetInputFormat.PAGE_VERIFY_CHECKSUM_ENABLED, true);
            try (ParquetFileReader reader =
                    getParquetFileReader(path, conf, Arrays.asList(colADesc, colBDesc))) {
                // We expect an exception on the first encountered corrupt page (in readAllPages)
                assertVerificationFailed(reader);
            }
        }
    }
}
Example 14
public static <T> Builder<T> builder(OutputFile file) {
    return new Builder<T>(file);
}
Example 15
/**
 * Creates and configures a parquet writer to the given output file.
 */
ParquetWriter<T> createWriter(OutputFile out) throws IOException;
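Because this is a single abstract method (it appears to come from Flink's ParquetBuilder interface, although the enclosing type is not shown here), a writer factory can be supplied as a lambda. Assuming that interface, a sketch reusing the Avro helper from the first example:

// Hypothetical lambda implementation; the ParquetBuilder name and the schema variable
// are assumptions not shown in the snippet above.
ParquetBuilder<GenericRecord> builder =
        out -> AvroParquetWriter.<GenericRecord>builder(out)
                .withSchema(schema)
                .build();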
Example 16
/**
 * Creates a Builder for configuring ParquetWriter with the example object
 * model. THIS IS AN EXAMPLE ONLY AND NOT INTENDED FOR USE.
 *
 * @param file the output file to create
 * @return a {@link Builder} to create a {@link ParquetWriter}
 */
public static Builder builder(OutputFile file) {
    return new Builder(file);
}
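A hedged usage sketch of this example object model, assuming parquet-mr's example module classes (ExampleParquetWriter, Group, SimpleGroupFactory, GroupWriteSupport, MessageTypeParser); the schema, path, and values are invented.

// Illustrative only: writes one Group record through the example object model.
MessageType schema = MessageTypeParser.parseMessageType(
        "message pair { required binary key (UTF8); required int32 value; }");
Configuration conf = new Configuration();
GroupWriteSupport.setSchema(schema, conf);
OutputFile file = HadoopOutputFile.fromPath(new Path("/tmp/pairs.parquet"), conf);
try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(file)
        .withType(schema)
        .withConf(conf)
        .build()) {
    Group g = new SimpleGroupFactory(schema).newGroup()
            .append("key", "k1")
            .append("value", 1);
    writer.write(g);
}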