Java source code examples: org.apache.parquet.io.OutputFile

Example 1
private static <T> ParquetWriter<T> createAvroParquetWriter(
		String schemaString,
		GenericData dataModel,
		OutputFile out) throws IOException {

	final Schema schema = new Schema.Parser().parse(schemaString);

	return AvroParquetWriter.<T>builder(out)
			.withSchema(schema)
			.withDataModel(dataModel)
			.build();
}
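
A hedged usage sketch for the helper above (not from the original listing): the schema string and target path are invented, GenericData.get() supplies Avro's default data model, and HadoopOutputFile.fromPath (org.apache.parquet.hadoop.util) is the standard adapter from a Hadoop Path to an OutputFile.

static void writeOneRecord() throws IOException {
	String schemaString = "{\"type\":\"record\",\"name\":\"User\","
			+ "\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}";
	OutputFile out = HadoopOutputFile.fromPath(
			new Path("/tmp/users.parquet"), new Configuration());

	// Write a single made-up record through the helper defined above
	try (ParquetWriter<GenericRecord> writer =
			createAvroParquetWriter(schemaString, GenericData.get(), out)) {
		Schema schema = new Schema.Parser().parse(schemaString);
		writer.write(new GenericRecordBuilder(schema).set("name", "alice").build());
	}
}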
 
Example 2
public ParquetRowDataBuilder(
		OutputFile path,
		RowType rowType,
		boolean utcTimestamp) {
	super(path);
	this.rowType = rowType;
	this.utcTimestamp = utcTimestamp;
}
 
Example 3
@Override
public ParquetWriter<RowData> createWriter(OutputFile out) throws IOException {
	Configuration conf = configuration.conf();
	return new ParquetRowDataBuilder(out, rowType, utcTimestamp)
			.withCompressionCodec(getParquetCompressionCodec(conf))
			.withRowGroupSize(getBlockSize(conf))
			.withPageSize(getPageSize(conf))
			.withDictionaryPageSize(getDictionaryPageSize(conf))
			.withMaxPaddingSize(conf.getInt(
					MAX_PADDING_BYTES, ParquetWriter.MAX_PADDING_SIZE_DEFAULT))
			.withDictionaryEncoding(getEnableDictionary(conf))
			.withValidation(getValidation(conf))
			.withWriterVersion(getWriterVersion(conf))
			.withConf(conf).build();
}
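
A hedged usage sketch for the factory method above: "factory" stands for an instance of the enclosing class, the path is invented, and GenericRowData is assumed as Flink's concrete RowData implementation, here for a rowType with a single INT field.

OutputFile out = HadoopOutputFile.fromPath(
		new Path("/tmp/rows.parquet"), new Configuration());
// Hypothetical caller: writes one row, then closes the writer and flushes the footer
try (ParquetWriter<RowData> writer = factory.createWriter(out)) {
	writer.write(GenericRowData.of(42));
}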
 
Example 4
/**
 * @param file OutputFile to create or overwrite
 * @param schema the schema of the data
 * @param mode file creation mode
 * @param rowGroupSize the row group size
 * @param maxPaddingSize the maximum padding
 * @throws IOException if the file cannot be created
 * @deprecated will be removed in 2.0.0
 */
@Deprecated
public ParquetFileWriter(OutputFile file, MessageType schema, Mode mode,
                         long rowGroupSize, int maxPaddingSize)
    throws IOException {
  this(file, schema, mode, rowGroupSize, maxPaddingSize,
      ParquetProperties.DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH,
      ParquetProperties.DEFAULT_STATISTICS_TRUNCATE_LENGTH,
      ParquetProperties.DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED);
}
 
Example 5
/**
 * @param file OutputFile to create or overwrite
 * @param schema the schema of the data
 * @param mode file creation mode
 * @param rowGroupSize the row group size
 * @param maxPaddingSize the maximum padding
 * @param columnIndexTruncateLength the length to which the min/max values in column indexes are truncated
 * @param statisticsTruncateLength the length to which the min/max values in row group statistics are truncated
 * @param pageWriteChecksumEnabled whether to write out page-level checksums
 * @throws IOException if the file cannot be created
 */
public ParquetFileWriter(OutputFile file, MessageType schema, Mode mode,
                         long rowGroupSize, int maxPaddingSize, int columnIndexTruncateLength,
                         int statisticsTruncateLength, boolean pageWriteChecksumEnabled)
  throws IOException {
  TypeUtil.checkValidWriteSchema(schema);

  this.schema = schema;

  long blockSize = rowGroupSize;
  if (file.supportsBlockSize()) {
    blockSize = Math.max(file.defaultBlockSize(), rowGroupSize);
    this.alignment = PaddingAlignment.get(blockSize, rowGroupSize, maxPaddingSize);
  } else {
    this.alignment = NoAlignment.get(rowGroupSize);
  }

  if (mode == Mode.OVERWRITE) {
    this.out = file.createOrOverwrite(blockSize);
  } else {
    this.out = file.create(blockSize);
  }

  this.encodingStatsBuilder = new EncodingStats.Builder();
  this.columnIndexTruncateLength = columnIndexTruncateLength;
  this.pageWriteChecksumEnabled = pageWriteChecksumEnabled;
  this.crc = pageWriteChecksumEnabled ? new CRC32() : null;

  this.metadataConverter = new ParquetMetadataConverter(statisticsTruncateLength);
}
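
The constructor above touches exactly four OutputFile methods: supportsBlockSize(), defaultBlockSize(), create(long) and createOrOverwrite(long). A minimal in-memory sketch of the interface (the class name InMemoryOutputFile is made up) shows how those calls fit together:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.parquet.io.OutputFile;
import org.apache.parquet.io.PositionOutputStream;

public class InMemoryOutputFile implements OutputFile {
  private final ByteArrayOutputStream buffer = new ByteArrayOutputStream();

  @Override
  public PositionOutputStream create(long blockSizeHint) throws IOException {
    return createOrOverwrite(blockSizeHint); // in memory, create and overwrite behave the same
  }

  @Override
  public PositionOutputStream createOrOverwrite(long blockSizeHint) throws IOException {
    buffer.reset();
    return new PositionOutputStream() {
      @Override
      public long getPos() {
        return buffer.size(); // position equals bytes written so far
      }

      @Override
      public void write(int b) {
        buffer.write(b);
      }
    };
  }

  @Override
  public boolean supportsBlockSize() {
    return false; // no HDFS-style block size, so the writer takes the NoAlignment branch above
  }

  @Override
  public long defaultBlockSize() {
    return 0;
  }

  public byte[] toByteArray() {
    return buffer.toByteArray();
  }
}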
 
Example 6
ParquetWriter(
    OutputFile file,
    ParquetFileWriter.Mode mode,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int rowGroupSize,
    boolean validating,
    Configuration conf,
    int maxPaddingSize,
    ParquetProperties encodingProps) throws IOException {

  WriteSupport.WriteContext writeContext = writeSupport.init(conf);
  MessageType schema = writeContext.getSchema();

  ParquetFileWriter fileWriter = new ParquetFileWriter(
    file, schema, mode, rowGroupSize, maxPaddingSize,
    encodingProps.getColumnIndexTruncateLength(), encodingProps.getStatisticsTruncateLength(),
    encodingProps.getPageWriteChecksumEnabled());
  fileWriter.start();

  this.codecFactory = new CodecFactory(conf, encodingProps.getPageSizeThreshold());
  CodecFactory.BytesCompressor compressor = codecFactory.getCompressor(compressionCodecName);
  this.writer = new InternalParquetRecordWriter<T>(
      fileWriter,
      writeSupport,
      schema,
      writeContext.getExtraMetaData(),
      rowGroupSize,
      compressor,
      validating,
      encodingProps);
}
 
Example 7
public WriteParquetResult(final Schema schema, final OutputStream out, final ParquetConfig parquetConfig, final ComponentLog componentLogger) throws IOException {
    super(out);
    this.schema = schema;
    this.componentLogger = componentLogger;

    final Configuration conf = new Configuration();
    final OutputFile outputFile = new NifiParquetOutputFile(out);

    final AvroParquetWriter.Builder<GenericRecord> writerBuilder =
            AvroParquetWriter.<GenericRecord>builder(outputFile).withSchema(schema);
    applyCommonConfig(writerBuilder, conf, parquetConfig);
    parquetWriter = writerBuilder.build();
}
 
Example 8
@Override
public BulkWriter<T> create(FSDataOutputStream stream) throws IOException {
	final OutputFile out = new StreamOutputFile(stream);
	final ParquetWriter<T> writer = writerBuilder.createWriter(out);
	return new ParquetBulkWriter<>(writer);
}
 
Example 9
private ParquetOutputFile(org.apache.iceberg.io.OutputFile file) {
  this.file = file;
}
 
Example 10
private ParquetOutputFile(com.netflix.iceberg.io.OutputFile file) {
  this.file = file;
}
 
Example 11
private Builder(OutputFile file) {
  super(file);
}
 
Example 12
protected Builder(OutputFile path) {
  this.file = path;
}
 
Example 13
/**
 * Test whether corruption in the page content is detected by checksum verification
 */
@Test
public void testCorruptedPage() throws IOException {
  Configuration conf = new Configuration();
  conf.setBoolean(ParquetOutputFormat.PAGE_WRITE_CHECKSUM_ENABLED, true);

  Path path = writeSimpleParquetFile(conf, CompressionCodecName.UNCOMPRESSED);

  InputFile inputFile = HadoopInputFile.fromPath(path, conf);
  try (SeekableInputStream inputStream = inputFile.newStream()) {
    int fileLen = (int) inputFile.getLength();
    byte[] fileBytes = new byte[fileLen];
    inputStream.readFully(fileBytes);
    inputStream.close();

    // There are 4 pages in total (2 per column), we corrupt the first page of the first column
    // and the second page of the second column. We do this by altering a byte roughly in the
    // middle of each page to be corrupted
    fileBytes[fileLen / 8]++;
    fileBytes[fileLen / 8 + ((fileLen / 4) * 3)]++;

    OutputFile outputFile = HadoopOutputFile.fromPath(path, conf);
    try (PositionOutputStream outputStream = outputFile.createOrOverwrite(1024 * 1024)) {
      outputStream.write(fileBytes);
      outputStream.close();

      // First we disable checksum verification, the corruption will go undetected as it is in the
      // data section of the page
      conf.setBoolean(ParquetInputFormat.PAGE_VERIFY_CHECKSUM_ENABLED, false);
      try (ParquetFileReader reader = getParquetFileReader(path, conf,
        Arrays.asList(colADesc, colBDesc))) {
        PageReadStore pageReadStore = reader.readNextRowGroup();

        DataPageV1 colAPage1 = readNextPage(colADesc, pageReadStore);
        assertFalse("Data in page was not corrupted",
          Arrays.equals(colAPage1.getBytes().toByteArray(), colAPage1Bytes));
        readNextPage(colADesc, pageReadStore);
        readNextPage(colBDesc, pageReadStore);
        DataPageV1 colBPage2 = readNextPage(colBDesc, pageReadStore);
        assertFalse("Data in page was not corrupted",
          Arrays.equals(colBPage2.getBytes().toByteArray(), colBPage2Bytes));
      }

      // Now we enable checksum verification, the corruption should be detected
      conf.setBoolean(ParquetInputFormat.PAGE_VERIFY_CHECKSUM_ENABLED, true);
      try (ParquetFileReader reader =
             getParquetFileReader(path, conf, Arrays.asList(colADesc, colBDesc))) {
        // We expect an exception on the first encountered corrupt page (in readAllPages)
        assertVerificationFailed(reader);
      }
    }
  }
}
 
Example 14
public static <T> Builder<T> builder(OutputFile file) {
  return new Builder<T>(file);
}
 
Example 15
/**
 * Creates and configures a parquet writer to the given output file.
 */
ParquetWriter<T> createWriter(OutputFile out) throws IOException;
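
If createWriter is the interface's only abstract method, callers can supply it as a lambda. A hedged sketch, assuming an interface shaped like Flink's ParquetBuilder<T> and an Avro Schema named "schema" in scope:

// Hypothetical lambda implementation of the factory method above
ParquetBuilder<GenericRecord> builder = out ->
		AvroParquetWriter.<GenericRecord>builder(out)
				.withSchema(schema)
				.build();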
 
Example 16
/**
 * Creates a Builder for configuring ParquetWriter with the example object
 * model. THIS IS AN EXAMPLE ONLY AND NOT INTENDED FOR USE.
 *
 * @param file the output file to create
 * @return a {@link Builder} to create a {@link ParquetWriter}
 */
public static Builder builder(OutputFile file) {
  return new Builder(file);
}
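
A hedged usage sketch for a builder like the one above, assuming it belongs to parquet-hadoop's ExampleParquetWriter, which writes Group records against a MessageType; the schema and data are invented:

MessageType schema = MessageTypeParser.parseMessageType(
    "message Pair { required int32 left; required binary right; }");
// "file" is the OutputFile passed to builder(); SimpleGroupFactory creates Group records
try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(file)
    .withType(schema)
    .build()) {
  writer.write(new SimpleGroupFactory(schema).newGroup()
      .append("left", 1)
      .append("right", "one"));
}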