Java source code examples: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat

Example 1: choosing a Hive RecordWriter by output format class name
public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties, String outputFormatName, ConnectorSession session)
{
    try {
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);
        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, session);
        }
        if (outputFormatName.equals(HiveIgnoreKeyTextOutputFormat.class.getName())) {
            return new TextRecordWriter(target, conf, properties, compress);
        }
        if (outputFormatName.equals(HiveSequenceFileOutputFormat.class.getName())) {
            return new SequenceFileRecordWriter(target, conf, Text.class, compress);
        }
        if (outputFormatName.equals(AvroContainerOutputFormat.class.getName())) {
            return new AvroRecordWriter(target, conf, compress, properties);
        }
        // Fall back to reflection for any other HiveOutputFormat implementation
        Object writer = Class.forName(outputFormatName).getConstructor().newInstance();
        return ((HiveOutputFormat<?, ?>) writer).getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
    }
    catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}
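
For reference, the reflective fallback above ends up calling HiveOutputFormat.getHiveRecordWriter. Below is a minimal standalone sketch of the same call made directly against MapredParquetOutputFormat; the two-column schema and property values are illustrative assumptions, not taken from the example.

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

public class DirectParquetWriterSketch {
    public static RecordWriter openParquetWriter(JobConf conf, Path target) throws IOException {
        // The Parquet schema is derived from the standard Hive table properties.
        Properties properties = new Properties();
        properties.setProperty("columns", "id,name");            // illustrative column names
        properties.setProperty("columns.types", "int:string");   // illustrative column types

        // Direct equivalent of the reflective branch above for the Parquet case.
        return new MapredParquetOutputFormat()
                .getHiveRecordWriter(conf, target, Text.class, /* isCompressed */ false, properties, Reporter.NULL);
    }
}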
 
Example 2: registering a partitioned Parquet table and one partition in the Hive metastore
Table createParquetPartitionedTable(
        URI tableUri,
        String database,
        String table,
        Schema schema,
        String fieldName,
        Object fieldData,
        int version) throws Exception {
  List<FieldSchema> columns = new ArrayList<>();
  // Derive the Hive column list from the Avro schema
  AvroObjectInspectorGenerator schemaInspector = new AvroObjectInspectorGenerator(schema);
  for (int i = 0; i < schemaInspector.getColumnNames().size(); i++) {
    columns.add(new FieldSchema(
            schemaInspector.getColumnNames().get(i), schemaInspector.getColumnTypes().get(i).toString(), ""
    ));
  }
  List<FieldSchema> partitionKeys = Arrays.asList(new FieldSchema("hour", "string", ""));
  Table parquetTable = TestUtils
          .createPartitionedTable(metaStoreClient, database, table, tableUri, columns, partitionKeys,
                  "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", MapredParquetInputFormat.class.getName(),
                  MapredParquetOutputFormat.class.getName());
  URI partition = createData(tableUri, schema, Integer.toString(version), version, fieldName, fieldData);
  metaStoreClient.add_partitions(Arrays.asList(newTablePartition(parquetTable,
          Arrays.asList(Integer.toString(version)), partition)));
  return metaStoreClient.getTable(database, table);
}
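
TestUtils.createPartitionedTable is a test helper that is not shown here. The following is a rough sketch of the metastore Table such a helper would typically assemble, assuming it only wires up the Parquet serde, input format, and output format; it uses the Hive metastore Thrift API, and the real helper's behavior may differ.

import java.util.List;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;

public class ParquetTableSketch {
    static Table newParquetPartitionedTable(String database, String tableName, String location,
            List<FieldSchema> columns, List<FieldSchema> partitionKeys) {
        // Serde plus input/output formats make up the Parquet storage descriptor.
        SerDeInfo serdeInfo = new SerDeInfo();
        serdeInfo.setSerializationLib(ParquetHiveSerDe.class.getName());

        StorageDescriptor sd = new StorageDescriptor();
        sd.setCols(columns);
        sd.setLocation(location);
        sd.setInputFormat(MapredParquetInputFormat.class.getName());
        sd.setOutputFormat(MapredParquetOutputFormat.class.getName());
        sd.setSerdeInfo(serdeInfo);

        Table table = new Table();
        table.setDbName(database);
        table.setTableName(tableName);
        table.setPartitionKeys(partitionKeys);
        table.setSd(sd);
        return table;
    }
}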
 
Example 3: mapping a Hudi base file format to its Hive output format class name
public static String getOutputFormatClassName(HoodieFileFormat baseFileFormat) {
  switch (baseFileFormat) {
    case PARQUET:
      return MapredParquetOutputFormat.class.getName();
    default:
      throw new HoodieIOException("No OutputFormat for base file format " + baseFileFormat);
  }
}
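
A hypothetical call site, assuming the method above is in scope (e.g. statically imported from its defining utility class, which is not shown):

// Resolve the Hive output format class for Parquet base files.
String outputFormatClassName = getOutputFormatClassName(HoodieFileFormat.PARQUET);
// -> "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"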
 
Example 4: creating a Parquet FileWriter for the optimized writer path
@Override
public Optional<FileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf conf,
        ConnectorSession session)
{
    if (!HiveSessionProperties.isParquetOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    // Only use the optimized writer when the table's output format is MapredParquetOutputFormat
    if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxPageSize(HiveSessionProperties.getParquetWriterPageSize(session))
            .setMaxBlockSize(HiveSessionProperties.getParquetWriterBlockSize(session))
            .build();

    CompressionCodecName compressionCodecName = getCompression(conf);

    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream()
            .map(hiveType -> hiveType.getType(typeManager))
            .collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);

        // On rollback, delete the partially written file
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new ParquetFileWriter(
                fileSystem.create(path),
                rollbackAction,
                fileColumnNames,
                fileColumnTypes,
                parquetWriterOptions,
                fileInputColumnIndexes,
                compressionCodecName));
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
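
getCompression(conf) is not shown in this example. A minimal sketch of such a helper, assuming the codec is carried in the standard parquet.compression key (ParquetOutputFormat.COMPRESSION) and defaulting to GZIP purely for illustration:

import org.apache.hadoop.mapred.JobConf;
import org.apache.parquet.hadoop.ParquetOutputFormat;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

final class ParquetCompressionSketch {
    // Read the Parquet compression codec name from the job configuration.
    static CompressionCodecName getCompression(JobConf conf) {
        String codecName = conf.get(ParquetOutputFormat.COMPRESSION, CompressionCodecName.GZIP.name());
        return CompressionCodecName.valueOf(codecName.toUpperCase());
    }
}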