Java source code examples: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Example 1
public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties, String outputFormatName, ConnectorSession session)
{
    try {
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);
        // Dispatch on the table's declared output format class name.
        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, session);
        }
        if (outputFormatName.equals(HiveIgnoreKeyTextOutputFormat.class.getName())) {
            return new TextRecordWriter(target, conf, properties, compress);
        }
        if (outputFormatName.equals(HiveSequenceFileOutputFormat.class.getName())) {
            return new SequenceFileRecordWriter(target, conf, Text.class, compress);
        }
        if (outputFormatName.equals(AvroContainerOutputFormat.class.getName())) {
            return new AvroRecordWriter(target, conf, compress, properties);
        }
        // Fall back to reflection for any other HiveOutputFormat implementation.
        Object writer = Class.forName(outputFormatName).getConstructor().newInstance();
        return ((HiveOutputFormat<?, ?>) writer).getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
    }
    catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}
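The reflective fallback at the end works for any HiveOutputFormat, including MapredParquetOutputFormat itself. Below is a minimal, self-contained sketch of just that path, assuming a local Hadoop configuration; the wrapper class name, target path, and column schema are placeholders and not part of the code above.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical demo class: reproduces only the reflective branch of Example 1.
public final class ReflectiveParquetWriterSketch {

    static FileSinkOperator.RecordWriter open(String outputFormatName, Path target, Properties tableProperties) throws Exception {
        JobConf conf = new JobConf(new Configuration());
        // Instantiate the output format by class name and ask it for a Hive RecordWriter,
        // exactly as the last branch of Example 1 does.
        HiveOutputFormat<?, ?> format =
                (HiveOutputFormat<?, ?>) Class.forName(outputFormatName).getConstructor().newInstance();
        return format.getHiveRecordWriter(conf, target, Text.class, false /* compressed */, tableProperties, Reporter.NULL);
    }

    public static void main(String[] args) throws Exception {
        // MapredParquetOutputFormat reads the table schema from these properties.
        Properties tableProperties = new Properties();
        tableProperties.setProperty("columns", "id,name");
        tableProperties.setProperty("columns.types", "int:string");

        FileSinkOperator.RecordWriter writer = open(
                MapredParquetOutputFormat.class.getName(),
                new Path("/tmp/parquet-demo/part-00000"),   // placeholder output path
                tableProperties);
        writer.close(false); // writes an empty Parquet file with the declared schema
    }
}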
Example 2
Table createParquetPartitionedTable(
        URI tableUri,
        String database,
        String table,
        Schema schema,
        String fieldName,
        Object fieldData,
        int version) throws Exception {
    List<FieldSchema> columns = new ArrayList<>();
    AvroObjectInspectorGenerator schemaInspector = new AvroObjectInspectorGenerator(schema);
    for (int i = 0; i < schemaInspector.getColumnNames().size(); i++) {
        columns.add(new FieldSchema(
                schemaInspector.getColumnNames().get(i), schemaInspector.getColumnTypes().get(i).toString(), ""));
    }
    List<FieldSchema> partitionKeys = Arrays.asList(new FieldSchema("hour", "string", ""));
    Table parquetTable = TestUtils
            .createPartitionedTable(metaStoreClient, database, table, tableUri, columns, partitionKeys,
                    "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", MapredParquetInputFormat.class.getName(),
                    MapredParquetOutputFormat.class.getName());
    URI partition = createData(tableUri, schema, Integer.toString(version), version, fieldName, fieldData);
    metaStoreClient.add_partitions(Arrays.asList(newTablePartition(parquetTable,
            Arrays.asList(Integer.toString(version)), partition)));
    return metaStoreClient.getTable(database, table);
}
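TestUtils, metaStoreClient, createData, and newTablePartition are helpers of that test suite and are not shown. As a rough, hypothetical sketch of what such a helper has to assemble, the snippet below builds the Hive metastore Table object that points a partitioned table at ParquetHiveSerDe and the MapredParquet input/output formats; the names and location are placeholders.

import java.util.HashMap;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;

// Hypothetical helper: builds (but does not register) a Parquet-backed partitioned table.
public final class ParquetTableSketch {

    public static Table parquetPartitionedTable(String database, String tableName, String location,
            List<FieldSchema> columns, List<FieldSchema> partitionKeys) {
        // Storage descriptor: columns, location, and the Parquet SerDe/format trio.
        StorageDescriptor sd = new StorageDescriptor();
        sd.setCols(columns);
        sd.setLocation(location);
        sd.setInputFormat(MapredParquetInputFormat.class.getName());
        sd.setOutputFormat(MapredParquetOutputFormat.class.getName());

        SerDeInfo serde = new SerDeInfo();
        serde.setSerializationLib("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe");
        serde.setParameters(new HashMap<>());
        sd.setSerdeInfo(serde);

        Table table = new Table();
        table.setDbName(database);
        table.setTableName(tableName);
        table.setSd(sd);
        table.setPartitionKeys(partitionKeys);
        table.setParameters(new HashMap<>());
        return table;
    }
}

Registering the returned object is then a single metastore createTable call, which is roughly what the TestUtils helper appears to do in the example.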
Example 3
public static String getOutputFormatClassName(HoodieFileFormat baseFileFormat) {
    switch (baseFileFormat) {
        case PARQUET:
            return MapredParquetOutputFormat.class.getName();
        default:
            throw new HoodieIOException("No OutputFormat for base file format " + baseFileFormat);
    }
}
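A hypothetical usage sketch: the mapping only covers Parquet base files, and any other format throws. The demo keeps a local copy of the lookup so it compiles on its own; in Hudi the real method lives in one of its format utility classes.

import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.exception.HoodieIOException;

// Hypothetical demo class with a local copy of the lookup from Example 3.
public final class HoodieOutputFormatDemo {

    static String getOutputFormatClassName(HoodieFileFormat baseFileFormat) {
        switch (baseFileFormat) {
            case PARQUET:
                return MapredParquetOutputFormat.class.getName();
            default:
                throw new HoodieIOException("No OutputFormat for base file format " + baseFileFormat);
        }
    }

    public static void main(String[] args) {
        // Prints: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
        System.out.println(getOutputFormatClassName(HoodieFileFormat.PARQUET));
    }
}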
Example 4
@Override
public Optional<FileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf conf,
        ConnectorSession session)
{
    // Only handle tables declared with the Parquet output format, and only when the
    // optimized writer is enabled for this session; otherwise defer to another writer.
    if (!HiveSessionProperties.isParquetOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxPageSize(HiveSessionProperties.getParquetWriterPageSize(session))
            .setMaxBlockSize(HiveSessionProperties.getParquetWriterBlockSize(session))
            .build();

    CompressionCodecName compressionCodecName = getCompression(conf);

    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream()
            .map(hiveType -> hiveType.getType(typeManager))
            .collect(toList());

    // Map each file column to its position in the input columns (-1 if absent).
    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);

        // If the write fails later, the rollback action removes the partially written file.
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new ParquetFileWriter(
                fileSystem.create(path),
                rollbackAction,
                fileColumnNames,
                fileColumnTypes,
                parquetWriterOptions,
                fileInputColumnIndexes,
                compressionCodecName));
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
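ParquetFileWriter, HiveSessionProperties, ParquetWriterOptions, and hdfsEnvironment are classes and fields of the Presto Hive connector and are not shown here. The sketch below reproduces only the open-then-rollback pattern around fileSystem.create, using plain Hadoop FileSystem calls with placeholder paths and data.

import java.io.OutputStream;
import java.util.concurrent.Callable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical demo: open the output stream first, and keep a rollback action that
// deletes the partially written file if a later step fails.
public final class RollbackSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path target = new Path("/tmp/parquet-demo/part-00000.parquet"); // placeholder path
        FileSystem fileSystem = FileSystem.get(target.toUri(), conf);

        OutputStream out = fileSystem.create(target);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(target, false /* non-recursive */);
            return null;
        };

        try {
            out.write(new byte[] {1, 2, 3}); // stand-in for the Parquet writer's output
            out.close();
        }
        catch (Exception e) {
            rollbackAction.call(); // undo the partial file, as the connector's rollback action does
            throw e;
        }
    }
}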