Java源码示例:org.apache.tika.detect.AutoDetectReader
示例1
/**
 * Determines the character set of the file at {@code path}.
 *
 * <p>The {@code Metadata.CONTENT_ENCODING} entry of {@code metadata} is consulted first. If it
 * names a charset this JVM supports, that charset is returned without opening the file.
 * Otherwise the file is opened and an {@link AutoDetectReader} is asked for the charset.
 *
 * @param path file whose character set is wanted
 * @param metadata metadata read for a declared encoding and handed to the detector
 * @return the declared charset when it is usable, otherwise the charset reported by the detector
 * @throws IOException if the file cannot be opened or read, or wrapping the {@link TikaException}
 *     raised by the detector
 */
private static Charset detectCharset(final Path path, final Metadata metadata) throws IOException {
    // Prefer an encoding that is already declared in the metadata.
    final String declared = metadata.get(Metadata.CONTENT_ENCODING);
    if (declared != null) {
        try {
            if (Charset.isSupported(declared)) {
                return Charset.forName(declared);
            }
        } catch (IllegalArgumentException ignored) {
            // Charset.isSupported/forName throw IllegalCharsetNameException (a subclass of
            // IllegalArgumentException) for syntactically invalid names. A malformed header
            // value is not fatal here: fall through to content-based detection.
        }
    }

    // No usable declaration: detect the character set from the file content.
    try (
        InputStream input = new BufferedInputStream(Files.newInputStream(path));
        AutoDetectReader detector = new AutoDetectReader(input, metadata)
    ) {
        return detector.getCharset();
    } catch (TikaException e) {
        throw new IOException("Unable to detect charset.", e);
    }
}
示例2
/**
 * Opens {@code file} as a ZIP stream whose entry names are decoded with the charset that an
 * {@link AutoDetectReader} reports for the file.
 *
 * <p>The returned stream is open; the caller owns it and must close it.
 *
 * @param file the archive to open
 * @return an open {@link ZipInputStream} over {@code file}
 * @throws AxelorException if the file cannot be opened or its charset cannot be detected
 */
private ZipInputStream validateZip(File file) throws AxelorException {
  // Only the detection reader is auto-closed. The ZipInputStream must NOT be a
  // try-with-resources resource, otherwise it is closed before the caller ever sees it.
  try (AutoDetectReader autoDetectReader = new AutoDetectReader(new FileInputStream(file))) {
    return new ZipInputStream(new FileInputStream(file), autoDetectReader.getCharset());
  } catch (IOException | TikaException e) {
    // NOTE(review): the underlying cause `e` is not attached to the AxelorException;
    // consider a cause-accepting constructor if AxelorException offers one.
    throw new AxelorException(
        TraceBackRepository.CATEGORY_CONFIGURATION_ERROR,
        I18n.get(IExceptionMessage.DMS_IMPORT_INVALID_ZIP_ERROR));
  }
}
示例3
/**
 * Detects the content type of the resource at {@code uri} and, when available, appends the
 * detected character set in the form {@code "<type>; charset=<name>"}.
 *
 * @param uri location of the resource; its file name is also recorded in the metadata as a hint
 * @return {@code MediaType.OCTET_STREAM} as a string when the detected content type is empty,
 *     otherwise the content type, followed by {@code "; charset=..."} when a charset name is
 *     available
 * @throws FileNotFoundException declared by the original signature; presumably raised by
 *     {@code createInputStream} when the resource is missing (confirm against that helper)
 * @throws IOException if reading the resource fails
 * @throws TikaException if character set detection fails
 */
public static String detectContentTypeAndCharset(String uri) throws FileNotFoundException, IOException, TikaException {
    final Detector detector = config.getDetector();
    final Metadata metadata = new Metadata();
    // Set the file name. This provides some level of type-hinting.
    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName());

    final String contentType;
    final String charset;
    // try-with-resources guarantees the stream is released even when detection throws.
    try (TikaInputStream inputStream = createInputStream(uri)) {
        // Detect the content type.
        contentType = detector.detect(inputStream, metadata).toString();
        // Use metadata to provide type-hinting to the AutoDetectReader.
        fillMetadata(metadata, contentType, uri);
        // Detect the character set.
        try (AutoDetectReader reader = new AutoDetectReader(inputStream, metadata)) {
            charset = reader.getCharset().toString();
        }
    }

    // Return the default content-type if undetermined.
    if (contentType == null || contentType.isEmpty()) {
        return MediaType.OCTET_STREAM.toString();
    }
    // Append the charset if one was determined.
    if (charset != null && !charset.isEmpty()) {
        return contentType + "; charset=" + charset;
    }
    return contentType;
}
示例4
/**
 * Detects the character set of the resource at {@code uri}.
 *
 * @param uri location of the resource to inspect
 * @param contentType content type passed to {@code fillMetadata} as a hint for the detector
 * @return the string form of the charset reported by {@link AutoDetectReader}
 * @throws FileNotFoundException declared by the original signature; presumably raised by
 *     {@code createInputStream} when the resource is missing (confirm against that helper)
 * @throws IOException if reading the resource fails
 * @throws TikaException if character set detection fails
 */
public static String detectCharset(String uri, String contentType) throws FileNotFoundException, IOException, TikaException {
    final Metadata metadata = new Metadata();
    // Use metadata to provide type-hinting to the AutoDetectReader.
    fillMetadata(metadata, contentType, uri);
    // try-with-resources releases both the stream and the reader even if detection throws.
    try (
        TikaInputStream inputStream = createInputStream(uri, metadata);
        AutoDetectReader reader = new AutoDetectReader(inputStream, metadata)
    ) {
        // Detect the character set.
        return reader.getCharset().toString();
    }
}