Java源码示例:org.apache.tika.detect.AutoDetectReader

示例1
/**
 * Determines the character set of the file at {@code path}.
 *
 * <p>The value stored under {@code Metadata.CONTENT_ENCODING} is used when it names a
 * charset supported by this JVM. Otherwise the file is opened and the charset reported
 * by an {@link AutoDetectReader} is returned.
 *
 * @param path the file to inspect; only opened when the metadata does not yield a usable charset
 * @param metadata metadata consulted for the content-encoding and handed to the reader
 * @return the charset taken from the metadata or reported by the reader
 * @throws IOException if the file cannot be read, or wrapping the {@code TikaException}
 *         raised while detecting the charset
 */
private static Charset detectCharset(final Path path, final Metadata metadata) throws IOException {
	// Try to parse the character set from the content-encoding.
	final String orig = metadata.get(Metadata.CONTENT_ENCODING);

	if (null != orig) {
		try {
			if (Charset.isSupported(orig)) {
				return Charset.forName(orig);
			}
		} catch (java.nio.charset.IllegalCharsetNameException ignored) {
			// The content-encoding is not a syntactically legal charset name
			// (e.g. empty or containing spaces); fall through to detection
			// instead of failing with an unchecked exception.
		}
	}

	// Try to detect the character set.
	try (
		final InputStream input = new BufferedInputStream(Files.newInputStream(path));
		final AutoDetectReader detector = new AutoDetectReader(input, metadata)
	) {
		return detector.getCharset();
	} catch (TikaException e) {
		throw new IOException("Unable to detect charset.", e);
	}
}
 
示例2
/**
 * Opens {@code file} as a {@link ZipInputStream} whose entry names are decoded with the
 * charset reported by an {@link AutoDetectReader} for that file.
 *
 * <p>The returned stream is open and owned by the caller, who must close it. Declaring
 * the stream as a try-with-resources resource here would close it before the caller
 * could read from it.
 *
 * @param file the archive to open
 * @return an open zip stream over {@code file}
 * @throws AxelorException if the file cannot be opened or its charset cannot be detected
 */
private ZipInputStream validateZip(File file) throws AxelorException {
  try {
    final java.nio.charset.Charset charset;

    // Detection uses its own stream, closed as soon as the charset is known.
    // Declaring the FileInputStream as a separate resource guarantees it is
    // closed even when the AutoDetectReader constructor throws.
    try (FileInputStream detectionInput = new FileInputStream(file);
        AutoDetectReader autoDetectReader = new AutoDetectReader(detectionInput)) {
      charset = autoDetectReader.getCharset();
    }

    // Deliberately NOT in try-with-resources: ownership passes to the caller.
    return new ZipInputStream(new FileInputStream(file), charset);
  } catch (IOException | TikaException e) {
    throw new AxelorException(
        TraceBackRepository.CATEGORY_CONFIGURATION_ERROR,
        I18n.get(IExceptionMessage.DMS_IMPORT_INVALID_ZIP_ERROR));
  }
}
 
示例3
/**
 * Detects the content type of the resource at {@code uri} and, when a charset name is
 * available, appends it as a {@code charset} parameter.
 *
 * <p>The input stream and the {@link AutoDetectReader} are managed with
 * try-with-resources, so both are closed even when detection fails.
 *
 * @param uri location of the resource; passed to {@code createInputStream}, and its file
 *        name is added to the metadata as the resource-name hint
 * @return {@code MediaType.OCTET_STREAM} as a string when the detected content type is
 *         null or empty; otherwise the content type, followed by
 *         {@code "; charset=<name>"} when the charset name is non-empty
 * @throws FileNotFoundException declared by the original signature (presumably raised by
 *         {@code createInputStream} — confirm against that helper)
 * @throws IOException if reading the resource fails
 * @throws TikaException if charset detection fails
 */
public static String detectContentTypeAndCharset(String uri) throws FileNotFoundException, IOException, TikaException {
	final Detector detector = config.getDetector();
	final String contentType;
	final String charset;

	try (TikaInputStream inputStream = createInputStream(uri)) {
		final Metadata metadata = new Metadata();

		// Set the file name. This provides some level of type-hinting.
		metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName());

		// Detect the content type.
		contentType = detector.detect(inputStream, metadata).toString();

		// Use metadata to provide type-hinting to the AutoDetectReader.
		fillMetadata(metadata, contentType, uri);

		// Detect the character set. The reader wraps the same stream, so closing
		// it here and the stream afterwards is a harmless double close.
		try (AutoDetectReader reader = new AutoDetectReader(inputStream, metadata)) {
			charset = reader.getCharset().toString();
		}
	}

	// Return the default content-type if undetermined.
	if (contentType == null || contentType.isEmpty()) {
		return MediaType.OCTET_STREAM.toString();
	}

	// Append the charset if the content-type was determined.
	if (charset != null && !charset.isEmpty()) {
		return contentType + "; charset=" + charset;
	}

	return contentType;
}
 
示例4
/**
 * Detects the character set of the resource at {@code uri}.
 *
 * <p>The input stream and the {@link AutoDetectReader} are managed with
 * try-with-resources, so both are closed even when detection fails.
 *
 * @param uri location of the resource; passed to {@code fillMetadata} and
 *        {@code createInputStream}
 * @param contentType content type passed to {@code fillMetadata} as a hint for the reader
 * @return the string form of the charset reported by the reader
 * @throws FileNotFoundException declared by the original signature (presumably raised by
 *         {@code createInputStream} — confirm against that helper)
 * @throws IOException if reading the resource fails
 * @throws TikaException if charset detection fails
 */
public static String detectCharset(String uri, String contentType) throws FileNotFoundException, IOException, TikaException {
	final Metadata metadata = new Metadata();

	// Use metadata to provide type-hinting to the AutoDetectReader.
	fillMetadata(metadata, contentType, uri);

	// Detect the character set. Resources are closed in reverse order: the reader
	// first, then the stream it wraps.
	try (
		final TikaInputStream inputStream = createInputStream(uri, metadata);
		final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata)
	) {
		return reader.getCharset().toString();
	}
}