Java源码示例:net.sourceforge.tess4j.Tesseract
示例1
/**
 * Builds and configures the OCR engine once the parent class has accepted its own parameters.
 *
 * @return {@code true} when the parent check passed and the engine was configured;
 *         {@code false} when the parent check failed or configuration threw an exception
 */
@Override
public boolean makeActualParameters() {
    final boolean parentAccepted = super.makeActualParameters();
    if (!parentAccepted) {
        return false;
    }

    try {
        OCRinstance = new Tesseract();

        // Fixed DPI value; background:
        // https://stackoverflow.com/questions/58286373/tess4j-pdf-to-tiff-to-tesseract-warning-invalid-resolution-0-dpi-using-70/58296472#58296472
        OCRinstance.setTessVariable("user_defined_dpi", "96");
        OCRinstance.setTessVariable("debug_file", "/dev/null");

        // A user-configured tessdata directory is applied only when one is set.
        final String tessDataDir = AppVariables.getUserConfigValue("TessDataPath", null);
        if (tessDataDir != null) {
            OCRinstance.setDatapath(tessDataDir);
        }

        if (selectedLanguages != null) {
            OCRinstance.setLanguage(selectedLanguages);
        }

        textFiles = new ArrayList<>();
    } catch (Exception ex) {
        logger.error(ex.toString());
        return false;
    }
    return true;
}
示例2
/**
 * Extracts the text of an image using the English language data and the
 * trained data shipped on the classpath.
 *
 * @param path path of the image file to recognize
 * @return whatever {@code getOCRText} returns for that image
 */
public static String take(String path){
    // Tesseract is the JNA interface-mapping implementation
    // (Tesseract1 would be the JNA direct-mapping alternative).
    ITesseract ocr = new Tesseract();
    File source = new File(path);

    // Have the bundled "tessdata" resources extracted so that no separately
    // installed tessdata directory is required, then use that directory as the data path.
    String tessDataDir = LoadLibs.extractTessResources("tessdata").getAbsolutePath();
    ocr.setDatapath(tessDataDir);

    // The English data recognizes digits comparatively accurately.
    ocr.setLanguage(Constants.ENG);

    return getOCRText(ocr, source);
}
示例3
/**
 * Demo entry point: runs OCR on a rectangular region of a sample image with the
 * "spa" language data and hOCR output enabled, then prints the result.
 * Prints {@code null} when recognition fails.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String recognized = null;
    try {
        File sample = new File("src/main/resources/images/baeldung.png");

        Tesseract engine = new Tesseract();
        engine.setLanguage("spa");
        engine.setPageSegMode(1);
        engine.setOcrEngineMode(1);
        engine.setHocr(true);
        engine.setDatapath("src/main/resources/tessdata");

        // Only the 1200x200 rectangle is passed to the engine, not the whole image.
        Rectangle region = new Rectangle(1200, 200);
        recognized = engine.doOCR(sample, region);
    } catch (TesseractException ex) {
        ex.printStackTrace();
    }
    System.out.println(recognized);
}
示例4
/**
 * Decodes the Base64 image in the request body into a temporary file, runs OCR on
 * that file and returns the recognized text. The temporary file is removed afterwards.
 *
 * @param image request payload; its Base64 content and its extension are read
 * @return the recognized text wrapped in a {@code Text}
 * @throws Exception a {@code TesseractException} wrapping the original failure when
 *         decoding, writing or recognition fails; creating the temporary file may
 *         also throw before the conversion starts
 */
@RequestMapping(value = "ocr/v1/convert", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
public Text convertImageToText(@RequestBody final Image image) throws Exception {
    File tmpFile = File.createTempFile("ocr_image", image.getExtension());
    try {
        FileUtils.writeByteArrayToFile(tmpFile, Base64.decodeBase64(image.getImage()));
        Tesseract tesseract = new Tesseract(); // JNA Interface Mapping
        String imageText = tesseract.doOCR(tmpFile);
        LOGGER.debug("OCR Image Text = " + imageText);
        return new Text(imageText);
    } catch (Exception e) {
        LOGGER.error("Exception while converting/uploading image: ", e);
        // Keep the original failure as the cause so callers can see why OCR failed.
        throw new TesseractException(e);
    } finally {
        // Report a failed cleanup instead of ignoring the result of delete().
        if (!tmpFile.delete()) {
            LOGGER.warn("Could not delete temporary OCR file " + tmpFile.getAbsolutePath());
        }
    }
}
示例5
/**
 * Decodes the Base64 image in the request body in memory, runs OCR on it, stores the
 * recognized text on the image and saves the image through {@code repository}.
 *
 * @param image request payload; its Base64 content is read and its text is set
 * @return a {@code Status} of {@code "success"} when OCR and saving completed
 * @throws Exception a {@code TesseractException} wrapping the original failure when
 *         decoding, recognition or saving fails
 */
@RequestMapping(value = "ocr/v1/upload", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
public Status doOcr(@RequestBody Image image) throws Exception {
    try {
        ByteArrayInputStream bis = new ByteArrayInputStream(Base64.decodeBase64(image.getImage()));
        Tesseract tesseract = new Tesseract(); // JNA Interface Mapping
        String imageText = tesseract.doOCR(ImageIO.read(bis));
        image.setText(imageText);
        repository.save(image);
        LOGGER.debug("OCR Result = " + imageText);
    } catch (Exception e) {
        LOGGER.error("TessearctException while converting/uploading image: ", e);
        // Keep the original failure as the cause so callers can see why OCR failed.
        throw new TesseractException(e);
    }
    return new Status("success");
}
示例6
/**
 * Creates the OCR engine, selects the "chi_sim" language data and points the
 * engine at the extracted "tessdata" resources.
 */
TessOcr() {
    instance = new Tesseract();

    File dataDir = LoadLibs.extractTessResources("tessdata");
    instance.setLanguage("chi_sim");

    // The extracted resource folder serves as the tessdata path.
    String dataDirPath = dataDir.getAbsolutePath();
    instance.setDatapath(dataDirPath);
}
示例7
/**
 * Reads the separator setting and configures the OCR engine once the parent class
 * has accepted its own parameters.
 *
 * <p>The separator is kept only when the separator check box is selected and the
 * input is non-empty; otherwise it is set to {@code null}.
 *
 * @return {@code true} when the parent check passed and the engine was configured;
 *         {@code false} when the parent check failed or configuration threw an exception
 */
@Override
public boolean makeActualParameters() {
    if (!super.makeActualParameters()) {
        return false;
    }
    separator = separatorInput.getText();
    if (!separatorCheck.isSelected() || separator == null || separator.isEmpty()) {
        separator = null;
    }
    try {
        OCRinstance = new Tesseract();
        // https://stackoverflow.com/questions/58286373/tess4j-pdf-to-tiff-to-tesseract-warning-invalid-resolution-0-dpi-using-70/58296472#58296472
        // Use the configured dpi when the convert option is selected, otherwise 96.
        String resolution = convertRadio.isSelected() ? String.valueOf(dpi) : "96";
        OCRinstance.setTessVariable("user_defined_dpi", resolution);
        OCRinstance.setTessVariable("debug_file", "/dev/null");
        String path = AppVariables.getUserConfigValue("TessDataPath", null);
        if (path != null) {
            OCRinstance.setDatapath(path);
        }
        // Only override the engine's language when one is actually selected,
        // so a null selection never replaces the engine's own setting.
        if (selectedLanguages != null) {
            OCRinstance.setLanguage(selectedLanguages);
        }
        return true;
    } catch (Exception e) {
        logger.error(e.toString());
        return false;
    }
}
示例8
/**
 * Demo entry point: recognizes {@code OCRExample.png} with the "eng" language data
 * and prints the text, or prints the error message to standard error on failure.
 *
 * @param args unused
 */
public static void main(String[] args) {
    ITesseract ocr = new Tesseract();
    ocr.setLanguage("eng");

    File input = new File("OCRExample.png");
    try {
        System.out.println(ocr.doOCR(input));
    } catch (TesseractException ex) {
        System.err.println(ex.getMessage());
    }
}
示例9
/**
 * Extracts the text of an image using caller-supplied trained data and language.
 *
 * @param path     path of the image file to recognize
 * @param dataPath directory passed to the engine as its data path
 * @param language language passed to the engine
 * @return whatever {@code getOCRText} returns for that image
 */
public static String take(String path, String dataPath, String language){
    File source = new File(path);

    ITesseract ocr = new Tesseract();
    ocr.setDatapath(dataPath);
    // The caller chooses the language data to use.
    ocr.setLanguage(language);

    return getOCRText(ocr, source);
}
示例10
/**
 * Demo entry point: loads a captcha image from disk, applies the binary, line and
 * mean filters of {@code ImageUtil} in that order, writes the filtered image as a
 * JPEG file and prints the text recognized from that file with the "eng" language data.
 * Any failure is reported by printing the stack trace.
 *
 * @param args unused
 */
public static void main(String[] args){
    try {
        File sourceFile = new File("D:\\爬虫测试\\yzm\\11.jpg");
        BufferedImage image = ImageIO.read(sourceFile);
        // ImageIO.read returns null when no registered reader understands the file.
        if (image == null) {
            throw new IllegalStateException("Cannot decode image: " + sourceFile);
        }

        image = ImageUtil.binaryFilter(image);
        image = ImageUtil.lineFilter(image);
        image = ImageUtil.meanFilter(image);

        File imageFile = new File("E:/captcha5.jpg");
        // ImageIO.write returns false when no writer accepts the image; stop here
        // rather than running OCR on a file that was not written.
        if (!ImageIO.write(image, "jpg", imageFile)) {
            throw new IllegalStateException("Cannot write filtered image: " + imageFile);
        }

        // NOTE(review): getInstance() belongs to older Tess4J releases; confirm the
        // library version before replacing it with the public constructor.
        Tesseract tesseract = Tesseract.getInstance();
        tesseract.setLanguage("eng");
        String code = tesseract.doOCR(imageFile);
        System.out.println(code);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
示例11
/**
 * Starts OCR of the displayed image on a background daemon thread and shows the
 * recognized text in {@code ocrArea} when the task succeeds.
 *
 * <p>Returns without doing anything when no image is shown, when no languages are
 * selected, or when {@code task} is already non-null.
 */
@FXML
public void startOCR() {
// NOTE(review): presumably refreshes selectedLanguages from the UI - confirm.
checkLanguages();
if (imageView.getImage() == null
|| selectedLanguages == null || selectedLanguages.isEmpty()) {
return;
}
synchronized (this) {
// Only one task at a time: an existing task makes this call a no-op.
// Where task is reset to null is not visible in this block.
if (task != null) {
return;
}
task = new SingletonTask<Void>() {
// Recognized text, written by handle() and read by whenSucceeded().
private String result;
@Override
protected boolean handle() {
try {
// A fresh engine is configured for every run.
ITesseract instance = new Tesseract();
instance.setTessVariable("user_defined_dpi", "96");
instance.setTessVariable("debug_file", "/dev/null");
// A user-configured tessdata directory is applied only when one is set.
String path = AppVariables.getUserConfigValue("TessDataPath", null);
if (path != null) {
instance.setDatapath(path);
}
if (selectedLanguages != null) {
instance.setLanguage(selectedLanguages);
}
// Use the cropped selection; fall back to the whole image when cropImage() returns null.
Image selected = cropImage();
if (selected == null) {
selected = imageView.getImage();
}
BufferedImage bufferedImage = SwingFXUtils.fromFXImage(selected, null);
// Re-check before calling doOCR: skip recognition if the task was cleared or cancelled.
if (task == null || isCancelled()) {
return false;
}
result = instance.doOCR(bufferedImage);
return result != null;
} catch (Exception e) {
// error is not declared in this block; presumably a field that SingletonTask reports - confirm.
error = e.toString();
return false;
}
}
// NOTE(review): presumably invoked by SingletonTask after handle() returned true - confirm.
@Override
protected void whenSucceeded() {
// An empty result still counts as success; a hint is popped up for the user.
if (result.length() == 0) {
popText(message("OCRMissComments"), 5000, "white", "1.1em", null);
}
ocrArea.setText(result);
// cost is not declared in this block; presumably the elapsed time kept by SingletonTask - confirm.
resultLabel.setText(MessageFormat.format(message("OCRresults"),
result.length(), DateTools.showTime(cost)));
// Remember which page the OCR text was produced for.
orcPage = currentPage;
}
};
openHandlingStage(task, Modality.WINDOW_MODAL);
// Daemon thread, so a running OCR task does not keep the JVM alive.
Thread thread = new Thread(task);
thread.setDaemon(true);
thread.start();
}
}
示例12
/**
 * Creates the OCR helper and stores the supplied engine in the {@code ocr} field.
 * The constructor is annotated {@code @Autowired}.
 *
 * @param ocr the Tesseract engine this instance keeps a reference to
 */
@Autowired
public OCR(Tesseract ocr) {
this.ocr = ocr;
}
示例13
/**
 * Bean factory method for the OCR engine.
 *
 * @return a new {@code Tesseract} whose data path is {@code tesseractDataPath}
 */
@Bean
public Tesseract tesseract() {
    final Tesseract engine = new Tesseract();
    engine.setDatapath(tesseractDataPath);
    return engine;
}
示例14
/**
 * Checks that the text read from the {@code helloworld.png} screenshot, once trimmed,
 * equals {@code "hello world"}.
 *
 * @throws TesseractException declared by the test; propagated if recognition fails
 */
public void helloWorldTest() throws TesseractException {
    // The mocked driver hands back the sample image whenever a file screenshot is requested.
    TakesScreenshot screenshotSource = mock(TakesScreenshot.class);
    target = new OCR(new Tesseract()).withDriver(screenshotSource);
    when(screenshotSource.getScreenshotAs(OutputType.FILE)).thenReturn(getPath("helloworld.png"));

    String recognized = target.getText().trim();
    assertThat(recognized).isEqualTo("hello world");
}
示例15
public TesseractOcr() {
this.tess = new Tesseract();
this.tess.setDatapath(System.getProperty("user.dir"));
// this.tess.setOcrEngineMode(ITessAPI.TessOcrEngineMode.OEM_CUBE_ONLY);
}