Java源码示例:net.sourceforge.tess4j.Tesseract

示例1
/**
 * Prepares the OCR engine once the parent class has accepted its own parameters.
 *
 * @return {@code true} when the parent check passed and the engine was configured;
 *         {@code false} when the parent check failed or configuration threw an exception
 */
@Override
public boolean makeActualParameters() {
    boolean parentReady = super.makeActualParameters();
    if (!parentReady) {
        return false;
    }

    try {
        OCRinstance = new Tesseract();

        // Fixed DPI, following the answer linked below about the "invalid resolution" warning:
        // https://stackoverflow.com/questions/58286373/tess4j-pdf-to-tiff-to-tesseract-warning-invalid-resolution-0-dpi-using-70/58296472#58296472
        OCRinstance.setTessVariable("user_defined_dpi", "96");
        // NOTE(review): presumably discards Tesseract's debug output; the path is Unix-specific.
        OCRinstance.setTessVariable("debug_file", "/dev/null");

        // The data path is optional: only override the engine default when the user configured one.
        String tessDataPath = AppVariables.getUserConfigValue("TessDataPath", null);
        if (tessDataPath != null) {
            OCRinstance.setDatapath(tessDataPath);
        }
        if (selectedLanguages != null) {
            OCRinstance.setLanguage(selectedLanguages);
        }

        textFiles = new ArrayList<>();
    } catch (Exception ex) {
        logger.error(ex.toString());
        return false;
    }
    return true;
}
 
示例2
/**
 * Extracts text from an image using the English language data and the
 * "tessdata" resources extracted by {@code LoadLibs.extractTessResources}.
 *
 * @param path location of the image file to recognize
 * @return the value produced by {@code getOCRText} for the configured engine and image
 */
public static String take(String path){
    // JNA Interface Mapping; "new Tesseract1()" would be the JNA Direct Mapping alternative.
    ITesseract engine = new Tesseract();
    File source = new File(path);

    // Let the library extract its bundled tessdata so no separately installed copy is needed.
    File extractedTessData = LoadLibs.extractTessResources("tessdata");
    String tessDataDir = extractedTessData.getAbsolutePath();
    engine.setDatapath(tessDataDir);

    // The original author notes that the English data recognizes digits more accurately.
    engine.setLanguage(Constants.ENG);

    return getOCRText(engine, source);
}
 
示例3
/**
 * Runs OCR on a fixed sample image and prints the result.
 *
 * <p>Only the region described by {@code new Rectangle(1200, 200)} is recognized.
 * When OCR fails, the stack trace is printed and {@code null} is written to standard output.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String recognized = null;
    try {
        Tesseract ocr = new Tesseract();
        ocr.setDatapath("src/main/resources/tessdata");
        ocr.setLanguage("spa");
        ocr.setPageSegMode(1);
        ocr.setOcrEngineMode(1);
        ocr.setHocr(true);

        File sample = new File("src/main/resources/images/baeldung.png");
        Rectangle region = new Rectangle(1200, 200);
        recognized = ocr.doOCR(sample, region);
    } catch (TesseractException e) {
        e.printStackTrace();
    }
    System.out.println(recognized);
}
 
示例4
/**
 * Decodes the Base64 image from the request body into a temporary file, runs OCR on
 * that file and returns the recognized text.
 *
 * <p>The temporary file is deleted in all cases once processing finishes.
 *
 * @param image request payload; its Base64 content and file extension are read
 * @return a {@code Text} wrapping the OCR output
 * @throws Exception a {@code TesseractException} carrying the original failure as its
 *         cause when decoding, writing or OCR fails; or the exception raised while
 *         creating the temporary file
 */
@RequestMapping(value = "ocr/v1/convert", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
public Text convertImageToText(@RequestBody final Image image) throws Exception {

    File tmpFile = File.createTempFile("ocr_image", image.getExtension());
    try {
        FileUtils.writeByteArrayToFile(tmpFile, Base64.decodeBase64(image.getImage()));
        Tesseract tesseract = new Tesseract(); // JNA Interface Mapping
        String imageText = tesseract.doOCR(tmpFile);
        LOGGER.debug("OCR Image Text = " + imageText);
        return new Text(imageText);
    } catch (Exception e) {
        LOGGER.error("Exception while converting/uploading image: ", e);
        // Keep the root cause attached so callers and error handlers can see what failed.
        throw new TesseractException("Failed to convert image to text", e);
    } finally {
        // Best-effort cleanup; a failed delete must not mask the OCR outcome.
        tmpFile.delete();
    }
}
 
示例5
/**
 * Decodes the Base64 image from the request body in memory, runs OCR on it, stores the
 * recognized text on the image and saves the image through the repository.
 *
 * @param image request payload; its Base64 content is read and its text is set
 * @return a {@code Status} built from {@code "success"} when OCR and saving completed
 * @throws Exception a {@code TesseractException} carrying the original failure as its
 *         cause when decoding, OCR or saving fails
 */
@RequestMapping(value = "ocr/v1/upload", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
public Status doOcr(@RequestBody Image image) throws Exception {
    try {
        ByteArrayInputStream bis = new ByteArrayInputStream(Base64.decodeBase64(image.getImage()));
        // ImageIO.read returns null (rather than throwing) when no registered reader
        // recognizes the data, so reject that case explicitly with a meaningful message.
        java.awt.image.BufferedImage decoded = ImageIO.read(bis);
        if (decoded == null) {
            throw new IllegalArgumentException("Request payload is not a decodable image");
        }
        Tesseract tesseract = new Tesseract(); // JNA Interface Mapping
        String imageText = tesseract.doOCR(decoded);
        image.setText(imageText);
        repository.save(image);
        LOGGER.debug("OCR Result = " + imageText);
    } catch (Exception e) {
        LOGGER.error("TessearctException while converting/uploading image: ", e);
        // Keep the root cause attached so callers and error handlers can see what failed.
        throw new TesseractException("Failed to OCR and store uploaded image", e);
    }

    return new Status("success");
}
 
示例6
/**
 * Creates the OCR engine, pointing it at the "tessdata" resources extracted by
 * {@code LoadLibs} and selecting the "chi_sim" language data.
 */
TessOcr() {
    instance = new Tesseract();

    // Resolve the extracted tessdata directory first, then apply both settings.
    String tessDataDir = LoadLibs.extractTessResources("tessdata").getAbsolutePath();
    instance.setDatapath(tessDataDir);
    instance.setLanguage("chi_sim");
}
 
示例7
/**
 * Reads the separator option and prepares the OCR engine once the parent class has
 * accepted its own parameters.
 *
 * <p>{@code separator} ends up {@code null} unless the separator check box is selected
 * and the input holds a non-empty string.
 *
 * @return {@code true} when the parent check passed and the engine was configured;
 *         {@code false} when the parent check failed or configuration threw an exception
 */
@Override
public boolean makeActualParameters() {
    boolean parentReady = super.makeActualParameters();
    if (!parentReady) {
        return false;
    }

    String typedSeparator = separatorInput.getText();
    boolean separatorWanted = separatorCheck.isSelected()
            && typedSeparator != null
            && !typedSeparator.isEmpty();
    separator = separatorWanted ? typedSeparator : null;

    try {
        OCRinstance = new Tesseract();

        // DPI handling follows the answer linked below; the configured dpi is used only in convert mode.
        // https://stackoverflow.com/questions/58286373/tess4j-pdf-to-tiff-to-tesseract-warning-invalid-resolution-0-dpi-using-70/58296472#58296472
        String dpiSetting = convertRadio.isSelected() ? dpi + "" : "96";
        OCRinstance.setTessVariable("user_defined_dpi", dpiSetting);
        // NOTE(review): presumably discards Tesseract's debug output; the path is Unix-specific.
        OCRinstance.setTessVariable("debug_file", "/dev/null");

        // The data path is optional: only override the engine default when the user configured one.
        String tessDataPath = AppVariables.getUserConfigValue("TessDataPath", null);
        if (tessDataPath != null) {
            OCRinstance.setDatapath(tessDataPath);
        }
        OCRinstance.setLanguage(selectedLanguages);
    } catch (Exception ex) {
        logger.error(ex.toString());
        return false;
    }
    return true;
}
 
示例8
/**
 * Recognizes the text in {@code OCRExample.png} with the "eng" language data and
 * prints it; on failure the exception message is written to standard error.
 *
 * @param args unused
 */
public static void main(String[] args) {
    ITesseract ocr = new Tesseract();
    ocr.setLanguage("eng");

    File sample = new File("OCRExample.png");
    try {
        String recognized = ocr.doOCR(sample);
        System.out.println(recognized);
    } catch (TesseractException ex) {
        System.err.println(ex.getMessage());
    }
}
 
示例9
/**
 * Extracts text from an image using a caller-supplied data path and language.
 *
 * @param path location of the image file to recognize
 * @param dataPath directory handed to the engine as its data path
 * @param language language code handed to the engine (the original author notes that
 *        the English data recognizes digits more accurately)
 * @return the value produced by {@code getOCRText} for the configured engine and image
 */
public static String take(String path, String dataPath, String language){
    File source = new File(path);

    ITesseract engine = new Tesseract();
    engine.setDatapath(dataPath);
    engine.setLanguage(language);

    return getOCRText(engine, source);
}
 
示例10
/**
 * Filters a captcha image (binary, line, then mean filter), writes the filtered image
 * to disk as JPEG and prints the text Tesseract recognizes in it.
 *
 * <p>Any failure is reported by printing the stack trace. Other {@code ImageUtil}
 * filters (grayFilter, line2Filter, point2Filter) were tried in earlier experiments
 * and can be inserted into the chain below.
 *
 * @param args unused
 */
public static void main(String[] args){
    try {
        File sourceFile = new File("D:\\爬虫测试\\yzm\\11.jpg");
        BufferedImage image = ImageIO.read(sourceFile);
        if (image == null) {
            // ImageIO.read returns null (no exception) when no registered reader can decode the file.
            throw new java.io.IOException("Cannot decode image: " + sourceFile);
        }

        image = ImageUtil.binaryFilter(image);
        image = ImageUtil.lineFilter(image);
        image = ImageUtil.meanFilter(image);

        File imageFile = new File("E:/captcha5.jpg");
        if (!ImageIO.write(image, "jpg", imageFile)) {
            // ImageIO.write returns false when no suitable writer exists; without this check
            // OCR would silently run on a missing or stale file.
            throw new java.io.IOException("No JPEG writer could encode the filtered image: " + imageFile);
        }

        Tesseract tesseract = Tesseract.getInstance();
        tesseract.setLanguage("eng");
        String code = tesseract.doOCR(imageFile);

        System.out.println(code);

    } catch (Exception e) {
        e.printStackTrace();
    }

}
 
示例11
/**
 * Runs OCR on the displayed image (or its cropped selection) in a background task.
 *
 * <p>Nothing happens when no image is shown, when no language is selected, or when a
 * task is already assigned. On success the recognized text is shown in the OCR area
 * together with its length and the elapsed time.
 */
@FXML
public void startOCR() {
    checkLanguages();
    if (imageView.getImage() == null) {
        return;
    }
    if (selectedLanguages == null || selectedLanguages.isEmpty()) {
        return;
    }
    synchronized (this) {
        // Only one OCR task may be assigned at a time.
        if (task != null) {
            return;
        }
        task = new SingletonTask<Void>() {

            private String recognized;

            @Override
            protected boolean handle() {
                try {
                    ITesseract ocrEngine = new Tesseract();
                    ocrEngine.setTessVariable("user_defined_dpi", "96");
                    ocrEngine.setTessVariable("debug_file", "/dev/null");

                    // The data path is optional: only override the engine default when configured.
                    String tessDataPath = AppVariables.getUserConfigValue("TessDataPath", null);
                    if (tessDataPath != null) {
                        ocrEngine.setDatapath(tessDataPath);
                    }
                    if (selectedLanguages != null) {
                        ocrEngine.setLanguage(selectedLanguages);
                    }

                    // Prefer the cropped selection; fall back to the whole displayed image.
                    Image cropped = cropImage();
                    Image source = cropped != null ? cropped : imageView.getImage();
                    BufferedImage bufferedImage = SwingFXUtils.fromFXImage(source, null);

                    // Skip the OCR call when the task was cleared or cancelled meanwhile.
                    if (task == null || isCancelled()) {
                        return false;
                    }
                    recognized = ocrEngine.doOCR(bufferedImage);
                    return recognized != null;
                } catch (Exception e) {
                    error = e.toString();
                    return false;
                }
            }

            @Override
            protected void whenSucceeded() {
                // An empty result still counts as success, but the user gets a hint.
                if (recognized.isEmpty()) {
                    popText(message("OCRMissComments"), 5000, "white", "1.1em", null);
                }
                ocrArea.setText(recognized);
                String summary = MessageFormat.format(message("OCRresults"),
                        recognized.length(), DateTools.showTime(cost));
                resultLabel.setText(summary);

                orcPage = currentPage;
            }

        };
        openHandlingStage(task, Modality.WINDOW_MODAL);

        Thread worker = new Thread(task);
        worker.setDaemon(true);
        worker.start();
    }

}
 
示例12
/**
 * Creates the OCR helper around an injected Tesseract engine.
 *
 * @param ocr engine stored in this instance's {@code ocr} field
 */
@Autowired
public OCR(Tesseract ocr) {
  // Constructor injection: the engine is supplied by the caller (or the container).
  this.ocr = ocr;
}
 
示例13
/**
 * Provides a Tesseract engine bean whose data path is taken from {@code tesseractDataPath}.
 *
 * @return the configured engine
 */
@Bean
public Tesseract tesseract() {
  final Tesseract engine = new Tesseract();
  engine.setDatapath(tesseractDataPath);
  return engine;
}
 
示例14
/**
 * Checks that OCR over the stubbed "helloworld.png" screenshot yields "hello world"
 * once surrounding whitespace is trimmed.
 *
 * @throws TesseractException declared by the test; propagated if OCR fails
 */
public void helloWorldTest() throws TesseractException {
  TakesScreenshot screenshotSource = mock(TakesScreenshot.class);
  Tesseract engine = new Tesseract();
  target = new OCR(engine).withDriver(screenshotSource);

  // Serve the sample image whenever a file screenshot is requested.
  when(screenshotSource.getScreenshotAs(OutputType.FILE))
      .thenReturn(getPath("helloworld.png"));

  assertThat(target.getText().trim()).isEqualTo("hello world");
}
 
示例15
/**
 * Creates the wrapper with a Tesseract engine whose data path is the value of the
 * {@code user.dir} system property.
 */
public TesseractOcr() {
    String workingDir = System.getProperty("user.dir");
    this.tess = new Tesseract();
    this.tess.setDatapath(workingDir);
    // Left disabled by the original author:
    // this.tess.setOcrEngineMode(ITessAPI.TessOcrEngineMode.OEM_CUBE_ONLY);
}