学校网站建设策划书建设银行网站打不开用什么浏览器
1.找开发去掉验证码或者使用万能验证码
2.使用OCR自动识别
使用OCR自动识别,识别率一般不会太高,但处理简单的验证码还是没问题的
这里使用的是Tesseract-OCR,下载地址:https://github.com/A9T9/Free-Ocr-Windows-Desktop/releases
怎么使用呢?
进入安装后的目录:
tesseract.exe test.png test -1
准备一份网页,上面使用该验证码
<html>
<head>
<title>Table test by Young</title>
</head>
<body><br>
<h1> Test </h1><img src="http://csujwc.its.csu.edu.cn/sys/ValidateCode.aspx?t=1"><br>
</body>
</html>
 
要识别验证码,首先得取得验证码。这里采取对页面元素局部截图的方式:首先获取整个页面的截图,
然后根据页面元素的坐标和尺寸进行截取
/**
 * Saves a screenshot of a single page element as a PNG file.
 *
 * <p>A screenshot of the whole page is taken first; the element's location and size are
 * then used to crop it, and the cropped image is copied to {@code path}.
 *
 * <p>The crop region is clamped to the screenshot, so an element whose box extends past
 * the captured image yields the visible part instead of a
 * {@code RasterFormatException}. If nothing of the element lies inside the screenshot,
 * or the screenshot cannot be decoded, a message is printed and no file is written.
 *
 * <p>NOTE(review): assumes element coordinates and screenshot pixels use the same scale;
 * confirm on scaled (high-DPI) displays.
 *
 * @param driver  driver used to take the page screenshot; must support {@code TakesScreenshot}
 * @param element element to capture
 * @param path    destination file for the cropped PNG
 * @throws InterruptedException never thrown by this implementation; kept in the signature
 *                              so existing callers keep compiling
 */
public static void screenShotForElement(WebDriver driver,
        WebElement element, String path) throws InterruptedException {
    File scrFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
    try {
        BufferedImage img = ImageIO.read(scrFile);
        if (img == null) {
            // ImageIO.read returns null when no registered reader can decode the file.
            System.err.println("Could not decode screenshot " + scrFile);
            return;
        }

        Point p = element.getLocation();
        Rectangle wanted = new Rectangle(p.getX(), p.getY(),
                element.getSize().getWidth(), element.getSize().getHeight());
        // Clamp to the image: getSubimage rejects any region that leaves the raster.
        Rectangle crop = wanted.intersection(
                new Rectangle(0, 0, img.getWidth(), img.getHeight()));
        if (crop.isEmpty()) {
            System.err.println("Element lies outside the screenshot; nothing saved to " + path);
            return;
        }

        BufferedImage dest = img.getSubimage(crop.x, crop.y, crop.width, crop.height);
        // Overwrite the temporary screenshot with the crop, then copy it to the target.
        ImageIO.write(dest, "png", scrFile);
        FileUtils.copyFile(scrFile, new File(path));
    } catch (IOException e) {
        // Best effort, as before: report the failure and let the caller continue.
        System.err.println("Failed to save element screenshot to " + path);
        e.printStackTrace();
    }
}
 
截取完元素,就可以调用Tesseract-OCR生成text
// use Tesseract to get stringsRuntime rt = Runtime.getRuntime();rt.exec("cmd.exe /C  tesseract.exe D:\\Tesseract-OCR\\test.png  D:\\Tesseract-OCR\\test -1 ");
 
接下来通过java读取txt
/*** This method for read TXT file* * @param filePath*/public static void readTextFile(String filePath) {try {String encoding = "GBK";File file = new File(filePath);if (file.isFile() && file.exists()) { // 判断文件是否存在InputStreamReader read = new InputStreamReader(new FileInputStream(file), encoding);// 考虑到编码格式BufferedReader bufferedReader = new BufferedReader(read);String lineTxt = null;while ((lineTxt = bufferedReader.readLine()) != null) {System.out.println(lineTxt);}read.close();} else {System.out.println("找不到指定的文件");}} catch (Exception e) {System.out.println("读取文件内容出错");e.printStackTrace();}}
 
整体代码如下:
package com.dbyl.tests;

import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.concurrent.TimeUnit;

import javax.imageio.ImageIO;

import org.apache.commons.io.FileUtils;
import org.openqa.selenium.By;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.Point;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;

import com.dbyl.libarary.utils.DriverFactory;

/**
 * Demo: captures the CAPTCHA image from a local test page, runs the Tesseract
 * command-line tool on it and prints the recognised text.
 */
public class TesseractTest {

    /** Where the cropped CAPTCHA image is written. */
    private static final String IMAGE_PATH = "D:\\Tesseract-OCR\\test.png";

    /** Output base name handed to Tesseract; the result is read from this name plus ".txt". */
    private static final String OUTPUT_BASE = "D:\\Tesseract-OCR\\test";

    /**
     * Opens the test page, saves a screenshot of its first {@code <img>}, runs Tesseract
     * on that image and prints the resulting text file.
     *
     * @param args unused
     * @throws IOException          if the Tesseract process cannot be started
     * @throws InterruptedException if the thread is interrupted while waiting for Tesseract
     */
    public static void main(String[] args) throws IOException,
            InterruptedException {

        WebDriver driver = DriverFactory.getChromeDriver();
        try {
            // Set the timeout before navigating so it applies to this page load.
            driver.manage().timeouts().pageLoadTimeout(30, TimeUnit.SECONDS);
            driver.get("file:///C:/Users/validation.html");
            WebElement element = driver.findElement(By.xpath("//img"));

            // take screen shot for element
            screenShotForElement(driver, element, IMAGE_PATH);
        } finally {
            // Always close the browser, even when locating or capturing the element fails.
            driver.quit();
        }

        // use Tesseract to get strings
        runTesseract(IMAGE_PATH, OUTPUT_BASE);

        // Read text
        readTextFile(OUTPUT_BASE + ".txt");
    }

    /**
     * Runs the Tesseract command-line tool and waits for it to finish.
     *
     * <p>The arguments are the same tokens as the original single-string command line.
     * NOTE(review): the trailing "-1" is passed through unchanged; confirm it against the
     * command-line usage of the installed Tesseract version.
     */
    private static void runTesseract(String imagePath, String outputBase)
            throws IOException, InterruptedException {
        Process ocr = new ProcessBuilder("cmd.exe", "/C", "tesseract.exe",
                imagePath, outputBase, "-1")
                .inheritIO() // forward the child's output so a full pipe buffer cannot stall it
                .start();
        // Waiting for exit replaces the fixed one-second sleep, which raced the OCR run.
        int exitCode = ocr.waitFor();
        if (exitCode != 0) {
            System.err.println("tesseract exited with code " + exitCode);
        }
    }

    /**
     * Prints every line of a text file to standard output, decoding the file as GBK.
     *
     * <p>If {@code filePath} does not name an existing regular file, a "file not found"
     * message is printed instead. Any failure while opening or reading the file is
     * reported on standard output together with a stack trace; nothing is thrown.
     *
     * @param filePath path of the text file to print
     */
    public static void readTextFile(String filePath) {
        try {
            String encoding = "GBK"; // decode with an explicit charset, not the platform default
            File file = new File(filePath);
            if (!file.isFile()) { // isFile() is false for missing paths as well
                System.out.println("找不到指定的文件");
                return;
            }
            // try-with-resources closes the stream and reader even when a read fails part-way.
            try (FileInputStream in = new FileInputStream(file);
                    BufferedReader bufferedReader =
                            new BufferedReader(new InputStreamReader(in, encoding))) {
                String lineTxt;
                while ((lineTxt = bufferedReader.readLine()) != null) {
                    System.out.println(lineTxt);
                }
            }
        } catch (Exception e) {
            // Best effort, as before: report and continue rather than propagate.
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
    }

    /**
     * Saves a screenshot of a single page element as a PNG file.
     *
     * <p>A screenshot of the whole page is taken first; the element's location and size
     * are then used to crop it, and the cropped image is copied to {@code path}. The crop
     * region is clamped to the screenshot, so an element whose box extends past the
     * captured image yields the visible part instead of a {@code RasterFormatException}.
     * If nothing of the element lies inside the screenshot, or the screenshot cannot be
     * decoded, a message is printed and no file is written.
     *
     * <p>NOTE(review): assumes element coordinates and screenshot pixels use the same
     * scale; confirm on scaled (high-DPI) displays.
     *
     * @param driver  driver used to take the page screenshot; must support
     *                {@code TakesScreenshot}
     * @param element element to capture
     * @param path    destination file for the cropped PNG
     * @throws InterruptedException never thrown by this implementation; kept in the
     *                              signature so existing callers keep compiling
     */
    public static void screenShotForElement(WebDriver driver,
            WebElement element, String path) throws InterruptedException {
        File scrFile = ((TakesScreenshot) driver)
                .getScreenshotAs(OutputType.FILE);
        try {
            BufferedImage img = ImageIO.read(scrFile);
            if (img == null) {
                // ImageIO.read returns null when no registered reader can decode the file.
                System.err.println("Could not decode screenshot " + scrFile);
                return;
            }

            Point p = element.getLocation();
            Rectangle wanted = new Rectangle(p.getX(), p.getY(),
                    element.getSize().getWidth(), element.getSize().getHeight());
            // Clamp to the image: getSubimage rejects any region that leaves the raster.
            Rectangle crop = wanted.intersection(
                    new Rectangle(0, 0, img.getWidth(), img.getHeight()));
            if (crop.isEmpty()) {
                System.err.println("Element lies outside the screenshot; nothing saved to " + path);
                return;
            }

            BufferedImage dest = img.getSubimage(crop.x, crop.y,
                    crop.width, crop.height);
            // Overwrite the temporary screenshot with the crop, then copy it to the target.
            ImageIO.write(dest, "png", scrFile);
            FileUtils.copyFile(scrFile, new File(path));
        } catch (IOException e) {
            // Best effort, as before: report the failure and let the caller continue.
            System.err.println("Failed to save element screenshot to " + path);
            e.printStackTrace();
        }
    }

}
