| src/main/java/org/springblade/modules/test4j/Test4jController.java | ●●●●● patch | view | raw | blame | history | |
| src/main/java/org/springblade/modules/test4j/util/Test4jUtil.java | ●●●●● patch | view | raw | blame | history |
src/main/java/org/springblade/modules/test4j/Test4jController.java
@@ -2,8 +2,11 @@ import io.swagger.annotations.Api; import lombok.AllArgsConstructor; import net.sourceforge.tess4j.ITesseract; import net.sourceforge.tess4j.Tesseract; import net.sourceforge.tess4j.TesseractException; import org.springblade.core.tool.api.R; import org.springblade.modules.test4j.util.Test4jUtil; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; @@ -11,6 +14,11 @@ import org.springframework.web.multipart.MultipartFile; import javax.imageio.ImageIO; import java.awt.*; import java.awt.image.BufferedImage; import java.io.IOException; import java.util.HashMap; import java.util.Map; @RestController @AllArgsConstructor @@ -20,17 +28,37 @@ @PostMapping("/read-id") public R readText(@RequestParam("file") MultipartFile file) { // 在这里添加识别文本的代码,例如Tesseract OCR Tesseract tesseract = new Tesseract(); try { BufferedImage grayscaleImage = grayscale(ImageIO.read(file.getInputStream())); String text = recognizeText(grayscaleImage); return R.data(text); } catch (IOException e) { e.printStackTrace(); return null; } } public BufferedImage grayscale(BufferedImage image) { int width = image.getWidth(); int height = image.getHeight(); BufferedImage result = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY); Graphics g = result.getGraphics(); g.drawImage(image, 0, 0, null); g.dispose(); return result; } public String recognizeText(BufferedImage image) { ITesseract tesseract = new Tesseract(); // 设置Tesseract的路径 tesseract.setDatapath("F:\\test4jdata"); // 设置为中文简体 tesseract.setLanguage("chi_sim"); try { String text = tesseract.doOCR(ImageIO.read(file.getInputStream())); return R.data(text); } catch (Exception e) { return R.data(e.getMessage()); return tesseract.doOCR(image); } catch (TesseractException e) { e.printStackTrace(); return null; } } } src/main/java/org/springblade/modules/test4j/util/Test4jUtil.java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Text helpers for pulling fields out of the raw string produced by OCR on an ID card image.
 *
 * <p>All methods are static, stateless and tolerate {@code null} input (the OCR step may
 * yield {@code null} when recognition fails). Non-ASCII literals are written as Unicode
 * escapes so the source compiles the same way under any source encoding.
 */
public class Test4jUtil {

    /** Matches any single character outside the range U+4E00 to U+9FA5 (common Chinese characters). */
    private static final Pattern NON_CHINESE = Pattern.compile("[^\u4E00-\u9FA5]");

    /**
     * Matches a date written as: four digits, the year character (U+5E74), one or two digits,
     * the month character (U+6708), one or two digits, the day character (U+65E5).
     */
    private static final Pattern BIRTH_DATE =
            Pattern.compile("(\\d{4}\u5E74\\d{1,2}\u6708\\d{1,2}\u65E5)");

    /** One or more whitespace characters; used to split text into words. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Sentinel returned when no date is present; the text reads "date not found" in Chinese. */
    private static final String DATE_NOT_FOUND = "\u672A\u627E\u5230\u65E5\u671F";

    /**
     * Removes every character that is not a Chinese character in the range U+4E00 to U+9FA5.
     *
     * <p>Removed characters are deleted outright; they are not replaced by spaces.
     *
     * @param inputString text to filter; may be {@code null}
     * @return the Chinese characters of {@code inputString} in their original order,
     *         or an empty string when the input is {@code null}
     */
    public static String removeNonChinese(String inputString) {
        if (inputString == null) {
            return "";
        }
        return NON_CHINESE.matcher(inputString).replaceAll("");
    }

    /**
     * Extracts the first date of the form {@code yyyy<year>M<month>d<day>} from the text, where
     * the separators are the Chinese year/month/day characters and month and day have one or
     * two digits.
     *
     * @param inputString text to search; may be {@code null}
     * @return the matched date exactly as it appears in the text, or the "date not found"
     *         sentinel string when there is no match or the input is {@code null}
     */
    public static String extractBirthDate(String inputString) {
        if (inputString == null) {
            return DATE_NOT_FOUND;
        }
        Matcher matcher = BIRTH_DATE.matcher(inputString);
        return matcher.find() ? matcher.group(1) : DATE_NOT_FOUND;
    }

    /**
     * Concatenates all whitespace-separated words from {@code indexStart} to the end of the text.
     *
     * @param inputString text to split; may be {@code null}
     * @param indexStart  zero-based index of the first word to include
     * @return the selected words joined without any separator, or an empty string when the
     *         input is {@code null} or {@code indexStart} is out of range
     */
    public static String getStringByIndex(String inputString, int indexStart) {
        return getStringByIndex(inputString, indexStart, -1);
    }

    /**
     * Concatenates up to {@code size} whitespace-separated words starting at {@code indexStart}.
     *
     * <p>The text is trimmed and split on runs of whitespace. A non-positive {@code size}, or a
     * {@code size} reaching past the last word, selects everything up to the end.
     *
     * @param inputString text to split; may be {@code null}
     * @param indexStart  zero-based index of the first word to include
     * @param size        maximum number of words to include; values {@code <= 0} mean "all remaining"
     * @return the selected words joined without any separator, or an empty string when the
     *         input is {@code null} or {@code indexStart} is negative or past the last word
     */
    public static String getStringByIndex(String inputString, int indexStart, int size) {
        if (inputString == null || indexStart < 0) {
            return "";
        }
        String[] words = WHITESPACE.split(inputString.trim());
        int length = words.length;
        if (indexStart >= length) {
            return "";
        }

        int end = length;
        // Compare against the remaining word count instead of computing indexStart + size
        // first: the subtraction cannot overflow, whereas the sum can for a large size.
        if (size > 0 && size < length - indexStart) {
            end = indexStart + size;
        }

        StringBuilder res = new StringBuilder();
        for (int i = indexStart; i < end; i++) {
            res.append(words[i]);
        }
        return res.toString();
    }
}