我有一张申请表的图像,只想提取其中被矩形文本框包围的姓名、出生日期、签名和勾选框,但除了预期的结果之外,我还得到了其他意外的结果。
输入图片:
预期结果:
我的结果:
我试过下面的代码
import numpy as np
from PIL import Image
import tensorflow as tf
import os
import pytesseract
import sys
import re
#from pdf2image import convert_from_path #need proppeler windows distrubution
import cv2
#from pdf2image.exceptions import PDFInfoNotInstalledError,PDFPageCountError,PDFSyntaxError
# Path to the Poppler pdftoppm executable; it is only assigned here and is not
# referenced again in the visible code.
pdftoppm_path = r"C:\Program Files (x86)\Poppler\poppler-0.68.0\bin\pdftoppm.exe"
# Path to the Tesseract installation used by pytesseract.
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: Sized sequence of contours accepted by ``cv2.boundingRect``.
        method: One of ``"left-to-right"``, ``"right-to-left"``,
            ``"top-to-bottom"`` or ``"bottom-to-top"``.  Any other value
            behaves like ``"left-to-right"``.

    Returns:
        A pair ``(contours, bounding_boxes)`` of equal-length tuples ordered
        according to *method*.  Both tuples are empty when *cnts* is empty.
    """
    # Nothing to sort.  Without this guard the ``zip(*...)`` unpacking below
    # raises ValueError for an empty input.  ``len`` is used instead of
    # truthiness so an array-like container is handled as well.
    if len(cnts) == 0:
        return ((), ())

    # Descending order for the two "reverse" directions.
    reverse = method in ("right-to-left", "bottom-to-top")
    # Bounding boxes are (x, y, w, h): index 0 sorts on x, index 1 on y.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    # Sort contour/box pairs together so both outputs stay aligned.
    ordered = sorted(zip(cnts, boundingBoxes),
                     key=lambda pair: pair[1][axis], reverse=reverse)
    cnts, boundingBoxes = zip(*ordered)
    return (cnts, boundingBoxes)
def box_extraction(img_for_box_extraction_path, cropped_dir_path,
                   max_width=400, max_height=80, min_width=0, min_height=0):
    """Find line-bounded boxes in a form image and save each one as a PNG.

    The image is read as grayscale, resized to 800x800, binarised and
    inverted.  Vertical and horizontal strokes are isolated with
    erosion/dilation, merged into one mask, and the contours of that mask are
    treated as candidate boxes.  Every candidate whose bounding box passes
    the size filter is cropped from the resized image (with 10 extra pixels
    to the right and bottom), thresholded at 200 and written to disk.

    Args:
        img_for_box_extraction_path: Path of the image to process.
        cropped_dir_path: Prefix of the output files.  Names are built by
            plain concatenation (``cropped_dir_path + "<n>.png"``), so
            include a trailing path separator to write inside a directory.
        max_width: Exclusive upper bound on the box width, in pixels of the
            resized image.
        max_height: Exclusive upper bound on the box height.
        min_width: Inclusive lower bound on the box width.  The default of 0
            keeps every candidate; raise it to drop small noise contours.
        min_height: Inclusive lower bound on the box height.

    Raises:
        OSError: If ``cv2.imread`` cannot read the input image.
    """
    # Flag 0 loads the image as single-channel grayscale.
    img1 = cv2.imread(img_for_box_extraction_path, 0)
    # cv2.imread signals failure by returning None rather than raising.
    if img1 is None:
        raise OSError(
            "Could not read image: {!r}".format(img_for_box_extraction_path))
    img = cv2.resize(img1, (800, 800))

    # Binarise and invert so that dark strokes become white foreground.
    # NOTE(review): with THRESH_OTSU set, OpenCV is documented to pick the
    # threshold itself, so the literal 120 should have no effect - confirm.
    _, img_bin = cv2.threshold(img, 120, 255,
                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    img_bin = 255 - img_bin

    # Length of the line-detecting kernels, derived from the image width.
    kernel_length = img.shape[1] // 150
    # (1 x kernel_length) kernel: keeps vertical strokes only.
    verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                                (1, kernel_length))
    # (kernel_length x 1) kernel: keeps horizontal strokes only.
    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                            (kernel_length, 1))
    # Small (2 x 2) kernel used to clean up the combined mask.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))

    # Erode then dilate with the same kernel to isolate vertical lines.
    img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
    verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)
    # Same procedure for horizontal lines.
    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)

    # Blend both line images; the weights evaluate to 0.8 and 4.2.
    alpha = 0.8
    beta = 5.0 - alpha
    img_final_bin = cv2.addWeighted(verticle_lines_img, alpha,
                                    horizontal_lines_img, beta, 0.0)
    # Invert the blend, erode it with the 2x2 kernel, and binarise again.
    img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
    _, img_final_bin = cv2.threshold(img_final_bin, 128, 255,
                                     cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # Every contour of the line mask is a candidate box.
    contours, _hierarchy = cv2.findContours(
        img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # Nothing detected, so there is nothing to sort or save.
    if len(contours) == 0:
        return

    # Default ordering is left-to-right, so output numbering follows the
    # x-coordinate of each box.
    _, boundingBoxes = sort_contours(contours)

    idx = 0
    for x, y, w, h in boundingBoxes:
        # Keep only boxes inside the configured size window.
        if not (min_width <= w < max_width and min_height <= h < max_height):
            continue
        idx += 1
        # Slicing past the image border is clipped by NumPy.
        new_img = img[y:y + h + 10, x:x + w + 10]
        # Fixed threshold on the crop before saving.
        _, thresh_crop = cv2.threshold(new_img, thresh=200, maxval=255,
                                       type=cv2.THRESH_BINARY)
        cv2.imwrite(cropped_dir_path + str(idx) + '.png', thresh_crop)
# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\P", which newer Python versions
# warn about.  The runtime values are unchanged.
box_extraction(r"X:\PDF2IMG\TEST.jpeg", r"X:\PDF2IMG\cropped")
#cv2.waitKey(0)
#cv2.destroyAllWindows()
如何才能只得到预期的结果?
我有一张申请表的图片,只想提取其中被矩形文本框包围的姓名、出生日期、签名和勾选框,但除了预期的结果之外,还得到了其他意想不到的结果。
最佳答案
要提取所需区域,我们可以利用这些文本框都是矩形这一特性,通过轮廓近似和轮廓面积把它们筛选出来。方法如下:
- 将图像转换为灰度图,进行高斯模糊,然后做 Otsu 阈值化
- 执行形态学开运算以平滑图像并去除噪声
- 查找轮廓
- 使用轮廓近似(四个顶点)和轮廓面积进行过滤
- 使用 Numpy 切片提取并保存 ROI
下面是检测到的矩形文本框(以绿色突出显示)。
由于已经得到了边界框,我们只需按边界框提取 ROI 即可。
import cv2
# Load the form.  cv2.imread returns None instead of raising when the file
# cannot be read, so fail early with a clear message.
image = cv2.imread('1.jpg')
if image is None:
    raise OSError("Could not read image: '1.jpg'")

# Grayscale -> Gaussian blur -> inverted Otsu threshold.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.threshold(blur, 0, 255,
                       cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)[1]

# Morphological opening with a 3x3 kernel to remove small noise.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

# findContours returns a 2- or 3-element tuple here; pick the contour list
# in either case (presumably an OpenCV version difference - confirm).
cnts = cv2.findContours(opening, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

ROI_number = 0
for c in cnts:
    area = cv2.contourArea(c)
    peri = cv2.arcLength(c, True)
    # Approximate the contour with a tolerance of 2% of its perimeter.
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    # Keep only four-vertex shapes whose area is within (1000, 80000).
    if len(approx) == 4 and 1000 < area < 80000:
        x, y, w, h = cv2.boundingRect(approx)
        ROI = image[y:y + h, x:x + w]
        cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
        ROI_number += 1

# Show the intermediate masks and wait for a key press.
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.waitKey()
关于python - 从图像中提取矩形文本框,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58979313/