python - 从图像中提取矩形文本框

我有一张申请表的图像，只想从中提取被文本框包围的姓名、出生日期、签名以及勾选框，但我得到的结果中除了这些区域之外，还包含了其他意料之外的区域。

输入图片:

Input image

预期结果:

expected

我的结果:

result

我试过下面的代码

import numpy as np
from PIL import Image
import tensorflow as tf
import os
import pytesseract 
import sys 
import re

#from pdf2image import convert_from_path  # needs the Poppler Windows distribution
import cv2
#from pdf2image.exceptions import PDFInfoNotInstalledError,PDFPageCountError,PDFSyntaxError

# Location of Poppler's pdftoppm executable on this Windows machine.
# NOTE(review): not referenced anywhere else in the visible code.
pdftoppm_path = r"C:\Program Files (x86)\Poppler\poppler-0.68.0\bin\pdftoppm.exe"

# Path to the Tesseract installation used by pytesseract.
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: Sequence of contours; each one is passed to
            ``cv2.boundingRect``.
        method: ``"left-to-right"`` (default), ``"right-to-left"``,
            ``"top-to-bottom"`` or ``"bottom-to-top"``. Any other value
            behaves like ``"left-to-right"``.

    Returns:
        A ``(contours, bounding_boxes)`` pair of equal-length tuples in
        sorted order. Both tuples are empty when ``cnts`` is empty.
    """
    # zip(*...) over an empty sequence yields nothing to unpack, so an image
    # without any contour has to be handled before sorting.
    if len(cnts) == 0:
        return ((), ())

    # The two "reverse" directions sort in descending order.
    descending = method in ("right-to-left", "bottom-to-top")

    # Bounding boxes are (x, y, w, h): index 0 sorts on x, index 1 on y.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    bounding_boxes = [cv2.boundingRect(c) for c in cnts]

    # Sort (contour, box) pairs on the chosen box coordinate only, so the
    # contours themselves are never compared with each other.
    ordered = sorted(zip(cnts, bounding_boxes),
                     key=lambda pair: pair[1][axis], reverse=descending)
    cnts, bounding_boxes = zip(*ordered)

    return (cnts, bounding_boxes)

def box_extraction(img_for_box_extraction_path, cropped_dir_path):
    """Crop small box-like regions out of an image and save them as PNG files.

    The image is read as grayscale, resized to 800x800 and binarised.
    Vertical and horizontal line masks are extracted with erosion/dilation,
    blended, and the contours of the blended mask are used as candidate
    boxes. Every candidate narrower than 400 px and shorter than 80 px
    (measured on the resized image) is thresholded and written to
    ``cropped_dir_path`` as ``1.png``, ``2.png``, ...

    Args:
        img_for_box_extraction_path: Path of the image to process.
        cropped_dir_path: Directory that receives the crops; it is created
            when it does not exist yet.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img1 = cv2.imread(img_for_box_extraction_path, 0)  # flag 0: read as grayscale
    if img1 is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here instead of with an obscure error inside cv2.resize.
        raise FileNotFoundError(
            "Could not read image: {}".format(img_for_box_extraction_path))

    img = cv2.resize(img1, (800, 800))
    _, img_bin = cv2.threshold(img, 120, 255,
                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    img_bin = 255 - img_bin  # invert the binarised image

    # Kernel length derived from the (resized) image width.
    kernel_length = img.shape[1] // 150

    # (1 x kernel_length) kernel: keeps vertical strokes.
    verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
    # (kernel_length x 1) kernel: keeps horizontal strokes.
    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    # Small 2 x 2 kernel used for the final erosion.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))

    # Erode then dilate with the vertical kernel to isolate vertical lines.
    img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
    verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)

    # Same with the horizontal kernel to isolate horizontal lines.
    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)

    # Blend both line masks with weights 0.8 and 4.2.
    # NOTE(review): the weights sum to 5.0 rather than 1.0 - presumably
    # deliberate so that any line pixel ends up bright; confirm.
    alpha = 0.8
    beta = 5.0 - alpha
    img_final_bin = cv2.addWeighted(verticle_lines_img, alpha, horizontal_lines_img, beta, 0.0)
    # Invert the blended mask, erode it and binarise it again.
    img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
    _, img_final_bin = cv2.threshold(img_final_bin, 128, 255,
                                     cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # Contours of the line mask are the candidate boxes.
    contours, hierarchy = cv2.findContours(
        img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        return  # nothing detected, nothing to save

    # Default ordering (left to right); the boxes come back in the same order.
    (contours, boundingBoxes) = sort_contours(contours)

    # Write into the directory instead of gluing the file name onto its path.
    if cropped_dir_path:
        os.makedirs(cropped_dir_path, exist_ok=True)

    idx = 0
    for (x, y, w, h) in boundingBoxes:
        # Only boxes narrower than 400 px and shorter than 80 px are saved.
        if w < 400 and h < 80:
            idx += 1
            # Crop with 10 px of extra margin to the right and bottom.
            new_img = img[y:y+h+10, x:x+w+10]
            _, thresh_crop = cv2.threshold(new_img, thresh=200, maxval=255,
                                           type=cv2.THRESH_BINARY)
            cv2.imwrite(os.path.join(cropped_dir_path, str(idx) + '.png'), thresh_crop)


# Raw strings keep the Windows backslashes literal: "\P", "\T" and "\c" are
# invalid escape sequences in an ordinary string literal.
box_extraction(r"X:\PDF2IMG\TEST.jpeg", r"X:\PDF2IMG\cropped")

#cv2.waitKey(0)
#cv2.destroyAllWindows()

如何才能只得到预期的结果？

我有一张申请表的图片，只想从中提取被文本框包围的姓名、出生日期、签名以及勾选框，但我得到的结果中除了这些区域之外，还包含了其他意料之外的区域。

最佳答案

要提取所需区域，我们可以利用矩形框的特性：通过轮廓近似和轮廓面积就可以把它们筛选出来。具体方法如下:

- 将图像转换为灰度图，然后进行模糊和阈值处理
- 执行形态学操作以平滑图像并去除噪声
- 寻找轮廓
  - 使用轮廓近似和轮廓面积进行过滤
  - 使用 Numpy 切片提取并保存 ROI

这是检测到的以绿色突出显示的矩形文本框

由于我们有边界框，我们只需提取 ROI

import cv2

image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread returns None for a missing/unreadable file; stop here
    # instead of failing inside cvtColor with an obscure error.
    raise FileNotFoundError("Could not read image: 1.jpg")

# Grayscale -> Gaussian blur -> inverted Otsu threshold.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur,0,255,cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)[1]

# Morphological opening with a 3x3 rectangular kernel to remove noise.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
cnts = cv2.findContours(opening, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns either 2 or 3 values; pick the contour list in both cases.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

ROI_number = 0
for c in cnts:
    # Cheap area filter first, so rejected contours skip the polygon fit.
    area = cv2.contourArea(c)
    if not 1000 < area < 80000:
        continue

    # Keep only contours whose polygon approximation has four vertices.
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) != 4:
        continue

    # Slice the bounding box out of the original image and save it.
    x,y,w,h = cv2.boundingRect(approx)
    ROI = image[y:y+h, x:x+w]
    cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
    ROI_number += 1

cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.waitKey()

关于python - 从图像中提取矩形文本框，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/58979313/

python - 从图像中提取矩形文本框

上一篇：grails - Grails 2.4.4是否支持SCSS？

下一篇：grails - Grails的KahaDb属性