python - 在 python 中使用 opencv 检测低对比度图像中的正方形,以便通过 tesseract 读取

标签 python opencv tesseract

我想检测像这样的图像中的标签,以便使用 tesseract 提取文本。我尝试了各种阈值组合和使用边缘检测。但是我一次最多只能检测大约一半的标签。这些是我一直试图从中读取标签的一些图像:

enter image description here

enter image description here

所有标签都具有相同的纵横比(宽度是高度的 3.5 倍),因此我试图找到具有相同纵横比的 minAreaRect 的轮廓。困难的部分是在较浅的背景上处理标签。这是我到目前为止的代码:

from PIL import Image
import pytesseract
import numpy as np
import argparse
import cv2
import os

# Command-line interface: one required option naming the image to process.
# The parsed namespace is exposed as a plain dict, so the rest of the script
# reads the path as args["image"].
parser = argparse.ArgumentParser()
parser.add_argument(
    "-i",
    "--image",
    required=True,
    help="path to input image to be OCR'd",
)
args = vars(parser.parse_args())

#function to crop an image to a minAreaRect
def crop_minAreaRect(img, rect):
    """Cut the rotated rectangle ``rect`` out of ``img`` as an upright crop.

    The whole image is rotated about its centre by the rectangle's angle,
    which makes the rectangle axis-aligned; the rectangle's corners are pushed
    through the same transform and their bounding extents are used as the
    slice.

    Args:
        img: Image array; only ``img.shape[0]`` (rows) and ``img.shape[1]``
            (columns) are read here.
        rect: ``((cx, cy), (w, h), angle)`` tuple in the form returned by
            ``cv2.minAreaRect``.

    Returns:
        The cropped region of the rotated image. It may be empty (zero rows
        or columns) when the rectangle collapses after clamping/truncation,
        so callers should check the shape before using it.
    """
    # rotate img
    angle = rect[2]
    rows, cols = img.shape[0], img.shape[1]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    # The output canvas keeps the input size, so anything rotated outside
    # the original frame is lost.
    img_rot = cv2.warpAffine(img, M, (cols, rows))

    # rotate bounding box
    box = cv2.boxPoints(rect)
    # np.intp replaces np.int0, an alias that was removed in NumPy 2.0.
    pts = np.intp(cv2.transform(np.array([box]), M))[0]
    pts[pts < 0] = 0

    # crop
    # Use the min/max extents rather than fixed corner indices: the order in
    # which boxPoints lists the corners is not the same in every OpenCV
    # release, and a wrong order yields an empty or inverted slice.
    x_min, y_min = pts.min(axis=0)
    x_max, y_max = pts.max(axis=0)
    img_crop = img_rot[y_min:y_max, x_min:x_max]

    return img_crop




# load image and apply threshold
image = cv2.imread(args["image"])
if image is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise IOError("could not read image: {}".format(args["image"]))
bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
bw = cv2.adaptiveThreshold(bw, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                           cv2.THRESH_BINARY, 27, 20)

# do edge detection, with the Canny thresholds placed +/- sigma around the
# median intensity of the thresholded image
v = np.median(bw)
sigma = 0.5
lower = int(max(0, (1.0 - sigma) * v))
upper = int(min(255, (1.0 + sigma) * v))
bw = cv2.Canny(bw, lower, upper)
kernel = np.ones((5, 5), np.uint8)
bw = cv2.dilate(bw, kernel, iterations=1)

# find contours
# findContours returns (image, contours, hierarchy) in OpenCV 3 but
# (contours, hierarchy) in OpenCV 2.x/4.x; the last two items are the same
# in every version.
contours, hierarchy = cv2.findContours(
    bw, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
if contours:
    # Index 0 is only valid when at least one contour was found.
    cv2.drawContours(bw, contours, 0, (0, 0, 255), 2)
cv2.imwrite("edge.png", bw)

# test which contours have the correct aspect ratio
# Each hit is stored together with its minAreaRect so the rectangle is
# computed only once per contour.
passes = []
for contour in contours:
    rect = cv2.minAreaRect(contour)
    w, h = rect[1]
    if h > 20 and w > 20:
        ratio = max(w, h) / min(w, h)
        print("ratio: {}".format(ratio))
        if 3.4 < ratio < 3.6:
            passes.append((contour, rect))
if not passes:
    print("no passes")
    raise SystemExit

passboxes = []

# crop out each label and attempt to extract text
for i, (contour, rect) in enumerate(passes, 1):
    crop = crop_minAreaRect(image, rect)
    h, w = crop.shape[:2]
    print(str(h) + "x" + str(w))
    if not (w and h):
        # An empty crop cannot be written or OCR'd; skip it (the file
        # number i is still consumed, as before).
        continue
    cv2.imwrite("{}.png".format(i), crop)

    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    gray = cv2.threshold(gray, 50, 255,
                         cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    im = Image.fromarray(gray)
    w, h = im.size
    print("W:{} H:{}".format(w, h))
    if h > w:
        print("rotating")
        # Image.rotate returns a new image, so the result must be kept;
        # expand=True enlarges the canvas so a portrait crop is not clipped
        # when laid on its side.
        im = im.rotate(90, expand=True)
    im.save("output.png")
    print(pytesseract.image_to_string(im))

    # Second attempt upside down, since the label's reading direction is
    # not known.
    im = im.rotate(180)
    im.save("output.png")
    print(pytesseract.image_to_string(im))

    # np.intp replaces np.int0, an alias that was removed in NumPy 2.0.
    passboxes.append(np.intp(cv2.boxPoints(rect)))

# Debug overlay: every contour, the contours that passed the ratio test, and
# the boxes of the labels that were OCR'd.
if passboxes:
    # -1 draws every box; index 0 drew only the first one.
    cv2.drawContours(image, passboxes, -1, (0, 0, 255), 2)
cv2.drawContours(image, contours, -1, (255, 255, 0), 2)
cv2.drawContours(image, [cnt for cnt, _ in passes], -1, (0, 255, 0), 3)
cv2.imwrite("output2.png", image)

我认为问题可能出在阈值参数上。或者也可能是我把问题想得过于复杂了。

最佳答案

您只需要检测带有“A-08337”之类文字的白色标签吗?以下代码在两张图片上都检测到了所有这类标签:

import numpy as np
import cv2

img = cv2.imread('labels.jpg')
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise IOError("could not read labels.jpg")

#downscale the image because Canny tends to work better on smaller images
# img.shape is (rows, cols, channels), i.e. (height, width, ...), while
# cv2.resize expects the target size as (width, height).
height, width = img.shape[:2]
resize_coeff = 0.25
img = cv2.resize(img, (int(resize_coeff * width), int(resize_coeff * height)))

#find edges, then contours
# findContours returns (image, contours, hierarchy) in OpenCV 3 but
# (contours, hierarchy) in OpenCV 2.x/4.x; [-2] is the contour list in all.
canny = cv2.Canny(img, 100, 200)
contours = cv2.findContours(canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

#draw the contours, do morphological close operation
#to close possible small gaps, then find contours again on the result
height, width = img.shape[:2]
blank = np.zeros((height, width), np.uint8)
cv2.drawContours(blank, contours, -1, 1, 1)
blank = cv2.morphologyEx(blank, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
contours = cv2.findContours(blank, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

#keep only contours of more or less correct area and perimeter
# These limits are in pixels of the downscaled image, so they must be
# re-tuned if resize_coeff or the source resolution changes.
contours = [cnt for cnt in contours
            if 800 < cv2.contourArea(cnt) < 1600
            and cv2.arcLength(cnt, True) < 200]
cv2.drawContours(img, contours, -1, (0, 0, 255), 1)

cv2.imwrite("contours.png", img)

再加上一些额外的凸性检查,您或许还可以去掉“Verbatim”之类的轮廓(例如,只保留面积与其凸包面积几乎没有差别的轮廓)。 enter image description here

enter image description here

关于python - 在 python 中使用 opencv 检测低对比度图像中的正方形,以便通过 tesseract 读取,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/45202770/

相关文章:

Python 字符串的所有可能组合

python - 字符串的近似周期 - 将 Python 代码移植到 F#

c++ - 'catkin_make' 期间 ROS hydro opencv2 链接错误

c# - 使用 Tesseract Engine 的 Tessnet2 - 为什么输出非常糟糕?

python - pytesseract 输出未定义

java - 将输入流转换为文件

python - 如何使用 Python @singledispatch 注册 Typing.Callable?

python - tf.app.flags 是做什么的?为什么我们需要那个?

c++ - 使用 OpenCV 偏移图像的最佳方法是什么?

c++ - 如何使用 QDataStream 在文件中写入和读取 QList<Mat>?