c++ - 使用 OpenCV 计算文本的倾斜角度

标签 c++ opencv skew

我正在尝试计算图像中文本的倾斜,以便我可以更正它以获得最佳 OCR 结果。

这是我目前使用的函数:

// Estimates a skew angle from the minimum-area rectangle enclosing every
// non-zero pixel that remains after thresholding, inverting and eroding `img`.
//
// `img` is modified in place: it ends up holding the processed binary image
// with the fitted rectangle drawn on top of it.
// NOTE(review): the threshold/invert pair presumably expects dark text on a
// light background in a single-channel 8-bit image - confirm with callers.
//
// Returns the rectangle's angle as reported by cv::minAreaRect, shifted by
// +90 when it is below -45.
double compute_skew(Mat &img)
{
    // Binarize at 225, then invert so the formerly dark pixels become non-zero.
    cv::threshold(img, img, 225, 255, cv::THRESH_BINARY);
    cv::bitwise_not(img, img);

    // Erode with a 5x3 rectangular structuring element.
    const cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(5, 3));
    cv::erode(img, img, kernel);

    // Gather the coordinates of every non-zero pixel.
    std::vector<cv::Point> foreground;
    cv::MatIterator_<uchar> px = img.begin<uchar>();
    const cv::MatIterator_<uchar> stop = img.end<uchar>();
    while (px != stop)
    {
        if (*px != 0)
            foreground.push_back(px.pos());
        ++px;
    }

    cv::RotatedRect box = cv::minAreaRect(cv::Mat(foreground));

    // Fold angles below -45 into the complementary orientation.
    const double raw = box.angle;
    const double angle = (raw < -45.) ? raw + 90. : raw;

    // Outline the fitted rectangle on the processed image.
    cv::Point2f corners[4];
    box.points(corners);
    for (int from = 0, to = 1; from < 4; ++from, to = (to + 1) % 4)
        cv::line(img, corners[from], corners[to], cv::Scalar(255, 0, 0), 1, CV_AA);

    return angle;
}

当我在调试器中查看该角度时,得到的是 0.000000:enter image description here

然而,当我给它这个图像时,我得到了大约 16 度倾斜的正确结果:

enter image description here

如何正确检测第一张图片中的倾斜?

最佳答案

还有其他一些方法可以获取倾斜角度:1) 通过霍夫变换;2) 通过水平投影剖面,即按不同的角度区间(bin)旋转图像并计算水平投影,水平直方图值最大的那个角度就是校正角度。

我在下面提供了 1) 的实现。我认为这优于您正在使用的外接矩形(bounding box)方法,因为后者要求您完全清除图像中的所有噪声,而这在大多数情况下是不可能的。

您应该知道,如果噪声太大,该方法将无法正常工作。您可以根据希望把哪种类型的“线”视为图像中最主要的“线”,以不同的方式减少噪声。我为此提供了两种方法(preprocess1 和 preprocess2)。请务必多尝试调整参数和阈值等。

结果(全部使用 preprocess2 运行,全部使用相同的参数集运行)

代码

#include <cmath>
#include <iostream>
#include <vector>

#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

// Finds the dominant straight line in a binary image with a Hough transform
// and reports its inclination.
//
// im   : image read as 8-bit single channel; only pixels equal to 255 vote,
//        and the one-pixel border is skipped.
// orig : image the detected line is drawn on (Scalar(0,0,255), thickness 1);
//        it is also displayed in the "orig" window.
// skew : receives the line's angle in degrees; 90 when the line is vertical.
//
// The line is parameterised relative to the image centre as
//     (x - cenx) * cos(theta) + (y - ceny) * sin(theta) = rho
// with theta sampled in `angleBins` steps over [0, pi). Accumulator rows are
// indexed by the theta bin and columns by rho shifted by +max_r.
void hough_transform(Mat& im,Mat& orig,double* skew)
{
    // Half the image diagonal: upper bound on |rho| for centre-relative coordinates.
    const double max_r=sqrt(pow(.5*im.cols,2)+pow(.5*im.rows,2));
    const int angleBins = 180;
    Mat acc = Mat::zeros(Size(2*max_r,angleBins),CV_32SC1);
    const int cenx = im.cols/2;
    const int ceny = im.rows/2;

    // The trigonometric terms depend only on the angle bin, so tabulate them
    // once instead of recomputing them for every foreground pixel.
    vector<double> cosTable(angleBins);
    vector<double> sinTable(angleBins);
    for(int t=0;t<angleBins;t++)
    {
        const double binTheta = (double)t/angleBins*CV_PI;
        cosTable[t]=cos(binTheta);
        sinTable[t]=sin(binTheta);
    }

    // Voting: every 255-valued pixel adds one vote per angle bin.
    for(int x=1;x<im.cols-1;x++)
    {
        for(int y=1;y<im.rows-1;y++)
        {
            if(im.at<uchar>(y,x)==255)
            {
                for(int t=0;t<angleBins;t++)
                {
                    double r =(x-cenx)*cosTable[t]+(y-ceny)*sinTable[t];
                    r+=max_r; // shift so that negative rho maps to a valid column
                    acc.at<int>(t,int(r))++;
                }
            }
        }
    }

    // Locate the peak on the raw integer accumulator. Searching after an
    // 8-bit normalisation would make near-peak bins tie at 255 and could
    // return a bin that is not the true maximum.
    Point maxLoc;
    minMaxLoc(acc,0,0,0,&maxLoc);

    /*debug: visualise the accumulator
    Mat accVis;
    normalize(acc,accVis,255,0,NORM_MINMAX);
    convertScaleAbs(accVis,accVis);
    Mat cmap;
    applyColorMap(accVis,cmap,COLORMAP_JET);
    imshow("cmap",cmap);
    imshow("acc",accVis);*/

    const double theta = (double)maxLoc.y/angleBins*CV_PI;
    const double rho = maxLoc.x-max_r;
    if(fabs(sin(theta))<0.000001)//check vertical
    {
        //when vertical, line equation becomes
        //x = rho (relative to the image centre); the slope is undefined
        Point2d p1 = Point2d(rho+im.cols/2,0);
        Point2d p2 = Point2d(rho+im.cols/2,im.rows);
        line(orig,p1,p2,Scalar(0,0,255),1);
        *skew=90;
        cout<<"skew angle "<<" 90"<<endl;
    }else
    {
        //convert normal form back to slope intercept form
        //y = mx + b
        const double m = -cos(theta)/sin(theta);
        const double b = rho/sin(theta)+im.rows/2.-m*im.cols/2.;
        Point2d p1 = Point2d(0,b);
        Point2d p2 = Point2d(im.cols,im.cols*m+b);
        line(orig,p1,p2,Scalar(0,0,255),1);
        // p2 is never to the left of p1 (p2.x - p1.x == im.cols), so the
        // angle is always measured from p1 towards p2.
        const double skewangle = atan2(p2.y-p1.y,p2.x-p1.x)*180./CV_PI;
        *skew=skewangle;
        cout<<"skew angle "<<skewangle<<endl;
    }
    imshow("orig",orig);
}

// Builds a binary edge map from the squared Sobel gradient magnitude.
//
// `im` is read as an 8-bit single-channel image. For every interior pixel the
// 3x3 Sobel responses in both directions are squared and summed; the result is
// min-max normalised to [0, 255], converted to 8 bits and thresholded at 50.
// Border pixels are left at zero before normalisation.
//
// Returns a CV_8UC1 image whose pixels are either 0 or 255.
Mat preprocess1(Mat& im)
{
    Mat ret = Mat::zeros(im.size(),CV_32SC1);

    for(int row=1;row<im.rows-1;row++)
    {
        const uchar* above = im.ptr<uchar>(row-1);
        const uchar* here  = im.ptr<uchar>(row);
        const uchar* below = im.ptr<uchar>(row+1);
        int* out = ret.ptr<int>(row);

        for(int col=1;col<im.cols-1;col++)
        {
            // Right column minus left column, centre row weighted twice.
            const int horizontal = (above[col+1]-above[col-1])
                +2*(here[col+1]-here[col-1])
                +(below[col+1]-below[col-1]);
            // Bottom row minus top row, centre column weighted twice.
            const int vertical = (below[col-1]-above[col-1])
                +2*(below[col]-above[col])
                +(below[col+1]-above[col+1]);
            out[col] = horizontal*horizontal + vertical*vertical;
        }
    }

    normalize(ret,ret,255,0,NORM_MINMAX);
    ret.convertTo(ret,CV_8UC1);
    threshold(ret,ret,50,255,THRESH_BINARY);
    return ret;
}

// Keeps only the pixels that sit on the lower edge of a white region.
//
// `im` is locally thresholded (Gaussian adaptive threshold, block size 15,
// constant -2). An interior pixel of the thresholded image is then marked 255
// in the output when
//   - the pixel itself is 255,
//   - all three neighbours in the row above are non-zero, and
//   - at least one of the three neighbours in the row below is zero.
// NOTE(review): this presumably targets white text on a black background so
// that the surviving pixels trace the bottom of each text line - confirm.
//
// Returns a CV_8UC1 image containing only 0 and 255.
Mat preprocess2(Mat& im)
{
    Mat binary;
    adaptiveThreshold(im,binary,255,CV_ADAPTIVE_THRESH_GAUSSIAN_C,THRESH_BINARY,15,-2);

    Mat ret = Mat::zeros(im.size(),CV_8UC1);
    for(int row=1;row<binary.rows-1;row++)
    {
        const uchar* above = binary.ptr<uchar>(row-1);
        const uchar* here  = binary.ptr<uchar>(row);
        const uchar* below = binary.ptr<uchar>(row+1);
        uchar* out = ret.ptr<uchar>(row);

        for(int col=1;col<binary.cols-1;col++)
        {
            if(here[col]!=255)
                continue;

            const bool aboveAllSet = above[col]!=0 && above[col-1]!=0 && above[col+1]!=0;
            const bool belowHasZero = below[col]==0 || below[col-1]==0 || below[col+1]==0;
            if(aboveAllSet && belowHasZero)
                out[col] = 255;
        }
    }
    return ret;
}
// Rotates `im` by `thetaRad` (radians) onto a canvas enlarged to the bounding
// box of the rotated image, so the corners are not cropped.
//
// The rotation is taken about the centre of the enlarged canvas; the source
// is first shifted by half the size difference so that it is centred there.
// Resampling uses Lanczos interpolation (cv::INTER_LANCZOS4).
//
// Returns the rotated image; `im` is not modified.
Mat rot(Mat& im,double thetaRad)
{
    cv::Mat rotated;
    // Size of the axis-aligned bounding box of the rotated image.
    const double nw = fabs(sin(thetaRad))*im.rows+fabs(cos(thetaRad))*im.cols;
    const double nh = fabs(cos(thetaRad))*im.rows+fabs(sin(thetaRad))*im.cols;
    // getRotationMatrix2D expects the angle in degrees.
    cv::Mat rot_mat = cv::getRotationMatrix2D(Point2d(nw*.5,nh*.5), thetaRad*180/CV_PI, 1);
    // Offset that centres the source inside the enlarged canvas. The third
    // component stays 0, so only the rotation part of rot_mat is applied to it.
    Mat pos = Mat::zeros(Size(1,3),CV_64FC1);
    pos.at<double>(0)=(nw-im.cols)*.5;
    pos.at<double>(1)=(nh-im.rows)*.5;
    Mat res = rot_mat*pos;
    rot_mat.at<double>(0,2) += res.at<double>(0);
    rot_mat.at<double>(1,2) += res.at<double>(1);
    cv::warpAffine(im, rotated, rot_mat,Size(nw,nh), cv::INTER_LANCZOS4);
    return rotated;
}

int main(int argc, char** argv)
{
    string src="C:/data/skew.png";
    Mat im= imread(src);
    Mat gray;
    cvtColor(im,gray,CV_BGR2GRAY);

    Mat preprocessed = preprocess2(gray);
    imshow("preprocessed2",preprocessed);
    double skew;
    hough_transform(preprocessed,im,&skew);
    Mat rotated = rot(im,skew* CV_PI/180);
    imshow("corrected",rotated);

    waitKey(0);
    return 0;
}

关于c++ - 计算文本 OpenCV 的偏斜,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/24046089/

相关文章:

c++ - const char** x 和 const char* x[] 有何相似之处?

c++ - 我从哪里获得适用于 VS21015 的正确 gdi+ c++ 包装器?

c++ - 我们如何确认内在矩阵是正确的?

c++ - opencv中的偏斜检测和减少

c++ - 在每个元音处添加字符

c++ - 使用 C++Builder 后期绑定(bind) COM 对象

opencv - 如何针对低分辨率/模糊素材优化 OpenCV 的 Canny 边缘检测

visual-c++ - 从 MinGW 的 MSVC DLL 调用函数

javascript - 布料模拟+图像上的 Canvas 变换

css - 如何倾斜 ul li 但不倾斜 ul li a 中的 <a>?