我想将手写的阿拉伯语单词从图片中分割成字符。我可以遵循哪种算法来实现这一点?我如何使用 python 或 matLab 做到这一点?或者我可以使用直方图方法吗?另外,我如何在 python 或 matlab 中实现它?请我需要帮助和简单明了的方法,以便我可以遵循它。
我想分割图像以获得此结果(图片中的每个字符)
这是我在 github 中找到的代码,用于英文草书手写:
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib as mpl
import traceback
mpl.rcParams['legend.fontsize'] = 10
pd.set_option('display.expand_frame_repr', False)
fn=0
path='result/'
#Taking any image from the sample images
#In case of slanted image, straighten it using image-straighten.py, then use it
img = cv.imread('sample_images/7.PNG')
# In[findFeaturPoints]
def findCapPoints(img):
cpoints=[]
dpoints=[]
for i in range(img.shape[1]):
col = img[:,i:i+1]
k = col.shape[0]
while k > 0:
if col[k-1]==255:
dpoints.append((i,k))
break
k-=1
for j in range(col.shape[0]):
if col[j]==255:
cpoints.append((i,j))
break
return cpoints,dpoints
# In[wordSegment]
#*****************************************************************************#
def wordSegment(textLines):
wordImgList=[]
counter=0
cl=0
for txtLine in textLines:
gray = cv.cvtColor(txtLine, cv.COLOR_BGR2GRAY)
th, threshed = cv.threshold(gray, 100, 255, cv.THRESH_BINARY_INV|cv.THRESH_OTSU)
final_thr = cv.dilate(threshed,None,iterations = 20)
plt.imshow(final_thr)
plt.show()
contours, hierarchy = cv.findContours(final_thr,cv.RETR_EXTERNAL,cv.CHAIN_APPROX_SIMPLE)
boundingBoxes = [cv.boundingRect(c) for c in contours]
(contours, boundingBoxes) = zip(*sorted(zip(contours, boundingBoxes), key=lambda b: b[1][0], reverse=False))
for cnt in contours:
area = cv.contourArea(cnt)
# print area
if area > 10000:
print ('Area= ',area)
x,y,w,h = cv.boundingRect(cnt)
print (x,y,w,h)
letterBgr = txtLine[0:txtLine.shape[1],x:x+w]
wordImgList.append(letterBgr)
cv.imwrite("result/words/" + str(counter) +".jpg",letterBgr)
counter=counter+1
cl=cl+1
return wordImgList
#*****************************************************************************#
# In[fitToSize]
#*****************************************************************************#
def fitToSize(thresh1):
mask = thresh1 > 0
coords = np.argwhere(mask)
x0, y0 = coords.min(axis=0)
x1, y1 = coords.max(axis=0) + 1 # slices are exclusive at the top
cropped = thresh1[x0:x1,y0:y1]
return cropped
#*****************************************************************************#
# In[lineSegment]
#*****************************************************************************#
def lineSegment(img):
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
th, threshed = cv.threshold(gray, 127, 255, cv.THRESH_BINARY_INV|cv.THRESH_OTSU)
upper=[]
lower=[]
flag=True
for i in range(threshed.shape[0]):
col = threshed[i:i+1,:]
cnt=0
if flag:
cnt=np.count_nonzero(col == 255)
if cnt >0:
upper.append(i)
flag=False
else:
cnt=np.count_nonzero(col == 255)
if cnt <2:
lower.append(i)
flag=True
textLines=[]
if len(upper)!= len(lower):lower.append(threshed.shape[0])
# print upper
# print lower
for i in range(len(upper)):
timg=img[upper[i]:lower[i],0:]
if timg.shape[0]>5:
# plt.imshow(timg)
# plt.show()
timg=cv.resize(timg,((timg.shape[1]*5,timg.shape[0]*8)))
textLines.append(timg)
return textLines
#*****************************************************************************#
# In[baselines]:
##******************************************************************************#
def baselines(letter2, upoints, dpoints):
##-------------------------Creating upper baseline-------------------------------##
colu = []
for i in range(len(upoints)):
colu.append(upoints[i][1])
maxyu = max(colu)
minyu = min(colu)
avgu = (maxyu + minyu) // 2
meanu = np.around(np.mean(colu)).astype(int)
print('Upper:: Max, min, avg, mean:: ',maxyu, minyu, avgu, meanu)
##-------------------------------------------------------------------------------##
##-------------------------Creating lower baseline process 1--------------------------##
cold = []
for i in range(len(dpoints)):
cold.append(dpoints[i][1])
maxyd = max(cold)
minyd = min(cold)
avgd = (maxyd + minyd) // 2
meand = np.around(np.mean(cold)).astype(int)
print('Lower:: Max, min, avg, mean:: ',maxyd, minyd, avgd, meand)
##-------------------------------------------------------------------------------##
##-------------------------Creating lower baseline process 2---------------------------##
cn = []
count = 0
for i in range(h):
for j in range(w):
if(letterGray[i,j] == 255):
count+=1
if(count != 0):
cn.append(count)
count = 0
maxindex = cn.index(max(cn))
print('Max pixels at: ',maxindex)
##------------------Printing upper and lower baselines-----------------------------##
cv.line(letter2,(0,meanu),(w,meanu),(255,0,0),2)
lb = 0
if(maxindex > meand):
lb = maxindex
cv.line(letter2,(0,maxindex),(w,maxindex),(255,0,0),2)
else:
lb = meand
cv.line(letter2,(0,meand),(w,meand),(255,0,0),2)
plt.imshow(letter2)
plt.show()
return meanu, lb
##******************************************************************************###
# In[histogram]:
##*******************************************************************************###
def histogram(letter2, upper_baseline, lower_baseline):
##------------Making Histograms (Default)------------------------######
cropped = letter2[upper_baseline:lower_baseline,0:w]
plt.imshow(cropped)
plt.show()
colcnt = np.sum(cropped==255, axis=0)
x = list(range(len(colcnt)))
plt.plot(colcnt)
plt.fill_between(x, colcnt, 1, facecolor='blue', alpha=0.5)
plt.show()
return colcnt
####---------------------------------------------------------------------------#####
# In[Visualize]:
##*******************************************************************************###
def visualize(letter2, upper_baseline, lower_baseline, min_pixel_threshold, min_separation_threshold, min_round_letter_threshold):
seg = []
seg1 = []
seg2 = []
## Check if pixel count is less than min_pixel_threshold, add segmentation point
for i in range(len(colcnt)):
if(colcnt[i] < min_pixel_threshold):
seg1.append(i)
## Check if 2 consequtive seg points are greater than min_separation_threshold in distance
for i in range(len(seg1)-1):
if(seg1[i+1]-seg1[i] > min_separation_threshold):
seg2.append(seg1[i])
##------------Modified segmentation for removing circles----------------------------###
arr=[]
for i in (seg2):
arr1 = []
j = upper_baseline
while(j <= lower_baseline):
if(letterGray[j,i] == 255):
arr1.append(1)
else:
arr1.append(0)
j+=1
arr.append(arr1)
print('At arr Seg here: ', seg2)
ones = []
for i in (arr):
ones1 = []
for j in range(len(i)):
if (i[j] == 1):
ones1.append([j])
ones.append(ones1)
diffarr = []
for i in (ones):
diff = i[len(i)-1][0] - i[0][0]
diffarr.append(diff)
print('Difference array: ',diffarr)
for i in range(len(seg2)):
if(diffarr[i] < min_round_letter_threshold):
seg.append(seg2[i])
##---------------------------------------------------------------------------##
## Make the Cut
for i in range(len(seg)):
letter3 = cv.line(letter2,(seg[i],0),(seg[i],h),(255,0,0),2)
print("Does it work::::")
plt.imshow(letter3)
plt.show()
return seg
###---------------------------------------------------------------------------#####
# In[segmentCharacters]
def segmentCharacters(seg,lettergray):
s=0
wordImgList = []
global fn
for i in range(len(seg)):
if i==0:
s=seg[i]
if s > 15:
wordImg = lettergray[0:,0:s]
cntx=np.count_nonzero(wordImg == 255)
print ('count',cntx)
plt.imshow(wordImg)
plt.show()
fn=fn+1
else:
continue
elif (i != (len(seg)-1)):
if seg[i]-s > 15:
wordImg = lettergray[0:,s:seg[i]]
cntx=np.count_nonzero(wordImg == 255)
print ('count',cntx)
plt.imshow(wordImg)
plt.show()
fn=fn+1
s=seg[i]
else:
continue
else:
wordImg = lettergray[0:,seg[len(seg)-1]:]
cntx=np.count_nonzero(wordImg == 255)
print ('count',cntx)
plt.imshow(wordImg)
plt.show()
fn=fn+1
wordImgList.append(wordImg)
return wordImgList
#*****************************************************************************#
# In[Main]:
try:
textLines=lineSegment(img)
print ('No. of Lines',len(textLines))
imgList=wordSegment(textLines)
print ('No. of Words',len(imgList))
counter = 0
for letterGray in imgList:
print ('LetterGray shape: ',letterGray.shape)
gray = cv.cvtColor(letterGray, cv.COLOR_BGR2GRAY)
th, letterGray = cv.threshold(gray, 127, 255, cv.THRESH_BINARY_INV|cv.THRESH_OTSU)
letterGray = fitToSize(letterGray)
letter2 = letterGray.copy()
letterGray = cv.dilate(letterGray,None,iterations = 4)
h = letterGray.shape[0]
w = letterGray.shape[1]
upoints, dpoints=findCapPoints(letterGray)
meanu, lb = baselines(letter2, upoints, dpoints)
##-----------Final Baseline row numbers-----------------------####
# Ignore all points avove and below these rows
upper_baseline = meanu
lower_baseline = lb
##--------------------Make histogram-------------------------------------###
colcnt = histogram(letter2, upper_baseline, lower_baseline)
###------------------------Visualize segmentation------------------------------#####
## Tuning Parameters
min_pixel_threshold = 80
min_separation_threshold = 60
min_round_letter_threshold = 500
seg = visualize(letter2, upper_baseline, lower_baseline, min_pixel_threshold, min_separation_threshold, min_round_letter_threshold)
wordImgList = segmentCharacters(seg,letterGray)
for i in wordImgList:
cv.imwrite("result/characters/" + str(counter) +".jpeg",i)
counter=counter+1
###---------------------------------------------------------------------------#####
print('Original Image')
plt.imshow(img)
plt.show()
except Exception as e:
print ('Error Message ',e)
cv.destroyAllWindows()
traceback.print_exc()
pass
traceback.print_exc()
问题是我找不到分割阿拉伯语单词的最佳参数:这是我得到的直方图,我想在曲线的每一端准确裁剪:
提前致谢
最佳答案
好的,英文草书就是一个很好的例子,但你必须用阿拉伯文字的图像来训练它。
一个好主意:下载阿拉伯字符的图像数据集。这个会很棒:https://www.kaggle.com/mloey1/ahcd1
然后,训练一个物体检测机器来使用它。它应该能够在阿拉伯语单词或句子的图像中指出正在使用哪些字符。然后,您可以为每个字符使用边界框的尺寸,并为每个字符输出一个图像,裁剪为该字符。
关于python - 逐字分割阿拉伯语单词python,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/60480915/