我正在尝试在街景门牌号(SVHN)数据集上训练一个 Keras CNN。你可以在 here 找到该项目。问题是,在训练过程中损失和准确率都不随时间变化。我已经尝试过:使用单通道(灰度)图像和 RGB(3 通道)图像;更大(50,50)和更小(28,28)的图像;在卷积层中使用更多或更少的过滤器;在池化层中使用更大或更小的窗口;使用或不使用 dropout;更大或更小的批大小;更小的优化器学习率;不同的优化器……
但训练过程中损失和准确率始终保持不变。
这是我准备数据的方式
from PIL import Image
from PIL import ImageFilter
# Dataset locations, relative to the working directory.
train_folders = 'sv_train/train'
test_folders = 'test'
extra_folders = 'extra'

# Side length (pixels) and channel count of every image kept in sv_images.
SV_IMG_SIZE = 28
SV_CHANNELS = 3

# Uninitialised buffer with one two-column row per entry of train_data.
train_imsize = np.empty((len(train_data), 2))
k = 500  # not referenced by the code visible in this snippet

# Number of leading train_data entries to process and the longest digit
# sequence that is accepted; use len(train_data) to process everything.
max_images = 20000
max_digits = 5

sv_images = []
# Every label slot starts at 10, the class that stands for "no digit".
sv_labels = np.full((max_images, max_digits), 10, dtype=int)
# One independent list per image; getBBox stores box geometry in it.
nboxes = [[] for _ in range(max_images)]

print("%d to load" % len(train_data))
def getBBox(i,perc):
    """Return the padded box that encloses every digit box of image ``i``.

    The digit boxes are read from the module-level ``train_data[i]['boxes']``;
    each box is a mapping with ``left``, ``top``, ``width`` and ``height``.
    As a side effect ``nboxes[i]`` is set to ``[dpx, dpy, dx, dy]`` where
    ``dx``/``dy`` are the width/height of the union of all digit boxes and
    ``dpx``/``dpy`` the margin added on each side.

    Args:
        i: Index into ``train_data`` and ``nboxes``.
        perc: Margin added on every side, as a fraction of the union box's
            width (horizontally) and height (vertically).

    Returns:
        ``(left, top, right, bottom)`` of the padded box. The values are not
        clamped to the image, so they can fall outside of it.

    Raises:
        ValueError: If the image has no digit boxes.
    """
    boxes = train_data[i]['boxes']
    # len() rather than truthiness so that array-like containers work too.
    if len(boxes) == 0:
        raise ValueError("image %d has no digit boxes" % i)

    # Union of all digit boxes; built-in min/max avoid any magic sentinel
    # that would cap the coordinates that can be handled.
    x_min = min(b['left'] for b in boxes)
    y_min = min(b['top'] for b in boxes)
    x_max = max(b['left'] + b['width'] for b in boxes)
    y_max = max(b['top'] + b['height'] for b in boxes)

    dx = x_max - x_min
    dy = y_max - y_min
    dpx = dx * perc
    dpy = dy * perc
    nboxes[i] = [dpx, dpy, dx, dy]
    return x_min - dpx, y_min - dpy, x_max + dpx, y_max + dpy
# Load the first max_images entries of train_data: crop each image to the
# padded union of its digit boxes, resize it to SV_IMG_SIZE x SV_IMG_SIZE and
# store the pixels in sv_images and the right-aligned digit labels in
# sv_labels. Images with more than max_digits digits are skipped.
#
# NOTE(review): SVHN annotations conventionally store the digit 0 as label 10,
# which would collide with the "no digit" filler 10 used in sv_labels —
# confirm how train_data was decoded.
# NOTE(review): pixel values are stored as float32 without rescaling
# (presumably 0..255 for 8-bit images) — scale them before training.
kept_rows = []  # indices whose image was actually appended to sv_images
for i in range(max_images):
    print(" \r%d" % i, end="")
    entry = train_data[i]
    boxes = entry['boxes']
    lb = len(boxes)
    if lb > max_digits:
        continue

    fullname = os.path.join(train_folders, entry['filename'])
    x_min, y_min, x_max, y_max = getBBox(i, 0.3)

    # Finish all pixel work inside the context manager so the file handle is
    # released before the next image is opened.
    with Image.open(fullname) as source:
        im = source.crop([x_min, y_min, x_max, y_max])
        owidth, oheight = im.size
        # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
        # which is no longer available in current Pillow releases.
        im = im.resize((SV_IMG_SIZE, SV_IMG_SIZE), Image.LANCZOS)
        array = np.asarray(im)

    # Labels are right-aligned; unused leading slots keep the filler value.
    for bid, box in enumerate(boxes):
        sv_labels[i][max_digits - lb + bid] = int(box['label'])

    # Rescale the stored geometry [dpx, dpy, dx, dy] to the resized image:
    # horizontal quantities by the width ratio, vertical by the height ratio.
    wr = SV_IMG_SIZE / float(owidth)
    hr = SV_IMG_SIZE / float(oheight)
    box = nboxes[i]
    box[0] *= wr
    box[1] *= hr
    box[2] *= wr
    box[3] *= hr

    array = array.reshape((SV_IMG_SIZE, SV_IMG_SIZE, SV_CHANNELS))
    sv_images.append(array.astype(np.float32))
    kept_rows.append(i)

# Drop the label rows of skipped images so that sv_labels[n] always belongs
# to sv_images[n]. nboxes stays indexed by the original image index.
sv_labels = sv_labels[np.asarray(kept_rows, dtype=int)]
这是模型
from keras.optimizers import Adam
from keras.utils.np_utils import to_categorical
# Multi-output CNN: a shared convolutional trunk followed by five softmax
# heads, one per digit position. Each head predicts one of 11 classes
# (the labels 0..10, where 10 is the "no digit" filler).
NUM_CLASSES = 11

# A step size of 0.5 is several orders of magnitude above what Adam is
# normally run with (the Keras default is 0.001) and is a likely cause of a
# loss that never moves, so the default step size is used instead.
adam = Adam(lr=0.001)

x = Input((SV_IMG_SIZE, SV_IMG_SIZE,SV_CHANNELS))
y = Convolution2D(16, 3, 3, activation='relu', border_mode='same')(x)
y = Convolution2D(32, 3, 3, activation='relu', border_mode='valid')(y)
y = MaxPooling2D((2, 2))(y)
y = Convolution2D(128, 3, 3, activation='relu', border_mode='valid')(y)
y = MaxPooling2D((2, 2))(y)
y = Flatten()(y)
y = Dense(512, activation='relu')(y)

# One independent classification head per digit position.
digit1, digit2, digit3, digit4, digit5 = [
    Dense(NUM_CLASSES, activation="softmax")(y) for _ in range(5)
]

model = Model(input=x, output=[digit1, digit2, digit3,digit4,digit5])
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# One one-hot target array per head. The class count is passed explicitly so
# that every target is NUM_CLASSES wide even when the highest label happens
# to be missing from a column.
sv_train_labels = [
    to_categorical(svt_labels[:, position], NUM_CLASSES)
    for position in range(5)
]
sv_validation_labels = [
    to_categorical(svv_labels[:, position], NUM_CLASSES)
    for position in range(5)
]

model.fit(sv_train, sv_train_labels, nb_epoch=50, batch_size=8,validation_data=(sv_validation, sv_validation_labels))
最佳答案
正如我在上面的评论中所说,我建议不要训练模型去直接预测 5 位数字的组合,训练模型预测单个数字会更有效。我基于 Keras 示例 cifar10_cnn.py,在类 MNIST 的 SVHN format 2 (cropped digits) 数据上构建了一个简单示例:
import numpy as np
import scipy.io as sio
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils.np_utils import to_categorical
# Train a small CNN that classifies single cropped SVHN digits
# (the 32x32 .mat files), using the test split as validation data.

# parameters
nb_epoch = 10
batch_size = 32

# load data
nb_classes = 10
train_data = sio.loadmat('train_32x32.mat')
test_data = sio.loadmat('test_32x32.mat')

# Cast to float32 before scaling to [0, 1]: dividing the raw integer array
# would truncate to zero under Python 2 and upcast to float64 under Python 3.
# NOTE(review): .T reverses the axis order, which presumably yields
# channels-first samples — confirm this matches the backend's image ordering.
X_train = train_data['X'].T.astype('float32') / 255
X_test = test_data['X'].T.astype('float32') / 255

# "% nb_classes" folds label 10 onto class 0 (SVHN stores the digit 0 as 10);
# the explicit class count keeps both target arrays nb_classes wide.
y_train = to_categorical(train_data['y'] % nb_classes, nb_classes)
y_test = to_categorical(test_data['y'] % nb_classes, nb_classes)

# model
model = Sequential()

# Two conv/conv/pool stages, each followed by dropout.
model.add(Convolution2D(32, 3, 3, border_mode='same', input_shape=X_train.shape[1:]))
model.add(Activation('relu'))
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(64, 3, 3, border_mode='same'))
model.add(Activation('relu'))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Fully connected classifier.
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# train
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, validation_data=(X_test, y_test), shuffle=True)
训练好该模型后,再训练另一个模型,借助 OpenCV 之类的库从图像中识别/提取出每一个数字。
关于python - Keras 模型获得恒定的损失和准确性,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/42453546/