引言:
過去幾周我一直在涉足深度學習領域,尤其是卷積神經網絡模型。最近,谷歌圍繞街景多位數字識別技術發布了一篇不錯的paper。該文章描述了一個用于提取街景門牌號的單個端到端神經網絡系統。然后,作者闡述了基于同樣的網絡結構如何來突破谷歌驗證碼識別系統的準確率。
為了親身體驗神經網絡的實現,我決定嘗試設計一個可以解決類似問題的系統:國內車牌號自動識別系統。設計這樣一個系統的原因有3點:
- 我應該能夠參照谷歌那篇paper搭建一個同樣的或者類似的網絡架構:谷歌提供的那個網絡架構在驗證碼識別上相當不錯,那么講道理的話,用它來識別車牌號應該也會很給力。擁有一個知名的網絡架構將會大大地簡化我學習CNN的步驟。
- 我可以很容易地生成訓練數據(車牌號)。訓練神經網絡存在一個很大的問題就是需要大量的標簽樣本。通常要訓練好一個網絡就需要幾十萬張標記過的圖片。
- 好奇心。傳統的車牌號自動識別系統依賴于自己編寫算法來實現車牌定位,標準化,分割和字符識別等功能。照這樣的話,實現這些系統的代碼可能達到上千行。然而,我比較感興趣的是,如何使用相對較少的代碼和最少的專業領域知識來開發一個不錯的系統。
開發該項目的環境要求有Python,Tensorflow,OpenCV和NumPy等軟件。源代碼在這里。
首先項目結構及成果圖:
合成圖片
genplate.py
為了訓練任何一個神經網絡,必須提供一套擁有正確輸出的訓練數據。
?
文本和車牌背景采用國內正常的車牌格式,但是文本顏色必須比車牌顏色更深一些。這是為了模擬真實場景的光線變化。最后再加入一些噪音,這樣不僅能夠模擬真實傳感器的噪音,而且能夠避免網絡過多依賴于銳利的輪廓邊界,因為實際的輸入圖片往往是離焦的。font目錄存放字體文件,images目錄存放車牌背景及噪聲圖片。
?
車牌變換采用了一種基于隨機滾轉、傾斜、偏轉、平移以及縮放的仿射變換。每個參數允許的范圍是車牌號可能被看到的所有情況的集合。比如,偏轉比滾轉允許變化更多(你更可能看到一輛汽車在拐彎而不是翻轉到一邊)。
import PIL
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw
import cv2;
import numpy as np;
import os;
from math import *# 生成車牌
# font = ImageFont.truetype("Arial-Bold.ttf",14)

# Plate alphabet: 31 province abbreviations, the digits 0-9 and the 24 letters
# used on plates (I and O are absent). A character's position in this list is
# its class id.
chars = ["京", "滬", "津", "渝", "冀", "晉", "蒙", "遼", "吉", "黑", "蘇", "浙", "皖", "閩", "贛", "魯", "豫", "鄂", "湘", "粵", "桂",
         "瓊", "川", "貴", "云", "藏", "陜", "甘", "青", "寧", "新", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A",
         "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V", "W", "X",
         "Y", "Z"]

# Reverse lookup (character -> class id), derived from ``chars`` so the two
# tables cannot drift apart.
index = {char: class_id for class_id, char in enumerate(chars)}


def AddSmudginess(img, Smu):
    """Blend a random 50x50 patch of the smudge texture into ``img``.

    Args:
        img: Image to smudge. It is resized to 50x50 before blending, so the
            result is always 50x50.
        Smu: Smudge texture; a 50x50 window is cut from it at a random offset
            chosen with ``r``.

    Returns:
        The smudged 50x50 image.
    """
    rows = r(Smu.shape[0] - 50)
    cols = r(Smu.shape[1] - 50)
    adder = Smu[rows:rows + 50, cols:cols + 50]
    adder = cv2.resize(adder, (50, 50))
    img = cv2.resize(img, (50, 50))
    # not(and(adder, not(img))): invert, mask with the smudge patch, invert back.
    img = cv2.bitwise_not(img)
    img = cv2.bitwise_and(adder, img)
    img = cv2.bitwise_not(img)
    return img
def rot(img, angel, shape, max_angel):
    """Slant the image sideways with a perspective warp.

    Args:
        img: Image handed to ``cv2.warpPerspective``.
        angel: Slant angle in degrees. A positive value shifts the top-left
            and bottom-right corners inwards, any other value shifts the
            bottom-left and top-right corners.
        shape: Shape of ``img``; only ``shape[0]`` (rows) and ``shape[1]``
            (columns) are read.
        max_angel: Angle in degrees used to size the output canvas.

    Returns:
        The warped image, ``shape[0]`` rows high and
        ``shape[1] + int(shape[0] * cos(max_angel))`` columns wide.
    """
    height, width = shape[0], shape[1]
    # radians() replaces the former "/180 * 3.14" approximation of pi.
    out_size = (width + int(height * cos(radians(max_angel))), height)
    interval = abs(int(sin(radians(angel)) * height))
    pts1 = np.float32([[0, 0], [0, height], [width, 0], [width, height]])
    if angel > 0:
        pts2 = np.float32([[interval, 0], [0, height],
                           [out_size[0], 0], [out_size[0] - interval, height]])
    else:
        pts2 = np.float32([[0, 0], [interval, height],
                           [out_size[0] - interval, 0], [out_size[0], height]])
    M = cv2.getPerspectiveTransform(pts1, pts2)
    dst = cv2.warpPerspective(img, M, out_size)
    return dst
def rotRandrom(img, factor, size):
    """Apply a small random perspective jitter to ``img``.

    Every corner of the reference rectangle is moved inwards by independent
    offsets drawn with ``r(factor)``, and the image is warped accordingly.

    Args:
        img: Image to warp.
        factor: Exclusive upper bound passed to ``r`` for each corner offset.
        size: Two-element size; used for the corner coordinates and passed
            unchanged to ``cv2.warpPerspective`` as the output size.

    Returns:
        The warped image.

    NOTE(review): ``size`` goes to ``cv2.warpPerspective`` as ``dsize`` while
    the corners below read ``size[0]`` as the y extent and ``size[1]`` as the
    x extent - confirm which order is intended before changing it.
    """
    y_max, x_max = size[0], size[1]
    src = np.float32([[0, 0], [0, y_max], [x_max, 0], [x_max, y_max]])
    # The r() calls are kept in the original left-to-right order.
    dst = np.float32([
        [r(factor), r(factor)],
        [r(factor), y_max - r(factor)],
        [x_max - r(factor), r(factor)],
        [x_max - r(factor), y_max - r(factor)],
    ])
    transform = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(img, transform, size)
def tfactor(img):
    """Randomly scale down the hue, saturation and value channels of ``img``.

    The image is converted BGR -> HSV, each channel is multiplied by its own
    random factor (hue 0.8-1.0, saturation 0.3-1.0, value 0.2-1.0), and the
    result is converted back to BGR.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # (lower bound, random span) per HSV channel, in channel order.
    scale_ranges = ((0.8, 0.2), (0.3, 0.7), (0.2, 0.8))
    for channel, (low, span) in enumerate(scale_ranges):
        hsv[:, :, channel] = hsv[:, :, channel] * (low + np.random.random() * span)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
def random_envirment(img, data_set):
    """Fill the zero-valued pixels of ``img`` with a random background image.

    Args:
        img: Image whose entries equal to 0 are treated as empty.
        data_set: Sequence of image file paths; one is picked with ``r``.

    Returns:
        ``img`` with its zero entries replaced by the resized background.
    """
    chosen = r(len(data_set))
    background = cv2.imread(data_set[chosen])
    background = cv2.resize(background, (img.shape[1], img.shape[0]))
    # 255 wherever the plate image is exactly 0, 0 elsewhere.
    empty_mask = (img == 0).astype(np.uint8) * 255
    fill = cv2.bitwise_and(empty_mask, background)
    return cv2.bitwise_or(fill, img)
def GenCh(f, val):
    """Render ``val`` with font ``f`` on a 45x70 canvas squeezed to 23x70.

    The text is drawn in black on white at offset (0, 3).

    Returns:
        The resized image as a numpy array.
    """
    canvas = Image.new("RGB", (45, 70), (255, 255, 255))
    ImageDraw.Draw(canvas).text((0, 3), val, (0, 0, 0), font=f)
    squeezed = canvas.resize((23, 70))
    return np.array(squeezed)
def GenCh1(f, val):
    """Render ``val`` with font ``f`` directly on a 23x70 canvas.

    The text is drawn in black on white at offset (0, 2).

    Returns:
        The image as a numpy array.
    """
    canvas = Image.new("RGB", (23, 70), (255, 255, 255))
    pen = ImageDraw.Draw(canvas)
    pen.text((0, 2), val, (0, 0, 0), font=f)
    return np.array(canvas)
def AddGauss(img, level):
    """Blur ``img`` with a square ``cv2.blur`` kernel of side ``2 * level + 1``."""
    kernel = level * 2 + 1
    return cv2.blur(img, (kernel, kernel))


def r(val):
    """Return ``int(np.random.random() * val)``, i.e. an integer in [0, val) for val > 0."""
    return int(np.random.random() * val)


def AddNoiseSingleChannel(single):
    """Add random noise to one image channel.

    Normally distributed noise (standard deviation 1 + r(6)) is rescaled to
    the range [0, 255 - single.max()] before it is added, so the sum stays
    within 255.

    Returns:
        ``single + noise``.
    """
    diff = 255 - single.max()
    noise = np.random.normal(0, 1 + r(6), single.shape)
    noise = (noise - noise.min()) / (noise.max() - noise.min())
    noise = diff * noise
    noise = noise.astype(np.uint8)
    dst = single + noise
    return dst


def addNoise(img, sdev=0.5, avg=10):
    """Add noise to the first three channels of ``img`` in place and return it.

    ``sdev`` and ``avg`` are accepted for compatibility but are not used.
    """
    for channel in range(3):
        img[:, :, channel] = AddNoiseSingleChannel(img[:, :, channel])
    return img


class GenPlate:
    """Synthesises licence-plate images for training."""

    def __init__(self, fontCh, fontEng, NoPlates):
        """Load fonts, the plate template, the smudge texture and background paths.

        Args:
            fontCh: Font file used for the first (province) character.
            fontEng: Font file used for the remaining characters.
            NoPlates: Directory walked recursively to collect background images.
        """
        self.fontC = ImageFont.truetype(fontCh, 43, 0)
        self.fontE = ImageFont.truetype(fontEng, 60, 0)
        self.img = np.array(Image.new("RGB", (226, 70), (255, 255, 255)))
        self.bg = cv2.resize(cv2.imread("./images/template.bmp"), (226, 70))
        self.smu = cv2.imread("./images/smu2.jpg")
        self.noplates_path = []
        for parent, parent_folder, filenames in os.walk(NoPlates):
            for filename in filenames:
                path = parent + "/" + filename
                self.noplates_path.append(path)

    def draw(self, val):
        """Draw the seven characters of ``val`` into ``self.img`` and return it."""
        offset = 2
        self.img[0:70, offset + 8:offset + 8 + 23] = GenCh(self.fontC, val[0])
        self.img[0:70, offset + 8 + 23 + 6:offset + 8 + 23 + 6 + 23] = GenCh1(self.fontE, val[1])
        for i in range(5):
            # 23-pixel glyphs with a 6-pixel gap; an extra 17 pixels separate
            # the first two characters from the last five.
            base = offset + 8 + 23 + 6 + 23 + 17 + i * 23 + i * 6
            self.img[0:70, base:base + 23] = GenCh1(self.fontE, val[i + 2])
        return self.img

    def generate(self, text):
        """Render ``text`` as an augmented plate image.

        Returns:
            The augmented image, or ``None`` when ``text`` is not exactly
            seven characters long.
        """
        if len(text) != 7:
            return None
        fg = self.draw(text)
        fg = cv2.bitwise_not(fg)
        com = cv2.bitwise_or(fg, self.bg)
        com = rot(com, r(60) - 30, com.shape, 30)
        com = rotRandrom(com, 10, (com.shape[1], com.shape[0]))
        # com = AddSmudginess(com,self.smu)
        com = tfactor(com)
        com = random_envirment(com, self.noplates_path)
        com = AddGauss(com, 1 + r(4))
        com = addNoise(com)
        return com

    def genPlateString(self, pos, val):
        """Build a random seven-character plate string.

        Args:
            pos: Index whose character is forced to ``val``; -1 forces none.
            val: String inserted at ``pos``.

        Returns:
            A province character, then a letter, then five letters or digits,
            all taken from ``chars``.
        """
        box = [0, 0, 0, 0, 0, 0, 0]
        if pos != -1:
            box[pos] = 1
        pieces = []
        for cpos, unit in enumerate(box):
            if unit == 1:
                pieces.append(val)
            elif cpos == 0:
                pieces.append(chars[r(31)])
            elif cpos == 1:
                pieces.append(chars[41 + r(24)])
            else:
                pieces.append(chars[31 + r(34)])
        return "".join(pieces)

    def genBatch(self, batchSize, pos, charRange, outputPath, size):
        """Write ``batchSize`` random plates as ``NN.jpg`` files into ``outputPath``.

        ``pos`` and ``charRange`` are accepted for compatibility but are not used.
        """
        os.makedirs(outputPath, exist_ok=True)
        for i in range(batchSize):
            # Use this instance, not a module-level global, so any GenPlate
            # object can generate a batch.
            plateStr = self.genPlateString(-1, -1)
            img = self.generate(plateStr)
            img = cv2.resize(img, size)
            cv2.imwrite(outputPath + "/" + str(i).zfill(2) + ".jpg", img)
# Module-level generator instance.
G = GenPlate("./font/platech.ttf", './font/platechar.ttf', "./NoPlates")

# Write the sample batch only when this file is run as a script. Left
# unguarded, the call re-ran and rewrote ./plate every time another module
# imported this one.
if __name__ == "__main__":
    G.genBatch(15, 2, range(31, 65), "./plate", (272, 72))
input_data.py 省略....
產生用于訓練的數據
# 批量生成車牌,供算法調用
?
?
網絡結構 # 算法的核心,卷積神經網絡
使用的卷積神經網絡結構如model.py所示:
import tensorflow as tf
import numpy as np# 算法的核心,卷積神經網絡
def _conv_relu(scope_name, inputs, in_channels, out_channels):
    """Create a 3x3 VALID convolution + bias + ReLU inside ``scope_name``."""
    with tf.variable_scope(scope_name) as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, in_channels, out_channels],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='VALID')
        biases = tf.get_variable('biases',
                                 shape=[out_channels],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        return tf.nn.relu(pre_activation, name=scope.name)


def _max_pool_2x2(scope_name, inputs, op_name):
    """Create a 2x2, stride-2 VALID max-pool named ``op_name`` inside ``scope_name``."""
    with tf.variable_scope(scope_name):
        return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                              padding='VALID', name=op_name)


def _char_head(scope_name, features, num_features):
    """Create one linear 65-way output head (logits, no activation) inside ``scope_name``."""
    with tf.variable_scope(scope_name):
        weights = tf.get_variable('weights',
                                  shape=[num_features, 65],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        # NOTE(review): the positional 0.1 is the initializer's first argument
        # (the mean, presumably), leaving stddev at its default - confirm that a
        # constant 0.1 bias was not intended.
        biases = tf.get_variable('biases',
                                 shape=[65],
                                 dtype=tf.float32,
                                 initializer=tf.truncated_normal_initializer(0.1))
        return tf.matmul(features, weights) + biases


def inference(images, keep_prob):
    """Build the plate-recognition CNN.

    Three stages of two 3x3 convolutions followed by a 2x2 max-pool
    (32, 64 and 128 channels), then dropout on the flattened features and
    seven independent linear heads, one per plate character.

    Args:
        images: Image batch, 4-D tf.float32 tensor
            [batch_size, height, width, channels]; the first convolution
            expects 3 channels.
        keep_prob: Keep probability passed to ``tf.nn.dropout``.

    Returns:
        A tuple of seven logits tensors, each [batch_size, 65], in plate
        order: the province character, the city letter, then the remaining
        five characters.
    """
    conv1 = _conv_relu('conv1', images, 3, 32)
    conv2 = _conv_relu('conv2', conv1, 32, 32)
    pool1 = _max_pool_2x2('max_pooling1', conv2, 'pooling1')

    conv3 = _conv_relu('conv3', pool1, 32, 64)
    conv4 = _conv_relu('conv4', conv3, 64, 64)
    pool2 = _max_pool_2x2('max_pooling2', conv4, 'pooling2')

    conv5 = _conv_relu('conv5', pool2, 64, 128)
    conv6 = _conv_relu('conv6', conv5, 128, 128)
    pool3 = _max_pool_2x2('max_pool3', conv6, 'pool3')

    # Flatten the last feature map and apply dropout.
    with tf.variable_scope('fc1'):
        shp = pool3.get_shape()
        flattened_shape = shp[1].value * shp[2].value * shp[3].value
        reshape = tf.reshape(pool3, [-1, flattened_shape])
        fc1 = tf.nn.dropout(reshape, keep_prob, name='fc1_dropdot')

    # Seven fully connected heads, scopes fc21 ... fc27, one per character.
    return tuple(_char_head('fc2%d' % position, fc1, flattened_shape)
                 for position in range(1, 8))


# Cross-entropy is computed on the output heads returned by the network.
def losses(logits1, logits2, logits3, logits4, logits5, logits6, logits7, labels):
    """Compute one cross-entropy loss per plate character.

    Args:
        logits1 ... logits7: Logits tensors, one per character position.
        labels: Integer class ids; converted to tf.int32 and sliced as
            ``labels[:, i]``, so column i holds the labels for position i.

    Returns:
        A tuple of seven scalar loss tensors (mean sparse softmax
        cross-entropy), in character order. Each is also recorded as a
        scalar summary.
    """
    labels = tf.convert_to_tensor(labels, tf.int32)
    all_logits = (logits1, logits2, logits3, logits4, logits5, logits6, logits7)
    per_char_losses = []
    for position, logits in enumerate(all_logits):
        loss_name = 'loss%d' % (position + 1)
        with tf.variable_scope(loss_name) as scope:
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=labels[:, position], name='xentropy_per_example')
            loss = tf.reduce_mean(cross_entropy, name=loss_name)
            tf.summary.scalar(scope.name + '/' + loss_name, loss)
        per_char_losses.append(loss)
    return tuple(per_char_losses)


# Optimisation: Adam, an adaptive gradient-descent method.
def trainning(loss1, loss2, loss3, loss4, loss5, loss6, loss7, learning_rate):
    """Create one Adam training op per character loss.

    The returned ops are what must be passed to ``sess.run()`` to train the
    model.

    Args:
        loss1 ... loss7: Scalar loss tensors from ``losses()``.
        learning_rate: Learning rate given to every optimizer.

    Returns:
        A tuple of seven train ops, in the same order as the losses. Each op
        has its own optimizer and its own ``global_step`` variable, created
        under the name scopes ``optimizer1`` ... ``optimizer7``.

    NOTE(review): every op is built by minimising one loss on its own; if the
    losses share trainable variables, a single optimizer on the summed loss
    may be preferable - confirm before changing, because callers unpack seven
    ops.
    """
    all_losses = (loss1, loss2, loss3, loss4, loss5, loss6, loss7)
    train_ops = []
    for position, loss in enumerate(all_losses, start=1):
        with tf.name_scope('optimizer%d' % position):
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            global_step = tf.Variable(0, name='global_step', trainable=False)
            train_ops.append(optimizer.minimize(loss, global_step=global_step))
    return tuple(train_ops)


# Model evaluation.
def evaluation(logits1, logits2, logits3, logits4, logits5, logits6, logits7, labels):
    """Compute per-character top-1 accuracy over all seven positions.

    Args:
        logits1 ... logits7: Logits tensors, one per character position.
        labels: Integer class ids; converted to tf.int32, transposed and
            flattened so that they line up with the logits concatenated
            along axis 0.

    Returns:
        A scalar tf.float32 tensor: the fraction of characters whose label is
        the top-1 prediction. It is also recorded as a scalar summary.
    """
    logits_all = tf.concat([logits1, logits2, logits3, logits4, logits5, logits6, logits7], 0)
    labels = tf.convert_to_tensor(labels, tf.int32)
    labels_all = tf.reshape(tf.transpose(labels), [-1])
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(logits_all, labels_all, 1)
        # float32, not float16: a float16 sum stops counting exactly above
        # 2048, which would corrupt the mean for large batches.
        correct = tf.cast(correct, tf.float32)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy
訓練模型:train.py
import os
import numpy as np
import tensorflow as tf
from input_data import OCRIter
import model
import time
import datetime# train訓練
# Input geometry and training hyper-parameters.
img_w = 272
img_h = 72
num_label = 7
batch_size = 8
# count = 30000
count = 500000
learning_rate = 0.0001

# Placeholders fed on every step; images are laid out as [N, H, W, C].
image_holder = tf.placeholder(tf.float32, [batch_size, img_h, img_w, 3])
label_holder = tf.placeholder(tf.int32, [batch_size, num_label])
keep_prob = tf.placeholder(tf.float32)

logs_train_dir = r'H:\GPpython\第三階段數據分析\神經網絡\Licence_plate_recognize\Licence_plate_recognize\train_result'


def get_batch():
    """Generate one batch of training data.

    Returns:
        ``(images, labels)`` as numpy arrays built from the batch produced by
        a fresh ``OCRIter(batch_size, img_h, img_w)``.
    """
    data_batch = OCRIter(batch_size, img_h, img_w)
    image_batch, label_batch = data_batch.iter()
    return np.array(image_batch), np.array(label_batch)


def fit():
    """Build the graph and run the training loop for ``count`` steps.

    Progress is printed every 10 steps, a summary is written every step, and
    a checkpoint is saved every 10000 steps and at the last step. The total
    elapsed time in seconds is printed at the end.
    """
    train_logits = model.inference(image_holder, keep_prob)
    train_losses = model.losses(*train_logits, labels=label_holder)
    train_ops = model.trainning(*train_losses, learning_rate=learning_rate)
    train_acc = model.evaluation(*train_logits, labels=label_holder)

    # Registers the image summary; the returned tensor itself is not needed.
    tf.summary.image('input', image_holder)
    summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))

    fetches = list(train_ops) + list(train_losses) + [train_acc, summary_op]
    num_ops = len(train_ops)
    num_losses = len(train_losses)

    start_time1 = time.time()
    # The context manager closes the session even if a step raises.
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())

        for step in range(count):
            x_batch, y_batch = get_batch()
            start_time2 = time.time()
            time_str = datetime.datetime.now().isoformat()
            feed_dict = {image_holder: x_batch, label_holder: y_batch, keep_prob: 0.5}
            results = sess.run(fetches, feed_dict)
            loss_values = results[num_ops:num_ops + num_losses]
            acc, summary_str = results[-2], results[-1]
            train_writer.add_summary(summary_str, step)
            duration = time.time() - start_time2
            tra_all_loss = sum(loss_values)

            # print(y_batch)  # debug only: check that samples match their labels
            if step % 10 == 0:
                sec_per_batch = float(duration)
                print('%s : Step %d,train_loss = %.2f,acc= %.2f,sec/batch=%.3f' % (
                    time_str, step, tra_all_loss, acc, sec_per_batch))
            if step % 10000 == 0 or (step + 1) == count:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

        train_writer.close()
    print(time.time() - start_time1)
if __name__ == '__main__':
    # Training is currently switched off; only one batch is generated and
    # its shapes and first label row are printed.
    # fit()
    data, labels = get_batch()
    image_shape = data.shape
    print(image_shape)
    label_shape = labels.shape
    print(label_shape)
    first_label = labels[0]
    print(first_label)
除了輸出層之外,所有層都使用ReLU激活函數,采用的是深度神經網絡的標準結構。指示存在的節點使用sigmoid激活函數,典型地用于二值輸出。其他輸出節點對各個字符使用softmax(結果是每一列的概率之和為1),這是模型化離散概率分布的標準方法。
根據標簽和網絡輸出的交叉熵來定義損失函數。為了數值穩定性,利用softmax_cross_entropy_with_logits和sigmoid_cross_entropy_with_logits將最后一層的激活函數卷入交叉熵的計算。關于對交叉熵詳細而直觀的介紹可以參考Michael A. Nielsen的free online book中查看這一節。
使用一塊nVidia GTX 960m花費大約數小時來訓練(train.py),通過CPU的一個后臺進程來運行訓練數據的生成。
輸出處理
?
?
import tensorflow as tf
import numpy as np
import os
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import model
import genplate
# Plate alphabet: 31 province abbreviations, the digits 0-9 and 24 letters
# (no I or O). A character's position in the list is its class id.
chars = [
    "京", "滬", "津", "渝", "冀", "晉", "蒙", "遼", "吉", "黑", "蘇", "浙", "皖",
    "閩", "贛", "魯", "豫", "鄂", "湘", "粵", "桂", "瓊", "川", "貴", "云",
    "藏", "陜", "甘", "青", "寧", "新",
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    "A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q",
    "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
]
# Reverse lookup: character -> class id.
index = dict(zip(chars, range(len(chars))))
'''
Test one image against the saved models and parameters
'''
global pic
def get_one_image(test):
    """Regenerate the sample plates, then load one random image from ``test``.

    Args:
        test: List of image file paths to choose from.

    Returns:
        ndarray of shape (1,) + image shape, holding the chosen image scaled
        by 1/255. The unscaled image is stored in the global ``pic``.
    """
    global pic

    generator = genplate.GenPlate("./font/platech.ttf", './font/platechar.ttf', "./NoPlates")
    generator.genBatch(15, 2, range(31, 65), "./plate", (272, 72))

    chosen_path = test[np.random.randint(0, len(test))]
    plt.imshow(Image.open(chosen_path))

    pic = cv2.imread(chosen_path)
    scaled = np.multiply(pic, 1/255.0)
    # Wrap in a list to add the leading batch dimension.
    batch = np.array([scaled])
    print(batch.shape)
    return batch


batch_size = 1
# Graph inputs: one 72x272 three-channel image and the dropout keep probability.
x = tf.placeholder(tf.float32, [batch_size, 72, 272, 3])
keep_prob = tf.placeholder(tf.float32)

# Collect every file in the test directory and pick one at random.
test_dir = r'H:/GPpython/Licence_plate_recognize/plate/'
test_image = [test_dir + file for file in os.listdir(test_dir)]
image_array = get_one_image(test_image)

# No softmax is applied: only the argmax is used below, and softmax does not
# change which class has the largest score.
logit1, logit2, logit3, logit4, logit5, logit6, logit7 = model.inference(x, keep_prob)

logs_train_dir = r'H:/GPpython/Licence_plate_recognize/train_result/'
saver = tf.train.Saver()

with tf.Session() as sess:
    print("Reading checkpoint...")
    ckpt = tf.train.get_checkpoint_state(logs_train_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
        # Stop here: running the graph without restored weights would fail
        # on uninitialised variables.
        raise SystemExit('No checkpoint file found')
    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
    saver.restore(sess, ckpt.model_checkpoint_path)
    print('Loading success, global_step is %s' % global_step)

    pre1, pre2, pre3, pre4, pre5, pre6, pre7 = sess.run(
        [logit1, logit2, logit3, logit4, logit5, logit6, logit7],
        feed_dict={x: image_array, keep_prob: 1.0})
    prediction = np.reshape(np.array([pre1, pre2, pre3, pre4, pre5, pre6, pre7]), [-1, 65])
    max_index = np.argmax(prediction, axis=1)
    print(max_index)

    line = ''
    for i in range(prediction.shape[0]):
        # Restrict each position to the classes that can occur there:
        # province characters, letters only, or digits and letters.
        if i == 0:
            result = np.argmax(prediction[i][0:31])
        elif i == 1:
            result = np.argmax(prediction[i][41:65]) + 41
        else:
            result = np.argmax(prediction[i][31:65]) + 31
        line += chars[result] + " "
    print('predicted: ' + line)

cv2.imshow('pic', pic)
cv2.waitKeyEx(0)
總結
我已經開源了一個代碼相對較短的系統,它不用導入任何特定領域的庫,也不需要太多特定領域的知識,就能夠實現車牌號自動識別。此外,我還通過在線合成圖片的方法解決了需要上千張訓練圖片的問題(這在深度神經網絡中很常見)。
另一方面,我的系統也存在一些缺點:
- 只適用于特定車牌號。尤其是,網絡結構明確假定了輸出只有7個字符。
- 只適用于特定字體。
- 速度太慢。該系統運行一張適當尺寸的圖片要花費幾秒鐘。
為了解決第1個問題,谷歌團隊將他們的網絡結構的高層拆分成了多個子網絡,每一個子網絡用于假定輸出號碼中的不同號碼位。還有一個并行的子網絡來決定存在多少號碼。我覺得這種方法可以應用到這兒,但是我沒有在這個項目中實現。
關于第2點我在上面舉過例子,由于字體的稍微不同只能使用于國內車牌。如果嘗試著檢測US車牌號的話,誤檢將會更加嚴重,因為US車牌號字體類型更多。一個可能的解決方案就是使得訓練數據有更多不同的字體類型可選擇,盡管還不清楚需要多少字體類型才能成功。
第3點提到的速度慢的問題是扼殺許多應用的cancer:在一個相當強大的GPU上處理一張適當尺寸的輸入圖片就要花費幾秒鐘。我認為不引進一種級聯式結構的檢測網絡就想避開這個問題是不太可能的,比如Haar級聯,HOG檢測器,或者一個更簡單的神經網絡。
我很有興趣去嘗試和其他機器學習方法的比較會怎樣,特別是姿態回歸看起來有希望,最后可能會附加一個最基本的分類階段。如果使用了像scikit-learn這樣的機器學習庫,那么應該同樣簡單。
?