Getting Started with Deep Learning Projects: Bringing You Closer to Your Data Science Dream
Emojis and avatars are ways of conveying nonverbal cues. These cues have become an essential part of online chat, product reviews, brand sentiment, and more. This has also driven a growing body of data science research devoted to emoji-driven storytelling.
With advances in computer vision and deep learning, it is now possible to detect human emotions from images. In this deep learning project, we will classify human facial expressions and filter and map them to the corresponding emojis or avatars.
About the Dataset
The Facial Expression Recognition dataset (FER2013) consists of 48*48-pixel grayscale face images. The images are centered and occupy an equal amount of space. The dataset contains facial emotions in the following categories:
- 0: Angry
- 1: Disgust
- 2: Fear
- 3: Happy
- 4: Sad
- 5: Surprise
- 6: Neutral
Download the dataset: Facial Expression Recognition Dataset
Download the project code: Emoji Creator Project Source Code
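After downloading and extracting the dataset, it is worth verifying how the images are laid out on disk. The generators in the next section read one sub-folder per emotion under data/train and data/test; the folder names used in this minimal sketch are an assumption (they match the emoji file names used later in gui.py). Counting the images per class also makes FER2013's class imbalance visible (the disgust class is much smaller than the rest):

import os

# Assumed layout: data/train/<emotion>/ and data/test/<emotion>/,
# one sub-folder per class. The folder names are an assumption.
for split in ('data/train', 'data/test'):
    for cls in sorted(os.listdir(split)):
        n = len(os.listdir(os.path.join(split, cls)))
        print(f"{split}/{cls}: {n} images")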
Create Your Own Emoji with Deep Learning
We will build a deep learning model to classify the facial expressions in an image, then filter and map the classified emotion to an emoji or avatar.
Facial Emotion Recognition Using a CNN
In the steps below, we will build a convolutional neural network architecture and train the model on the FER2013 dataset to recognize emotions from images.
- Import the libraries:
import numpy as np
import cv2
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D
from keras.optimizers import Adam
from keras.layers import MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
- Initialize the training and validation generators:
train_dir = 'data/train'
val_dir = 'data/test'
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(48, 48),
    batch_size=64,
    color_mode="grayscale",
    class_mode='categorical')
validation_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(48, 48),
    batch_size=64,
    color_mode="grayscale",
    class_mode='categorical')
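A quick, optional sanity check: flow_from_directory assigns class indices by sorting the sub-folder names alphabetically, which is why the emotion_dict used later in this project does not follow the dataset's canonical 0-6 label order listed above. Printing the mapping confirms it (the folder names shown are an assumption):

print(train_generator.class_indices)
# With sub-folders named angry, disgusted, fearful, happy, neutral,
# sad, surprised, this prints:
# {'angry': 0, 'disgusted': 1, 'fearful': 2, 'happy': 3,
#  'neutral': 4, 'sad': 5, 'surprised': 6}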
- Build the convolutional network architecture:
emotion_model = Sequential()
emotion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48,48,1)))
emotion_model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Flatten())
emotion_model.add(Dense(1024, activation='relu'))
emotion_model.add(Dropout(0.5))
emotion_model.add(Dense(7, activation='softmax'))
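Optionally, print the model summary to confirm the layers wire up and the final layer emits seven class probabilities, one per emotion:

emotion_model.summary()  # the last Dense layer should report an output shape of (None, 7)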
- Compile and train the model:
emotion_model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.0001, decay=1e-6), metrics=['accuracy'])
emotion_model_info = emotion_model.fit_generator(
    train_generator,
    steps_per_epoch=28709 // 64,
    epochs=50,
    validation_data=validation_generator,
    validation_steps=7178 // 64)
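Note that fit_generator and the lr/decay arguments belong to older Keras releases; on TensorFlow 2.x they are deprecated or removed. A minimal equivalent sketch for modern tf.keras (dropping the deprecated decay argument) is:

emotion_model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(learning_rate=0.0001),
                      metrics=['accuracy'])
emotion_model_info = emotion_model.fit(
    train_generator,
    steps_per_epoch=28709 // 64,
    epochs=50,
    validation_data=validation_generator,
    validation_steps=7178 // 64)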
- Save the model weights:
emotion_model.save_weights('model.h5')
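Because save_weights stores only the weights, gui.py below has to rebuild the identical architecture before calling load_weights. If you would rather avoid that duplication, one alternative (the file name emotion_model.h5 is our own choice) is to save the full model:

from keras.models import load_model

emotion_model.save('emotion_model.h5')          # architecture + weights in one file
emotion_model = load_model('emotion_model.h5')  # restores the model without redefining layers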
- Use OpenCV's Haar cascade XML to detect the bounding boxes of faces in the webcam feed, and predict the emotion:
cv2.ocl.setUseOpenCL(False)
emotion_dict = {0: "Angry", 1: "Disgusted", 2: "Fearful", 3: "Happy", 4: "Neutral", 5: "Sad", 6: "Surprised"}
# Load the Haar cascade once, outside the capture loop. cv2.data.haarcascades
# points at the cascade files bundled with the opencv-python package, so no
# machine-specific path is needed.
bounding_box = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    num_faces = bounding_box.detectMultiScale(gray_frame, scaleFactor=1.3, minNeighbors=5)
    for (x, y, w, h) in num_faces:
        cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
        roi_gray_frame = gray_frame[y:y + h, x:x + w]
        cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray_frame, (48, 48)), -1), 0)
        emotion_prediction = emotion_model.predict(cropped_img)
        maxindex = int(np.argmax(emotion_prediction))
        cv2.putText(frame, emotion_dict[maxindex], (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    cv2.imshow('Video', cv2.resize(frame, (1200, 860), interpolation=cv2.INTER_CUBIC))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
Code for the GUI and Emoji Mapping
Create a folder named emojis and save an emoji image for each emotion in the dataset.
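A minimal sketch to confirm the folder is complete before launching the GUI; the file names are taken from the emoji_dist dictionary in gui.py below:

import os

for name in ('angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised'):
    path = f'./emojis/{name}.png'
    print(path, 'OK' if os.path.exists(path) else 'MISSING')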
Paste the following code into gui.py and run the file.
import tkinter as tk
from tkinter import *
import cv2
from PIL import Image, ImageTk
import os
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D
from keras.optimizers import Adam
from keras.layers import MaxPooling2D

emotion_model = Sequential()
emotion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48,48,1)))
emotion_model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Flatten())
emotion_model.add(Dense(1024, activation='relu'))
emotion_model.add(Dropout(0.5))
emotion_model.add(Dense(7, activation='softmax'))
emotion_model.load_weights('model.h5')

cv2.ocl.setUseOpenCL(False)
emotion_dict = {0: " Angry ", 1: "Disgusted", 2: " Fearful ", 3: " Happy ", 4: " Neutral ", 5: " Sad ", 6: "Surprised"}
emoji_dist = {0: "./emojis/angry.png", 1: "./emojis/disgusted.png", 2: "./emojis/fearful.png", 3: "./emojis/happy.png", 4: "./emojis/neutral.png", 5: "./emojis/sad.png", 6: "./emojis/surprised.png"}

last_frame1 = np.zeros((480, 640, 3), dtype=np.uint8)
show_text = [0]
# Open the webcam once at startup instead of on every frame, and load the
# Haar cascade bundled with opencv-python so the path is portable.
cap1 = cv2.VideoCapture(0)
bounding_box = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

def show_vid():
    global last_frame1
    if not cap1.isOpened():
        print("cannot open the camera")
    flag1, frame1 = cap1.read()
    frame1 = cv2.resize(frame1, (600, 500))
    gray_frame = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    num_faces = bounding_box.detectMultiScale(gray_frame, scaleFactor=1.3, minNeighbors=5)
    for (x, y, w, h) in num_faces:
        cv2.rectangle(frame1, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
        roi_gray_frame = gray_frame[y:y + h, x:x + w]
        cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray_frame, (48, 48)), -1), 0)
        prediction = emotion_model.predict(cropped_img)
        maxindex = int(np.argmax(prediction))
        cv2.putText(frame1, emotion_dict[maxindex], (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        show_text[0] = maxindex
    if flag1 is None:
        print("Major error!")
    elif flag1:
        last_frame1 = frame1.copy()
        pic = cv2.cvtColor(last_frame1, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(pic)
        imgtk = ImageTk.PhotoImage(image=img)
        lmain.imgtk = imgtk
        lmain.configure(image=imgtk)
        lmain.after(10, show_vid)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        exit()

def show_vid2():
    frame2 = cv2.imread(emoji_dist[show_text[0]])
    pic2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2RGB)
    img2 = Image.fromarray(pic2)
    imgtk2 = ImageTk.PhotoImage(image=img2)
    lmain2.imgtk2 = imgtk2
    lmain3.configure(text=emotion_dict[show_text[0]], font=('arial', 45, 'bold'))
    lmain2.configure(image=imgtk2)
    lmain2.after(10, show_vid2)

if __name__ == '__main__':
    root = tk.Tk()
    img = ImageTk.PhotoImage(Image.open("logo.png"))
    heading = Label(root, image=img, bg='black')
    heading.pack()
    heading2 = Label(root, text="Photo to Emoji", pady=20, font=('arial', 45, 'bold'), bg='black', fg='#CDCDCD')
    heading2.pack()
    lmain = tk.Label(master=root, padx=50, bd=10)
    lmain2 = tk.Label(master=root, bd=10)
    lmain3 = tk.Label(master=root, bd=10, fg="#CDCDCD", bg='black')
    lmain.pack(side=LEFT)
    lmain.place(x=50, y=250)
    lmain3.pack()
    lmain3.place(x=960, y=250)
    lmain2.pack(side=RIGHT)
    lmain2.place(x=900, y=350)
    root.title("Photo To Emoji")
    root.geometry("1400x900+100+10")
    root['bg'] = 'black'
    Button(root, text='Quit', fg="red", command=root.destroy, font=('arial', 25, 'bold')).pack(side=BOTTOM)
    show_vid()
    show_vid2()
    root.mainloop()
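When you run python gui.py, keep in mind what the script expects in the working directory: the trained model.h5, a logo.png for the header image, and the emojis folder. The left pane shows the live webcam feed with the detected emotion drawn on it, the right pane shows the mapped emoji, and the Quit button closes the window.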
Summary
In this beginner-friendly deep learning project, we built a convolutional neural network to recognize facial emotions. We trained the model on the FER2013 dataset and then mapped the recognized emotions to the corresponding emojis or avatars.
Using OpenCV's Haar cascade XML, we obtained the bounding boxes of the faces in the webcam feed and fed them to the trained model for classification.
DataFlair is committed to providing all the resources you need to become a data scientist, including detailed tutorials, practice material, use cases, and project source code.