In computer vision and human-computer interaction, hand gesture recognition is a particularly fun application. This article walks through building a camera-based Rock-Paper-Scissors mini game with Mediapipe and Python that shows the recognized gesture and the game result in real time.
1. Project Overview
The mini game can:
- Detect gestures in real time: Rock, Scissors, Paper, and an OK gesture.
- Reset the game when the player shows the OK gesture.
- When the player shows Rock, Paper, or Scissors, compare it against a random computer move and display the winner.
- Draw the hand landmarks and gesture label on the camera frame, and show the computer's move and the round result in the top-left corner.
The project's core dependencies:
- mediapipe: hand landmark detection.
- opencv-python: camera capture and image display.
- numpy: the math behind the gesture classification.
- random: simulating the computer's move.
2. Core Class: SimpleHandGestureGame
```python
class SimpleHandGestureGame:
    def __init__(self, model_path="hand_landmarker.task", num_hands=1):
        # Initialize the Mediapipe HandLandmarker
        base_options = python.BaseOptions(model_asset_path=model_path)
        options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=num_hands)
        self.detector = vision.HandLandmarker.create_from_options(options)
        self.computer_choice = None
        self.round_result = ""
        self.round_played = False
```
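The constructor assumes the hand_landmarker.task model asset is already on disk at model_path. Before going further, a small optional check that the dependencies import and the model file can be found (MODEL_PATH below is just a placeholder for wherever you saved the asset):

```python
import os

import cv2
import mediapipe as mp
import numpy as np

print("opencv-python:", cv2.__version__)
print("mediapipe:", mp.__version__)
print("numpy:", np.__version__)

MODEL_PATH = "hand_landmarker.task"  # placeholder: point this at the downloaded HandLandmarker asset
print("model file found:", os.path.exists(MODEL_PATH))
```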
2.1 Drawing Hand Landmarks: _draw_landmarks
Because solutions.drawing_utils expects a protobuf NormalizedLandmarkList, the landmarks returned by the detector are first copied into one before drawing:
```python
def _draw_landmarks(self, rgb_image, detection_result):
    annotated_image = np.copy(rgb_image)
    if detection_result.hand_landmarks:
        for hand_landmarks in detection_result.hand_landmarks:
            proto_landmarks = landmark_pb2.NormalizedLandmarkList()
            proto_landmarks.landmark.extend([
                landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z) for lm in hand_landmarks
            ])
            solutions.drawing_utils.draw_landmarks(
                image=annotated_image,
                landmark_list=proto_landmarks,
                connections=mp.solutions.hands.HAND_CONNECTIONS,
                landmark_drawing_spec=solutions.drawing_styles.get_default_hand_landmarks_style(),
                connection_drawing_spec=solutions.drawing_styles.get_default_hand_connections_style())
    return annotated_image
```
2.2 Gesture Recognition: _judge_gesture
The gesture is classified from whether each finger is extended:
- Rock: all fingers bent.
- Scissors: index and middle fingers extended, the rest bent.
- Paper: all five fingers extended.
- OK: thumb and index fingertips form a circle, the other three fingers extended.
```python
def _judge_gesture(self, hand_landmarks):
    def is_straight(tip, pip, mcp=None):
        if mcp:
            a, b, c = np.array([tip.x, tip.y]), np.array([pip.x, pip.y]), np.array([mcp.x, mcp.y])
            ba, bc = a - b, c - b
            cos_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-6)
            return np.arccos(np.clip(cos_angle, -1, 1)) * 180 / np.pi > 160
        else:
            return tip.y < pip.y

    thumb_straight = is_straight(hand_landmarks[4], hand_landmarks[2], hand_landmarks[1])
    index_straight = is_straight(hand_landmarks[8], hand_landmarks[6])
    middle_straight = is_straight(hand_landmarks[12], hand_landmarks[10])
    ring_straight = is_straight(hand_landmarks[16], hand_landmarks[14])
    pinky_straight = is_straight(hand_landmarks[20], hand_landmarks[18])
    total = sum([thumb_straight, index_straight, middle_straight, ring_straight, pinky_straight])

    # OK gesture
    thumb_tip = np.array([hand_landmarks[4].x, hand_landmarks[4].y])
    index_tip = np.array([hand_landmarks[8].x, hand_landmarks[8].y])
    if np.linalg.norm(thumb_tip - index_tip) < 0.05 and middle_straight and ring_straight and pinky_straight:
        return "OK"

    if total == 0:
        return "Rock"
    if total == 2 and index_straight and middle_straight:
        return "Scissors"
    if total == 5:
        return "Paper"
    return "Undefined"
```
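Only the thumb is checked via the joint angle; the other four fingers use the simpler test that the fingertip sits above the PIP joint (smaller y, since normalized image coordinates grow downward). The standalone sketch below illustrates the 160° threshold on made-up coordinates rather than real Mediapipe output:

```python
import numpy as np

def angle_deg(tip, pip, mcp):
    # Angle at the PIP joint between the TIP and MCP directions; close to 180° for a straight finger.
    a, b, c = np.array(tip), np.array(pip), np.array(mcp)
    ba, bc = a - b, c - b
    cos_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-6)
    return np.degrees(np.arccos(np.clip(cos_angle, -1, 1)))

straight = angle_deg((0.30, 0.20), (0.30, 0.40), (0.30, 0.60))  # collinear joints -> about 180 degrees
bent = angle_deg((0.40, 0.35), (0.30, 0.40), (0.30, 0.60))      # tip folded sideways -> about 117 degrees
print(straight > 160, bent > 160)  # True False
```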
2.3 Game Logic: _play_game
```python
def _play_game(self, player_choice):
    choices = ["Rock", "Scissors", "Paper"]
    if self.computer_choice is None:
        self.computer_choice = random.choice(choices)
    if player_choice == self.computer_choice:
        self.round_result = "Draw"
    elif (player_choice == "Rock" and self.computer_choice == "Scissors") or \
         (player_choice == "Scissors" and self.computer_choice == "Paper") or \
         (player_choice == "Paper" and self.computer_choice == "Rock"):
        self.round_result = "You Win"
    else:
        self.round_result = "Computer Wins"
    self.round_played = True
```
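A throwaway sanity check of the round logic (it pins computer_choice by hand so the outcome is deterministic; constructing the class still requires the hand_landmarker.task model to be present):

```python
game = SimpleHandGestureGame()      # needs hand_landmarker.task on disk
game.computer_choice = "Scissors"   # pin the computer's move instead of drawing it randomly
game._play_game("Rock")
print(game.computer_choice, game.round_result, game.round_played)  # Scissors You Win True
```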
2.4 Frame Processing: do
Finally, the do method is responsible for:
- accepting a camera frame,
- running Mediapipe hand detection on it,
- drawing the hand landmarks and the gesture label,
- displaying the computer's move and the round result.
```python
def do(self, frame, device=None):
    if frame is None:
        return None
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    detection_result = self.detector.detect(mp_image)
    annotated = self._draw_landmarks(mp_image.numpy_view(), detection_result)
    # ...draw the gesture label and the game result (full version in Section 3)...
    return cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR)
```
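Because do just takes a BGR frame and returns an annotated BGR frame, it can be smoke-tested on a single still image before a camera is involved. A hedged sketch, where hand.jpg is a hypothetical test photo rather than a file shipped with the project:

```python
game = SimpleHandGestureGame(model_path="hand_landmarker.task")
frame = cv2.imread("hand.jpg")      # hypothetical still image containing one hand
result = game.do(frame)
if result is not None:
    cv2.imwrite("hand_annotated.jpg", result)
```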
3. Quick Start
```python
import cv2
import numpy as np
import mediapipe as mp
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import random


class SimpleHandGestureGame:
    def __init__(self, model_path="path/to/hand_landmarker.task", num_hands=1):
        """Initialize Mediapipe HandLandmarker and game state"""
        base_options = python.BaseOptions(model_asset_path=model_path)
        options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=num_hands)
        self.detector = vision.HandLandmarker.create_from_options(options)
        self.computer_choice = None
        self.round_result = ""
        self.round_played = False

    def _draw_landmarks(self, rgb_image, detection_result):
        annotated_image = np.copy(rgb_image)
        if detection_result.hand_landmarks:
            for hand_landmarks in detection_result.hand_landmarks:
                proto_landmarks = landmark_pb2.NormalizedLandmarkList()
                proto_landmarks.landmark.extend([
                    landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z) for lm in hand_landmarks
                ])
                solutions.drawing_utils.draw_landmarks(
                    image=annotated_image,
                    landmark_list=proto_landmarks,
                    connections=mp.solutions.hands.HAND_CONNECTIONS,
                    landmark_drawing_spec=solutions.drawing_styles.get_default_hand_landmarks_style(),
                    connection_drawing_spec=solutions.drawing_styles.get_default_hand_connections_style())
        return annotated_image

    def _judge_gesture(self, hand_landmarks):
        """Determine hand gesture: Rock-Paper-Scissors + OK"""
        def is_straight(tip, pip, mcp=None):
            if mcp:
                a, b, c = np.array([tip.x, tip.y]), np.array([pip.x, pip.y]), np.array([mcp.x, mcp.y])
                ba, bc = a - b, c - b
                cos_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-6)
                return np.arccos(np.clip(cos_angle, -1, 1)) * 180 / np.pi > 160
            else:
                return tip.y < pip.y

        thumb = is_straight(hand_landmarks[4], hand_landmarks[2], hand_landmarks[1])
        index = is_straight(hand_landmarks[8], hand_landmarks[6])
        middle = is_straight(hand_landmarks[12], hand_landmarks[10])
        ring = is_straight(hand_landmarks[16], hand_landmarks[14])
        pinky = is_straight(hand_landmarks[20], hand_landmarks[18])
        total = sum([thumb, index, middle, ring, pinky])

        # OK gesture
        thumb_tip = np.array([hand_landmarks[4].x, hand_landmarks[4].y])
        index_tip = np.array([hand_landmarks[8].x, hand_landmarks[8].y])
        if np.linalg.norm(thumb_tip - index_tip) < 0.05 and middle and ring and pinky:
            return "OK"

        # Rock-Paper-Scissors
        if total == 0:
            return "Rock"
        if total == 2 and index and middle:
            return "Scissors"
        if total == 5:
            return "Paper"
        return "Undefined"

    def _play_game(self, player_choice):
        """Determine the result of a Rock-Paper-Scissors round"""
        choices = ["Rock", "Scissors", "Paper"]
        if self.computer_choice is None:
            self.computer_choice = random.choice(choices)
        if player_choice == self.computer_choice:
            self.round_result = "Draw"
        elif (player_choice == "Rock" and self.computer_choice == "Scissors") or \
             (player_choice == "Scissors" and self.computer_choice == "Paper") or \
             (player_choice == "Paper" and self.computer_choice == "Rock"):
            self.round_result = "You Win"
        else:
            self.round_result = "Computer Wins"
        self.round_played = True

    def do(self, frame, device=None):
        """Process a single frame, overlay hand gesture and game result (vertically)"""
        if frame is None:
            return None
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        detection_result = self.detector.detect(mp_image)
        annotated = self._draw_landmarks(mp_image.numpy_view(), detection_result)

        gesture_display = ""
        if detection_result.hand_landmarks:
            for hand_landmarks in detection_result.hand_landmarks:
                gesture = self._judge_gesture(hand_landmarks)
                if gesture == "OK":
                    # Showing OK resets the round and pre-draws the computer's next move
                    self.computer_choice = random.choice(["Rock", "Scissors", "Paper"])
                    self.round_result = ""
                    self.round_played = False
                    gesture_display = "Game Ready..."
                elif gesture in ["Rock", "Scissors", "Paper"] and not self.round_played:
                    self._play_game(gesture)
                    gesture_display = f"{gesture}"
                else:
                    gesture_display = gesture

                # Label the gesture next to the index fingertip
                h, w, _ = annotated.shape
                index_finger_tip = hand_landmarks[8]
                cx, cy = int(index_finger_tip.x * w), int(index_finger_tip.y * h)
                cv2.putText(annotated, gesture_display, (cx, cy - 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)

        if self.round_result:
            # Show the computer's move and the round result in the top-left corner
            start_x, start_y, line_height = 30, 50, 40
            lines = [f"Computer Choice: {self.computer_choice}", f"Result: {self.round_result}"]
            for i, line in enumerate(lines):
                cv2.putText(annotated, line, (start_x, start_y + i * line_height),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3)

        return cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR)
```
4. Summary
This article showed how to use the Mediapipe HandLandmarker to quickly build a real-time hand gesture mini game. With landmark-based geometry and a few simple rules, it recognizes Rock, Scissors, Paper, and OK gestures, and wires them into game logic that reports the result of each round.
Interested in PiscTrace or PiscCode? Head over to the official site for more: 🔗 PiscTrace