import cv2 import numpy as np import math import mediapipe as mp # 初始化MediaPipe Hands mp_hands = mp.solutions.hands mp_drawing = mp.solutions.drawing_utils mp_drawing_styles = mp.solutions.drawing_styles # 加载MediaPipe手部检测模型 def load_mediapipe_model(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5): """ 加载MediaPipe手部检测模型 参数: max_num_hands: 最大检测手的数量 min_detection_confidence: 最小检测置信度 min_tracking_confidence: 最小跟踪置信度 返回: MediaPipe Hands对象 """ hands = mp_hands.Hands( static_image_mode=False, max_num_hands=max_num_hands, min_detection_confidence=min_detection_confidence, min_tracking_confidence=min_tracking_confidence ) return hands # 处理帧并返回三轴控制信号 def process_frame_3d(frame, hands_model, prev_hand_data=None): """ 处理视频帧,检测手部并计算三轴控制信号 参数: frame: 输入的BGR格式帧 hands_model: MediaPipe Hands模型 prev_hand_data: 前一帧的手部数据 返回: 三轴控制信号字典和当前手部数据 """ # 获取帧尺寸 frame_height, frame_width = frame.shape[:2] # 初始化三轴控制信号 control_signal = { "x_angle": 90, # 水平角度 (左右) "y_angle": 90, # 垂直角度 (上下) "z_angle": 90, # 深度角度 (前后) "grip": 0, # 抓取状态 (0=松开, 1=抓取) "action": "none", # 当前动作 "speed": 5 # 移动速度 (1-10) } # MediaPipe需要RGB格式的图像 rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # 处理图像 results = hands_model.process(rgb_frame) # 如果没有检测到手,返回默认控制信号和前一帧数据 if not results.multi_hand_landmarks: return control_signal, prev_hand_data # 获取第一只手的关键点 hand_landmarks = results.multi_hand_landmarks[0] # 计算手部中心点 (使用所有关键点的平均位置) center_x, center_y, center_z = 0, 0, 0 for landmark in hand_landmarks.landmark: center_x += landmark.x center_y += landmark.y center_z += landmark.z num_landmarks = len(hand_landmarks.landmark) center_x /= num_landmarks center_y /= num_landmarks center_z /= num_landmarks # 转换为像素坐标 hand_center = { 'x': center_x * frame_width, 'y': center_y * frame_height, 'z': center_z # 保留归一化的z值 } # 获取更精确的手部姿态信息 wrist = hand_landmarks.landmark[mp_hands.HandLandmark.WRIST] index_finger_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP] pinky_tip = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP] thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP] # 计算手部方向向量 (从手腕到手中心) direction_vector = { 'x': center_x - wrist.x, 'y': center_y - wrist.y, 'z': center_z - wrist.z } # 计算屏幕中心点 frame_center = { 'x': frame_width / 2, 'y': frame_height / 2, 'z': 0 # 参考点 } # 计算手部相对于屏幕中心的位移 dx = hand_center['x'] - frame_center['x'] dy = hand_center['y'] - frame_center['y'] dz = hand_center['z'] * 10 # 缩放z值使其更明显 # 转换为机械臂的三个轴的角度 # X轴角度 (左右移动) - 范围0-180度,中间是90度 x_angle = map_to_angle(dx, frame_width / 2) # Y轴角度 (上下移动) - 范围0-180度,中间是90度 y_angle = map_to_angle(-dy, frame_height / 2) # 注意负号:图像y轴向下,但机械臂y轴向上 # Z轴角度 (前后移动) - 范围0-180度,中间是90度 # Z值需要正确映射,这里我们假设z值范围在-0.5至0.5之间 # 越靠近摄像头z值越小(更负),越远离摄像头z值越大(更正) z_angle = 90 + (dz * 90) # 将z值映射到0-180范围 z_angle = max(0, min(180, z_angle)) # 确保在有效范围内 # 检测抓取动作 (拇指和食指距离) pinch_distance = calculate_3d_distance( thumb_tip.x, thumb_tip.y, thumb_tip.z, index_finger_tip.x, index_finger_tip.y, index_finger_tip.z ) # 如果拇指和食指距离小于阈值,认为是抓取状态 grip = 1 if pinch_distance < 0.05 else 0 # 检测手部动作 (如果有前一帧数据) action = "none" speed = 5 # 默认中等速度 if prev_hand_data: # 计算移动向量 move_x = hand_center['x'] - prev_hand_data['x'] move_y = hand_center['y'] - prev_hand_data['y'] move_z = hand_center['z'] - prev_hand_data['z'] # 计算移动距离 move_distance = math.sqrt(move_x**2 + move_y**2 + move_z**2) # 如果移动足够大,确定主要移动方向 if move_distance > 0.01: # 计算移动速度 (1-10范围) speed = min(10, max(1, int(move_distance * 200))) # 确定主导移动方向 abs_move = [abs(move_x), abs(move_y), abs(move_z)] max_move = max(abs_move) if max_move == abs_move[0]: # X轴移动 action = "right" if move_x > 0 else "left" elif max_move == abs_move[1]: # Y轴移动 action = "down" if move_y > 0 else "up" else: # Z轴移动 action = "forward" if move_z < 0 else "backward" # 更新控制信号 control_signal = { "x_angle": x_angle, "y_angle": y_angle, "z_angle": z_angle, "grip": grip, "action": action, "speed": speed } # 在帧上绘制手部关键点和连接线 mp_drawing.draw_landmarks( frame, hand_landmarks, mp_hands.HAND_CONNECTIONS, mp_drawing_styles.get_default_hand_landmarks_style(), mp_drawing_styles.get_default_hand_connections_style() ) # 绘制三轴控制指示器 draw_3d_control_visualization(frame, control_signal, hand_center) return control_signal, hand_center def calculate_3d_distance(x1, y1, z1, x2, y2, z2): """计算3D空间中两点之间的欧几里得距离""" return math.sqrt((x2 - x1)**2 + (y2 - y1)**2 + (z2 - z1)**2) def map_to_angle(value, max_value): """将-max_value到max_value范围内的值映射到0-180度范围的角度""" # 计算相对位置 (-1到1) relative_position = value / max_value # 映射到角度 (90度为中点) angle = 90 + (relative_position * 45) # 使用45度作为最大偏移量 # 确保角度在有效范围内 return max(0, min(180, angle)) def draw_3d_control_visualization(frame, control_signal, hand_center): """在帧上绘制三轴控制可视化""" height, width = frame.shape[:2] # 绘制坐标轴 origin_x, origin_y = width - 150, height - 150 axis_length = 100 # X轴 (红色) cv2.line(frame, (origin_x, origin_y), (origin_x + axis_length, origin_y), (0, 0, 255), 2) cv2.putText(frame, "X", (origin_x + axis_length + 5, origin_y + 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) # Y轴 (绿色) cv2.line(frame, (origin_x, origin_y), (origin_x, origin_y - axis_length), (0, 255, 0), 2) cv2.putText(frame, "Y", (origin_x - 15, origin_y - axis_length - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) # Z轴 (蓝色) - 以45度角向右上方 z_x = int(origin_x + axis_length * 0.7) z_y = int(origin_y - axis_length * 0.7) cv2.line(frame, (origin_x, origin_y), (z_x, z_y), (255, 0, 0), 2) cv2.putText(frame, "Z", (z_x + 5, z_y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2) # 绘制控制角度值 x_text = f"X: {control_signal['x_angle']:.1f}°" y_text = f"Y: {control_signal['y_angle']:.1f}°" z_text = f"Z: {control_signal['z_angle']:.1f}°" grip_text = f"抓取: {'开' if control_signal['grip'] == 0 else '关'}" action_text = f"动作: {control_signal['action']}" speed_text = f"速度: {control_signal['speed']}" cv2.putText(frame, x_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) cv2.putText(frame, y_text, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) cv2.putText(frame, z_text, (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2) cv2.putText(frame, grip_text, (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2) cv2.putText(frame, action_text, (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2) cv2.putText(frame, speed_text, (10, 180), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 255), 2) # 绘制手部位置到屏幕中心的连接线 if hand_center: center_x, center_y = int(width/2), int(height/2) hand_x, hand_y = int(hand_center['x']), int(hand_center['y']) # 绘制屏幕中心点 cv2.circle(frame, (center_x, center_y), 5, (0, 0, 255), -1) # 绘制手部位置点 cv2.circle(frame, (hand_x, hand_y), 10, (0, 255, 0), -1) # 绘制连接线 cv2.line(frame, (center_x, center_y), (hand_x, hand_y), (255, 0, 0), 2) def analyze_hand_gesture(hand_landmarks): """分析手势以确定机械臂控制模式""" # 获取指尖和手掌关键点 thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP] index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP] middle_tip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP] ring_tip = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP] pinky_tip = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP] # 获取手掌底部关键点 wrist = hand_landmarks.landmark[mp_hands.HandLandmark.WRIST] index_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP] pinky_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP] # 检测拳头 - 所有手指弯曲 fist = ( thumb_tip.y > index_mcp.y and index_tip.y > index_mcp.y and middle_tip.y > index_mcp.y and ring_tip.y > index_mcp.y and pinky_tip.y > pinky_mcp.y ) # 检测手掌打开 - 所有手指伸直 open_palm = ( thumb_tip.y < wrist.y and index_tip.y < index_mcp.y and middle_tip.y < index_mcp.y and ring_tip.y < index_mcp.y and pinky_tip.y < pinky_mcp.y ) # 检测指向手势 - 食指伸出,其他手指弯曲 pointing = ( index_tip.y < index_mcp.y and middle_tip.y > index_mcp.y and ring_tip.y > index_mcp.y and pinky_tip.y > pinky_mcp.y ) # 检测"OK"手势 - 拇指和食指形成圆圈,其他手指伸出 pinch_distance = calculate_3d_distance( thumb_tip.x, thumb_tip.y, thumb_tip.z, index_tip.x, index_tip.y, index_tip.z ) ok_gesture = (pinch_distance < 0.05 and middle_tip.y < index_mcp.y and ring_tip.y < index_mcp.y and pinky_tip.y < pinky_mcp.y) # 返回识别的手势 if fist: return "fist" elif open_palm: return "open_palm" elif pointing: return "pointing" elif ok_gesture: return "ok" else: return "unknown"