checkhandpi/src/hand_detection_3d.py
2025-08-11 13:15:21 +08:00

316 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import cv2
import numpy as np
import math
import mediapipe as mp
# 初始化MediaPipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
# 加载MediaPipe手部检测模型
def load_mediapipe_model(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5):
"""
加载MediaPipe手部检测模型
参数:
max_num_hands: 最大检测手的数量
min_detection_confidence: 最小检测置信度
min_tracking_confidence: 最小跟踪置信度
返回:
MediaPipe Hands对象
"""
hands = mp_hands.Hands(
static_image_mode=False,
max_num_hands=max_num_hands,
min_detection_confidence=min_detection_confidence,
min_tracking_confidence=min_tracking_confidence
)
return hands
# 处理帧并返回三轴控制信号
def process_frame_3d(frame, hands_model, prev_hand_data=None):
"""
处理视频帧,检测手部并计算三轴控制信号
参数:
frame: 输入的BGR格式帧
hands_model: MediaPipe Hands模型
prev_hand_data: 前一帧的手部数据
返回:
三轴控制信号字典和当前手部数据
"""
# 获取帧尺寸
frame_height, frame_width = frame.shape[:2]
# 初始化三轴控制信号
control_signal = {
"x_angle": 90, # 水平角度 (左右)
"y_angle": 90, # 垂直角度 (上下)
"z_angle": 90, # 深度角度 (前后)
"grip": 0, # 抓取状态 (0=松开, 1=抓取)
"action": "none", # 当前动作
"speed": 5 # 移动速度 (1-10)
}
# MediaPipe需要RGB格式的图像
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# 处理图像
results = hands_model.process(rgb_frame)
# 如果没有检测到手,返回默认控制信号和前一帧数据
if not results.multi_hand_landmarks:
return control_signal, prev_hand_data
# 获取第一只手的关键点
hand_landmarks = results.multi_hand_landmarks[0]
# 计算手部中心点 (使用所有关键点的平均位置)
center_x, center_y, center_z = 0, 0, 0
for landmark in hand_landmarks.landmark:
center_x += landmark.x
center_y += landmark.y
center_z += landmark.z
num_landmarks = len(hand_landmarks.landmark)
center_x /= num_landmarks
center_y /= num_landmarks
center_z /= num_landmarks
# 转换为像素坐标
hand_center = {
'x': center_x * frame_width,
'y': center_y * frame_height,
'z': center_z # 保留归一化的z值
}
# 获取更精确的手部姿态信息
wrist = hand_landmarks.landmark[mp_hands.HandLandmark.WRIST]
index_finger_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
pinky_tip = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP]
thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]
# 计算手部方向向量 (从手腕到手中心)
direction_vector = {
'x': center_x - wrist.x,
'y': center_y - wrist.y,
'z': center_z - wrist.z
}
# 计算屏幕中心点
frame_center = {
'x': frame_width / 2,
'y': frame_height / 2,
'z': 0 # 参考点
}
# 计算手部相对于屏幕中心的位移
dx = hand_center['x'] - frame_center['x']
dy = hand_center['y'] - frame_center['y']
dz = hand_center['z'] * 10 # 缩放z值使其更明显
# 转换为机械臂的三个轴的角度
# X轴角度 (左右移动) - 范围0-180度中间是90度
x_angle = map_to_angle(dx, frame_width / 2)
# Y轴角度 (上下移动) - 范围0-180度中间是90度
y_angle = map_to_angle(-dy, frame_height / 2) # 注意负号图像y轴向下但机械臂y轴向上
# Z轴角度 (前后移动) - 范围0-180度中间是90度
# Z值需要正确映射这里我们假设z值范围在-0.5至0.5之间
# 越靠近摄像头z值越小更负越远离摄像头z值越大更正
z_angle = 90 + (dz * 90) # 将z值映射到0-180范围
z_angle = max(0, min(180, z_angle)) # 确保在有效范围内
# 检测抓取动作 (拇指和食指距离)
pinch_distance = calculate_3d_distance(
thumb_tip.x, thumb_tip.y, thumb_tip.z,
index_finger_tip.x, index_finger_tip.y, index_finger_tip.z
)
# 如果拇指和食指距离小于阈值,认为是抓取状态
grip = 1 if pinch_distance < 0.05 else 0
# 检测手部动作 (如果有前一帧数据)
action = "none"
speed = 5 # 默认中等速度
if prev_hand_data:
# 计算移动向量
move_x = hand_center['x'] - prev_hand_data['x']
move_y = hand_center['y'] - prev_hand_data['y']
move_z = hand_center['z'] - prev_hand_data['z']
# 计算移动距离
move_distance = math.sqrt(move_x**2 + move_y**2 + move_z**2)
# 如果移动足够大,确定主要移动方向
if move_distance > 0.01:
# 计算移动速度 (1-10范围)
speed = min(10, max(1, int(move_distance * 200)))
# 确定主导移动方向
abs_move = [abs(move_x), abs(move_y), abs(move_z)]
max_move = max(abs_move)
if max_move == abs_move[0]: # X轴移动
action = "right" if move_x > 0 else "left"
elif max_move == abs_move[1]: # Y轴移动
action = "down" if move_y > 0 else "up"
else: # Z轴移动
action = "forward" if move_z < 0 else "backward"
# 更新控制信号
control_signal = {
"x_angle": x_angle,
"y_angle": y_angle,
"z_angle": z_angle,
"grip": grip,
"action": action,
"speed": speed
}
# 在帧上绘制手部关键点和连接线
mp_drawing.draw_landmarks(
frame,
hand_landmarks,
mp_hands.HAND_CONNECTIONS,
mp_drawing_styles.get_default_hand_landmarks_style(),
mp_drawing_styles.get_default_hand_connections_style()
)
# 绘制三轴控制指示器
draw_3d_control_visualization(frame, control_signal, hand_center)
return control_signal, hand_center
def calculate_3d_distance(x1, y1, z1, x2, y2, z2):
"""计算3D空间中两点之间的欧几里得距离"""
return math.sqrt((x2 - x1)**2 + (y2 - y1)**2 + (z2 - z1)**2)
def map_to_angle(value, max_value):
"""将-max_value到max_value范围内的值映射到0-180度范围的角度"""
# 计算相对位置 (-1到1)
relative_position = value / max_value
# 映射到角度 (90度为中点)
angle = 90 + (relative_position * 45) # 使用45度作为最大偏移量
# 确保角度在有效范围内
return max(0, min(180, angle))
def draw_3d_control_visualization(frame, control_signal, hand_center):
"""在帧上绘制三轴控制可视化"""
height, width = frame.shape[:2]
# 绘制坐标轴
origin_x, origin_y = width - 150, height - 150
axis_length = 100
# X轴 (红色)
cv2.line(frame, (origin_x, origin_y), (origin_x + axis_length, origin_y), (0, 0, 255), 2)
cv2.putText(frame, "X", (origin_x + axis_length + 5, origin_y + 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
# Y轴 (绿色)
cv2.line(frame, (origin_x, origin_y), (origin_x, origin_y - axis_length), (0, 255, 0), 2)
cv2.putText(frame, "Y", (origin_x - 15, origin_y - axis_length - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# Z轴 (蓝色) - 以45度角向右上方
z_x = int(origin_x + axis_length * 0.7)
z_y = int(origin_y - axis_length * 0.7)
cv2.line(frame, (origin_x, origin_y), (z_x, z_y), (255, 0, 0), 2)
cv2.putText(frame, "Z", (z_x + 5, z_y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
# 绘制控制角度值
x_text = f"X: {control_signal['x_angle']:.1f}°"
y_text = f"Y: {control_signal['y_angle']:.1f}°"
z_text = f"Z: {control_signal['z_angle']:.1f}°"
grip_text = f"抓取: {'' if control_signal['grip'] == 0 else ''}"
action_text = f"动作: {control_signal['action']}"
speed_text = f"速度: {control_signal['speed']}"
cv2.putText(frame, x_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
cv2.putText(frame, y_text, (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
cv2.putText(frame, z_text, (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
cv2.putText(frame, grip_text, (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
cv2.putText(frame, action_text, (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
cv2.putText(frame, speed_text, (10, 180), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 255), 2)
# 绘制手部位置到屏幕中心的连接线
if hand_center:
center_x, center_y = int(width/2), int(height/2)
hand_x, hand_y = int(hand_center['x']), int(hand_center['y'])
# 绘制屏幕中心点
cv2.circle(frame, (center_x, center_y), 5, (0, 0, 255), -1)
# 绘制手部位置点
cv2.circle(frame, (hand_x, hand_y), 10, (0, 255, 0), -1)
# 绘制连接线
cv2.line(frame, (center_x, center_y), (hand_x, hand_y), (255, 0, 0), 2)
def analyze_hand_gesture(hand_landmarks):
"""分析手势以确定机械臂控制模式"""
# 获取指尖和手掌关键点
thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]
index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
middle_tip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
ring_tip = hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP]
pinky_tip = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP]
# 获取手掌底部关键点
wrist = hand_landmarks.landmark[mp_hands.HandLandmark.WRIST]
index_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP]
pinky_mcp = hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP]
# 检测拳头 - 所有手指弯曲
fist = (
thumb_tip.y > index_mcp.y and
index_tip.y > index_mcp.y and
middle_tip.y > index_mcp.y and
ring_tip.y > index_mcp.y and
pinky_tip.y > pinky_mcp.y
)
# 检测手掌打开 - 所有手指伸直
open_palm = (
thumb_tip.y < wrist.y and
index_tip.y < index_mcp.y and
middle_tip.y < index_mcp.y and
ring_tip.y < index_mcp.y and
pinky_tip.y < pinky_mcp.y
)
# 检测指向手势 - 食指伸出,其他手指弯曲
pointing = (
index_tip.y < index_mcp.y and
middle_tip.y > index_mcp.y and
ring_tip.y > index_mcp.y and
pinky_tip.y > pinky_mcp.y
)
# 检测"OK"手势 - 拇指和食指形成圆圈,其他手指伸出
pinch_distance = calculate_3d_distance(
thumb_tip.x, thumb_tip.y, thumb_tip.z,
index_tip.x, index_tip.y, index_tip.z
)
ok_gesture = (pinch_distance < 0.05 and
middle_tip.y < index_mcp.y and
ring_tip.y < index_mcp.y and
pinky_tip.y < pinky_mcp.y)
# 返回识别的手势
if fist:
return "fist"
elif open_palm:
return "open_palm"
elif pointing:
return "pointing"
elif ok_gesture:
return "ok"
else:
return "unknown"