Skip to content

Commit 51edf38

Browse files
authored
PR #23: 3D Pose Estimation
Added 3d pose estimation file Merge pull request #23 from mahipalimkar/Mahi
2 parents 2e74230 + f2f3b66 commit 51edf38

File tree

2 files changed

+98
-0
lines changed

2 files changed

+98
-0
lines changed
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import cv2
2+
import mediapipe as mp
3+
import numpy as np
4+
import json
5+
import socket
6+
7+
# Set up the UDP socket
8+
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
9+
server_address = ('localhost', 12345) # Port for Unity to listen to
10+
11+
# Function to read and return 3D landmark positions
12+
def read_landmark_positions_3d(results):
13+
if results.pose_world_landmarks is None:
14+
return None
15+
else:
16+
# Extract 3D landmark positions
17+
landmarks = [results.pose_world_landmarks.landmark[lm] for lm in mp.solutions.pose.PoseLandmark]
18+
return np.array([(lm.x, lm.y, lm.z) for lm in landmarks])
19+
20+
# Function to draw landmarks on the image
21+
def draw_landmarks_on_image(frame, results):
22+
if results.pose_landmarks is not None:
23+
mp_drawing = mp.solutions.drawing_utils
24+
mp_pose = mp.solutions.pose
25+
mp_drawing.draw_landmarks(
26+
frame,
27+
results.pose_landmarks,
28+
mp_pose.POSE_CONNECTIONS,
29+
mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=2),
30+
mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
31+
)
32+
33+
# Real-time 3D pose estimation function
34+
def real_time_pose_estimation():
35+
# Initialize webcam or video
36+
cap = cv2.VideoCapture(0) # Use 0 for webcam
37+
38+
# Initialize Mediapipe Pose model
39+
mp_pose = mp.solutions.pose
40+
pose_detector = mp_pose.Pose(static_image_mode=False, model_complexity=2)
41+
42+
while cap.isOpened():
43+
ret, frame = cap.read()
44+
if not ret:
45+
break
46+
47+
# Convert the frame to RGB (required by Mediapipe)
48+
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
49+
50+
# Process the frame to obtain pose landmarks
51+
results = pose_detector.process(frame_rgb)
52+
53+
# Draw landmarks on the frame (optional for visualization)
54+
draw_landmarks_on_image(frame, results)
55+
56+
# Extract 3D landmarks
57+
landmark_positions_3d = read_landmark_positions_3d(results)
58+
if landmark_positions_3d is not None:
59+
# Send landmark positions to Unity via UDP
60+
data = json.dumps(landmark_positions_3d.tolist()) # Convert to JSON format
61+
sock.sendto(data.encode('utf-8'), server_address) # Send data to Unity
62+
print(f'3D Landmarks: {landmark_positions_3d}') # Optional: Print landmarks to console
63+
64+
# Display the frame with landmarks drawn
65+
cv2.imshow('Real-Time 3D Pose Estimation', frame)
66+
67+
# Exit loop when 'q' key is pressed
68+
if cv2.waitKey(1) & 0xFF == ord('q'):
69+
break
70+
71+
cap.release()
72+
cv2.destroyAllWindows()
73+
74+
if __name__ == "__main__":
75+
real_time_pose_estimation()

3d_pose_estimation/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Real-Time 3D Pose Estimation
2+
3+
This project implements real-time 3D pose estimation using MediaPipe and OpenCV. It captures video from a webcam, detects human pose landmarks in 3D, and sends the landmark data to a Unity application via UDP.
4+
5+
## Table of Contents
6+
- [Features](#features)
7+
- [Requirements](#requirements)
8+
9+
10+
## Features
11+
- Real-time detection of 3D pose landmarks using MediaPipe.
12+
- Visualization of detected landmarks on the webcam feed.
13+
- Sending 3D landmark coordinates to a Unity application via UDP for further processing or visualization.
14+
- Option to terminate the program by pressing the 'q' key.
15+
16+
## Requirements
17+
- Python 3.x
18+
- OpenCV
19+
- MediaPipe
20+
- NumPy
21+
- JSON
22+
- Socket
23+

0 commit comments

Comments
 (0)