Skip to content

Commit 6995093

Browse files
author
kpatch
committed
feat: implement face recognition
1 parent 610ab34 commit 6995093

File tree

11 files changed

+3131
-148
lines changed

11 files changed

+3131
-148
lines changed

coffee_ws/src/coffee_head/.nojekyll

Lines changed: 0 additions & 1 deletion
This file was deleted.

coffee_ws/src/coffee_head/coffee_head/camera_node.py

Lines changed: 559 additions & 147 deletions
Large diffs are not rendered by default.
Lines changed: 339 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,339 @@
1+
#!/usr/bin/env python3
2+
3+
import rclpy
4+
import cv2
5+
import time
6+
import numpy as np
7+
import threading
8+
import os
9+
import json
10+
from rclpy.node import Node
11+
from std_msgs.msg import Float32MultiArray, String
12+
from sensor_msgs.msg import Image
13+
from cv_bridge import CvBridge
14+
15+
# Models directory for face detection models
16+
MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
17+
os.makedirs(MODELS_DIR, exist_ok=True)
18+
19+
class SimpleCamera:
20+
"""A simple camera class without Qt dependencies"""
21+
def __init__(self, node):
22+
self.node = node
23+
self.running = True
24+
self.camera = None
25+
self.camera_index = 0
26+
self.face_detection_enabled = True
27+
self.face_net = None
28+
self.face_confidence_threshold = 0.5
29+
30+
# Create face publisher
31+
self.face_pub = node.create_publisher(String, 'face_detection_data', 10)
32+
33+
# Create frame publisher
34+
self.frame_pub = node.create_publisher(Image, 'camera_frame', 10)
35+
36+
# Create face image publisher
37+
self.face_image_pub = node.create_publisher(Image, 'face_images', 10)
38+
39+
# Create CV bridge
40+
self.bridge = CvBridge()
41+
42+
# Init face detector
43+
self.init_face_detector()
44+
45+
# Camera thread
46+
self.camera_thread = None
47+
48+
def init_face_detector(self):
49+
"""Initialize OpenCV DNN face detector"""
50+
try:
51+
# Model files
52+
model_file = os.path.join(MODELS_DIR, "opencv_face_detector_uint8.pb")
53+
config_file = os.path.join(MODELS_DIR, "opencv_face_detector.pbtxt")
54+
55+
# Download if needed
56+
if not os.path.exists(model_file) or not os.path.exists(config_file):
57+
self.download_face_model(model_file, config_file)
58+
59+
# Load detector
60+
self.face_net = cv2.dnn.readNet(model_file, config_file)
61+
62+
# Use GPU if available
63+
if cv2.cuda.getCudaEnabledDeviceCount() > 0:
64+
self.face_net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
65+
self.face_net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
66+
else:
67+
self.face_net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
68+
self.face_net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
69+
70+
print("Face detector (OpenCV DNN) initialized successfully")
71+
except Exception as e:
72+
print(f"Error initializing face detector: {e}")
73+
self.face_net = None
74+
75+
def download_face_model(self, model_file, config_file):
76+
"""Download face detection model files"""
77+
try:
78+
# URLs
79+
model_url = "https://github.com/spmallick/learnopencv/raw/refs/heads/master/AgeGender/opencv_face_detector_uint8.pb"
80+
config_url = "https://raw.githubusercontent.com/spmallick/learnopencv/refs/heads/master/AgeGender/opencv_face_detector.pbtxt"
81+
82+
# Download
83+
import urllib.request
84+
print("Downloading face detection model...")
85+
urllib.request.urlretrieve(model_url, model_file)
86+
urllib.request.urlretrieve(config_url, config_file)
87+
print("Face detection model downloaded successfully")
88+
except Exception as e:
89+
print(f"Error downloading face model: {e}")
90+
raise
91+
92+
def detect_faces(self, frame):
93+
"""Detect faces using OpenCV DNN detector"""
94+
if self.face_net is None:
95+
return []
96+
97+
# Get dimensions
98+
h, w = frame.shape[:2]
99+
100+
# Create blob
101+
blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], False, False)
102+
self.face_net.setInput(blob)
103+
104+
# Run detection
105+
detections = self.face_net.forward()
106+
107+
# Parse results
108+
faces = []
109+
for i in range(detections.shape[2]):
110+
confidence = detections[0, 0, i, 2]
111+
if confidence > self.face_confidence_threshold:
112+
# Get coords
113+
x1 = int(detections[0, 0, i, 3] * w)
114+
y1 = int(detections[0, 0, i, 4] * h)
115+
x2 = int(detections[0, 0, i, 5] * w)
116+
y2 = int(detections[0, 0, i, 6] * h)
117+
118+
# Validate
119+
x1 = max(0, min(x1, w-1))
120+
y1 = max(0, min(y1, h-1))
121+
x2 = max(0, min(x2, w-1))
122+
y2 = max(0, min(y2, h-1))
123+
124+
if x2 > x1 and y2 > y1:
125+
faces.append({
126+
'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2,
127+
'center_x': (x1 + x2) // 2,
128+
'center_y': (y1 + y2) // 2,
129+
'radius': max((x2 - x1), (y2 - y1)) // 2,
130+
'confidence': float(confidence)
131+
})
132+
133+
return faces
134+
135+
def start(self):
136+
"""Start the camera thread"""
137+
print("Starting simple camera node")
138+
self.camera_thread = threading.Thread(target=self._camera_loop)
139+
self.camera_thread.daemon = True
140+
self.camera_thread.start()
141+
142+
def _camera_loop(self):
143+
"""Main camera processing loop"""
144+
print("Camera thread starting")
145+
146+
# Try to open camera
147+
self.camera = cv2.VideoCapture(self.camera_index, cv2.CAP_V4L2)
148+
if not self.camera.isOpened():
149+
print(f"Failed to open camera at index {self.camera_index}")
150+
self.camera = cv2.VideoCapture(self.camera_index) # Try default backend
151+
if not self.camera.isOpened():
152+
print("Failed to open camera with any backend")
153+
return
154+
155+
print(f"Successfully opened camera at index {self.camera_index}")
156+
157+
# Set resolution - 720p for better performance
158+
self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
159+
self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
160+
161+
# Frame counter
162+
frame_count = 0
163+
164+
while self.running:
165+
try:
166+
# Read frame
167+
ret, frame = self.camera.read()
168+
if not ret:
169+
print("Failed to read frame")
170+
time.sleep(0.1)
171+
continue
172+
173+
# Detect faces
174+
if self.face_detection_enabled and frame_count % 3 == 0: # Every 3rd frame
175+
faces = self.detect_faces(frame)
176+
if faces:
177+
self.publish_face_data(faces)
178+
self.publish_face_images(frame, faces)
179+
180+
# Draw basic info on frame
181+
cv2.putText(frame, f"Camera: {self.camera_index}", (10, 30),
182+
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
183+
184+
# Publish frame
185+
self.publish_frame(frame)
186+
187+
# Update counter
188+
frame_count += 1
189+
190+
# Short sleep to prevent CPU overuse
191+
time.sleep(0.01)
192+
193+
except Exception as e:
194+
print(f"Error in camera loop: {e}")
195+
time.sleep(0.1)
196+
197+
# Clean up
198+
if self.camera and self.camera.isOpened():
199+
self.camera.release()
200+
201+
def stop(self):
202+
"""Stop the camera thread"""
203+
self.running = False
204+
if self.camera_thread:
205+
self.camera_thread.join(timeout=2.0)
206+
if self.camera and self.camera.isOpened():
207+
self.camera.release()
208+
209+
def publish_face_data(self, faces):
210+
"""Publish face detection data"""
211+
if not faces:
212+
return
213+
214+
# Get frame dimensions for the head tracking node
215+
frame_width = 1280 # Default frame width
216+
frame_height = 720 # Default frame height
217+
218+
if self.camera:
219+
frame_width = int(self.camera.get(cv2.CAP_PROP_FRAME_WIDTH))
220+
frame_height = int(self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT))
221+
222+
# Create JSON with face data - include frame dimensions
223+
face_data = {
224+
"timestamp": time.time(),
225+
"frame_width": frame_width,
226+
"frame_height": frame_height,
227+
"faces": [
228+
{
229+
"x1": face["x1"],
230+
"y1": face["y1"],
231+
"x2": face["x2"],
232+
"y2": face["y2"],
233+
"center_x": face["center_x"],
234+
"center_y": face["center_y"],
235+
"confidence": face["confidence"]
236+
}
237+
for face in faces
238+
]
239+
}
240+
241+
# Publish
242+
msg = String()
243+
msg.data = json.dumps(face_data)
244+
self.face_pub.publish(msg)
245+
246+
def publish_face_images(self, frame, faces):
247+
"""Extract and publish individual face images"""
248+
try:
249+
for i, face in enumerate(faces):
250+
# Extract face with margin
251+
margin = 0.2 # 20%
252+
253+
# Get dimensions
254+
x1, y1 = face['x1'], face['y1']
255+
x2, y2 = face['x2'], face['y2']
256+
h, w = frame.shape[:2]
257+
258+
# Calculate margin
259+
margin_x = int((x2 - x1) * margin)
260+
margin_y = int((y2 - y1) * margin)
261+
262+
# Apply margin
263+
x1_margin = max(0, x1 - margin_x)
264+
y1_margin = max(0, y1 - margin_y)
265+
x2_margin = min(w, x2 + margin_x)
266+
y2_margin = min(h, y2 + margin_y)
267+
268+
# Extract face
269+
face_img = frame[y1_margin:y2_margin, x1_margin:x2_margin]
270+
271+
# Skip if too small
272+
if face_img.size == 0 or face_img.shape[0] < 30 or face_img.shape[1] < 30:
273+
continue
274+
275+
# Resize
276+
face_img = cv2.resize(face_img, (150, 150))
277+
278+
# Publish
279+
face_msg = self.bridge.cv2_to_imgmsg(face_img, encoding="bgr8")
280+
face_msg.header.stamp = self.node.get_clock().now().to_msg()
281+
face_msg.header.frame_id = f"face_{i}"
282+
self.face_image_pub.publish(face_msg)
283+
284+
except Exception as e:
285+
print(f"Error publishing face images: {e}")
286+
287+
def publish_frame(self, frame):
288+
"""Publish camera frame"""
289+
try:
290+
frame_msg = self.bridge.cv2_to_imgmsg(frame, encoding="bgr8")
291+
frame_msg.header.stamp = self.node.get_clock().now().to_msg()
292+
self.frame_pub.publish(frame_msg)
293+
except Exception as e:
294+
print(f"Error publishing frame: {e}")
295+
296+
297+
class SimpleCameraNode(Node):
298+
"""Simple camera node without Qt dependencies"""
299+
def __init__(self):
300+
super().__init__('camera_node')
301+
self.get_logger().info('Starting simple camera node')
302+
303+
# Create camera
304+
self.camera = SimpleCamera(self)
305+
306+
# Start camera
307+
self.camera.start()
308+
309+
# Handle shutdown
310+
self.get_logger().info('Simple camera node started')
311+
312+
def destroy_node(self):
313+
"""Clean shutdown"""
314+
self.get_logger().info('Shutting down camera node')
315+
if hasattr(self, 'camera'):
316+
self.camera.stop()
317+
super().destroy_node()
318+
319+
320+
def main(args=None):
321+
rclpy.init(args=args)
322+
323+
try:
324+
node = SimpleCameraNode()
325+
rclpy.spin(node)
326+
except KeyboardInterrupt:
327+
pass
328+
except Exception as e:
329+
print(f"Error in camera node: {e}")
330+
import traceback
331+
traceback.print_exc()
332+
finally:
333+
if 'node' in locals():
334+
node.destroy_node()
335+
rclpy.shutdown()
336+
337+
338+
if __name__ == '__main__':
339+
main()

0 commit comments

Comments
 (0)