-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathobject_scanner.py
More file actions
213 lines (169 loc) · 6.92 KB
/
object_scanner.py
File metadata and controls
213 lines (169 loc) · 6.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import cv2
import numpy as np
import os
import urllib.request
import time
from datetime import datetime
from hand_tracker import HandTracker
def _download_if_missing(url, path, description):
    """Fetch *url* to *path* unless *path* already exists.

    The data is written to a temporary ``<path>.part`` file first and only
    renamed to *path* once the transfer has finished, so an interrupted
    download never leaves a truncated file that a later run would accept
    as valid because it merely exists.

    Args:
        url: Location to download from.
        path: Local destination file.
        description: Human-readable name used in the progress message.
    """
    if os.path.exists(path):
        return
    print(f"Downloading {description}...")
    partial_path = path + ".part"
    try:
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, path)
    finally:
        # Still present only when the download or the rename failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def load_model():
    """Load the YOLOv3 network and the list of class names.

    The weights, the network configuration and the class-name list are
    looked up in the current working directory and downloaded when missing.

    Returns:
        tuple: ``(net, classes)`` where ``net`` is the network returned by
        ``cv2.dnn.readNetFromDarknet`` and ``classes`` is a list holding the
        stripped lines of ``coco.names`` (line order defines the class id).

    Raises:
        OSError: If a required file cannot be downloaded or read
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    model_path = "yolov3.weights"
    config_path = "yolov3.cfg"
    names_path = "coco.names"

    # Download the model files if they don't exist.
    _download_if_missing(
        "https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov3.weights",
        model_path,
        "YOLOv3 weights",
    )
    _download_if_missing(
        "https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov3.cfg",
        config_path,
        "YOLOv3 configuration",
    )
    # The class list is needed just like the two files above; without this
    # a fresh checkout fails with FileNotFoundError when opening it.
    _download_if_missing(
        "https://raw.githubusercontent.com/AlexeyAB/darknet/master/data/coco.names",
        names_path,
        "COCO class names",
    )

    # Load the network.
    net = cv2.dnn.readNetFromDarknet(config_path, model_path)

    # Load class names; blank lines are kept so line numbers keep matching
    # the class ids produced by the network.
    with open(names_path, "r", encoding="utf-8") as f:
        classes = [line.strip() for line in f]

    return net, classes
def process_frame(frame, net, classes, confidence_threshold=0.5):
    """Run the detection network on one frame and collect candidate boxes.

    Each detection row produced by the network is read as
    ``[center_x, center_y, width, height, <unused>, score_0, score_1, ...]``
    with the first four values scaled by the frame size to get pixels.

    Args:
        frame: Image array whose ``shape`` starts with ``(height, width)``.
        net: Network object providing ``setInput``, ``forward`` and
            ``getUnconnectedOutLayersNames``.
        classes: Class-name list. Not used here; kept so existing callers
            keep working.
        confidence_threshold: Minimum best-class score for a detection to be
            kept; also passed to non-maximum suppression as score threshold.

    Returns:
        tuple: ``(boxes, confidences, class_ids, indices)`` where ``boxes``
        is a list of ``[x, y, w, h]`` pixel rectangles (top-left corner plus
        size), ``confidences`` a list of floats, ``class_ids`` a list of
        ints, and ``indices`` the result of ``cv2.dnn.NMSBoxes`` selecting
        the boxes that survive non-maximum suppression.
    """
    height, width = frame.shape[:2]

    # Create a blob from the frame and feed it to the network.
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)

    # Ask the network for its output layer names directly instead of
    # indexing getLayerNames() with getUnconnectedOutLayers(), whose return
    # shape is not the same in every OpenCV release.
    outputs = net.forward(net.getUnconnectedOutLayersNames())

    boxes = []
    confidences = []
    class_ids = []
    for output in outputs:
        # Pick the best class per row and apply the threshold in NumPy so
        # the Python loop below only visits rows that are actually kept.
        scores = output[:, 5:]
        best_ids = scores.argmax(axis=1)
        best_scores = scores[np.arange(len(output)), best_ids]

        for row in np.flatnonzero(best_scores > confidence_threshold):
            detection = output[row]
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Convert from center-based to top-left-based rectangle.
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(float(best_scores[row]))
            class_ids.append(int(best_ids[row]))

    # Apply non-maximum suppression (0.4 is the overlap threshold).
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, 0.4)

    return boxes, confidences, class_ids, indices
def draw_detections(frame, boxes, confidences, class_ids, indices, classes, fps, hand_boxes=None, gesture_texts=None):
    """Draw object boxes, hand boxes, gesture labels and the FPS counter.

    Drawing happens in place on ``frame``; the same object is returned.

    Args:
        frame: Image to draw on.
        boxes: List of ``[x, y, w, h]`` rectangles.
        confidences: Score for each entry of ``boxes``.
        class_ids: Index into ``classes`` for each entry of ``boxes``.
        indices: Positions in ``boxes`` that should be drawn (the
            non-maximum-suppression result). May be empty.
        classes: Class-name list used to build the labels.
        fps: Frames-per-second value shown in the top-left corner.
        hand_boxes: Optional sequence of ``(x_min, y_min, x_max, y_max)``.
        gesture_texts: Optional sequence of label strings paired with
            ``hand_boxes`` by position.

    Returns:
        The ``frame`` that was passed in, with the overlays drawn.
    """
    # Normalize the selection to a flat integer array so an empty tuple,
    # an Nx1 array and a 1-D array are all handled the same way.
    for i in np.asarray(indices, dtype=int).reshape(-1):
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        confidence = confidences[i]

        # Draw bounding box
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Add label with confidence. The baseline is clamped so the text
        # stays inside the image when the box touches the top edge.
        text = f"{label}: {confidence:.2f}"
        cv2.putText(frame, text, (x, max(y - 5, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Draw hand boxes and gestures
    if hand_boxes and gesture_texts:
        for box, gesture in zip(hand_boxes, gesture_texts):
            x_min, y_min, x_max, y_max = box

            # Draw hand bounding box
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

            # Add gesture label, clamped for the same reason as above.
            cv2.putText(frame, gesture, (x_min, max(y_min - 10, 20)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

    # Add FPS counter
    cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    return frame
def save_frame(frame):
    """Write ``frame`` to ``screenshots/screenshot_<YYYYmmdd_HHMMSS>.jpg``.

    The ``screenshots`` directory is created when needed. A message is
    printed telling whether the image was written.

    Args:
        frame: Image to save.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("screenshots", exist_ok=True)

    # Generate filename with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"screenshots/screenshot_{timestamp}.jpg"

    # cv2.imwrite reports failure through its return value, so only claim
    # success when it actually wrote the file.
    if cv2.imwrite(filename, frame):
        print(f"Saved screenshot: {filename}")
    else:
        print(f"Error: Could not save screenshot: {filename}")
def main():
    """Run the interactive webcam loop with object and hand detection.

    Opens camera 0, loads the detection network and the hand tracker, then
    processes frames until ``q`` is pressed or a frame cannot be read.
    ``s`` saves the annotated frame and ``c`` switches the confidence
    threshold between 0.5 and 0.3. The camera and all windows are released
    on every exit path, including errors.
    """
    # Initialize webcam
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            print("Error: Could not open webcam")
            return

        print("Loading models...")
        net, classes = load_model()
        hand_tracker = HandTracker()
        print("Models loaded successfully!")

        # Initialize variables
        confidence_threshold = 0.5
        frame_count = 0
        # perf_counter is monotonic, so FPS is unaffected by clock changes.
        start_time = time.perf_counter()
        fps = 0

        print("\nControls:")
        print("Press 'q' to quit")
        print("Press 's' to save screenshot")
        print("Press 'c' to toggle confidence threshold")
        print("\nHand Gestures:")
        print("- Open Hand: All fingers up")
        print("- Closed Fist: All fingers down")
        print("- Peace Sign: Index and middle fingers up")
        print("- Pointing: Only index finger up")
        print("- Gun Sign: Thumb and index fingers up")
        print("- Four Fingers: All fingers up except thumb")

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame")
                break

            # Calculate FPS, refreshed every 30 frames.
            frame_count += 1
            if frame_count >= 30:
                now = time.perf_counter()
                elapsed = now - start_time
                if elapsed > 0:  # guard against a zero-length interval
                    fps = frame_count / elapsed
                frame_count = 0
                start_time = now

            # Process hand tracking
            frame, hands, hand_boxes = hand_tracker.find_hands(frame)
            gesture_texts = []
            if hands:
                # Get finger states and gestures for each detected hand
                for hand in hands:
                    finger_states = hand_tracker.get_finger_state(hand)
                    gesture_texts.append(hand_tracker.get_hand_gesture(finger_states))

            # Process object detection
            boxes, confidences, class_ids, indices = process_frame(
                frame, net, classes, confidence_threshold
            )

            # Draw detections
            frame = draw_detections(
                frame, boxes, confidences, class_ids, indices, classes, fps,
                hand_boxes, gesture_texts,
            )

            # Display the frame
            cv2.imshow('Object Detection with Hand Tracking', frame)

            # Handle key presses
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('s'):
                save_frame(frame)
            elif key == ord('c'):
                confidence_threshold = 0.3 if confidence_threshold > 0.3 else 0.5
                print(f"Confidence threshold: {confidence_threshold}")
    finally:
        # Clean up even when loading a model or processing a frame raised.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()